Implementing seq2seq with beam search

I am currently implementing a seq2seq model based on the example code that TensorFlow provides, and I want to get the top-5 decoder outputs to do reinforcement learning.

However, they implemented the translation model with an attention decoder, so I have to implement beam search myself to get the top-k results.
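For context, the behavior I am after can be sketched in plain NumPy, independent of TensorFlow. The toy transition matrix, the start token `0`, and the helper names below are illustrative assumptions, not part of the actual model:

    import numpy as np

    def beam_search(step_log_probs, num_steps, beam_size):
        """step_log_probs(prev_tokens) -> log-probs of shape (len(prev_tokens), vocab)."""
        beams = [([0], 0.0)]  # assumed start token 0 with log-prob 0
        for _ in range(num_steps):
            candidates = []
            log_probs = step_log_probs(np.array([b[0][-1] for b in beams]))
            for (tokens, score), lp in zip(beams, log_probs):
                for v in np.argsort(lp)[-beam_size:]:  # top-k extensions per beam
                    candidates.append((tokens + [int(v)], score + lp[v]))
            # keep the beam_size best total log-probs across all extensions
            candidates.sort(key=lambda c: c[1], reverse=True)
            beams = candidates[:beam_size]
        return beams

    rng = np.random.RandomState(0)
    vocab_size = 10
    logits = rng.randn(vocab_size, vocab_size)  # toy per-token transition scores
    log_p = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))  # row-wise log-softmax
    for tokens, score in beam_search(lambda prev: log_p[prev], num_steps=4, beam_size=5):
        print(tokens, score)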

Here is the part of the code I have implemented so far (this code is added to translate.py).

Reference: https://github.com/tensorflow/tensorflow/issues/654

    with tf.Graph().as_default():
        beam_size = FLAGS.beam_size          # Number of hypotheses in beam
        num_symbols = FLAGS.tar_vocab_size   # Output vocabulary size
        embedding_size = 10
        num_steps = 5
        embedding = tf.zeros([num_symbols, embedding_size])
        output_projection = None

        log_beam_probs, beam_symbols, beam_path = [], [], []

        def beam_search(prev, i):
            if output_projection is not None:
                prev = tf.nn.xw_plus_b(prev, output_projection[0], output_projection[1])
            probs = tf.log(tf.nn.softmax(prev))
            if i > 1:
                probs = tf.reshape(probs + log_beam_probs[-1],
                                   [-1, beam_size * num_symbols])
            best_probs, indices = tf.nn.top_k(probs, beam_size)
            indices = tf.stop_gradient(tf.squeeze(tf.reshape(indices, [-1, 1])))
            best_probs = tf.stop_gradient(tf.reshape(best_probs, [-1, 1]))

            symbols = indices % num_symbols       # which word in vocabulary
            beam_parent = indices // num_symbols  # which hypothesis it came from

            beam_symbols.append(symbols)
            beam_path.append(beam_parent)
            log_beam_probs.append(best_probs)
            return tf.nn.embedding_lookup(embedding, symbols)

        # Setting up graph.
        inputs = [tf.placeholder(tf.float32, shape=[None, num_symbols])
                  for i in range(num_steps)]
        for i in range(num_steps):
            beam_search(inputs[i], i + 1)

        input_vals = tf.zeros([1, beam_size], dtype=tf.float32)
        input_feed = {inputs[i]: input_vals[i][:beam_size, :]
                      for i in xrange(num_steps)}
        output_feed = beam_symbols + beam_path + log_beam_probs

        session = tf.InteractiveSession()
        outputs = session.run(output_feed, feed_dict=input_feed)

        print("Top_5 Sentences ")
        for predicted in enumerate(outputs[:5]):
            print(list(predicted))
            print("\n")

In the input_feed part, I get an error:

    ValueError: Shape (1, 12) must have rank 1

Is there a problem in my code for doing beam search?
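One likely cause: the placeholders are declared with shape `[None, num_symbols]`, so the feed must supply rank-2 float arrays. `input_vals` is a symbolic tensor of shape `[1, beam_size]`, so `input_vals[i]` has rank 1 and the second slice `[:beam_size, :]` fails; a symbolic tensor cannot be passed as a feed value in any case. A minimal sketch of a feed that matches the placeholder shapes, with zero arrays standing in for real decoder logits (an assumption, not the original data):

    import numpy as np

    # Hypothetical fix: feed rank-2 NumPy arrays matching [None, num_symbols];
    # the zeros are stand-ins for real decoder logits.
    input_feed = {
        inputs[i]: np.zeros((beam_size, num_symbols), dtype=np.float32)
        for i in range(num_steps)
    }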

A tried and true demo:

    # -*- coding: utf-8 -*-
    from __future__ import unicode_literals, print_function
    from __future__ import absolute_import
    from __future__ import division

    import tensorflow as tf

    tf.app.flags.DEFINE_integer('beam_size', 4, 'beam size for beam search decoding.')
    tf.app.flags.DEFINE_integer('vocab_size', 40, 'vocabulary size.')
    tf.app.flags.DEFINE_integer('batch_size', 5, 'the batch size.')
    tf.app.flags.DEFINE_integer('num_steps', 10, 'the number of decoding steps.')
    tf.app.flags.DEFINE_integer('embedding_size', 50, 'the embedding size.')
    FLAGS = tf.app.flags.FLAGS

    with tf.Graph().as_default():
        batch_size = FLAGS.batch_size
        beam_size = FLAGS.beam_size    # Number of hypotheses in beam
        vocab_size = FLAGS.vocab_size  # Output vocabulary size
        num_steps = FLAGS.num_steps
        embedding_size = FLAGS.embedding_size

        embedding = tf.random_normal([vocab_size, embedding_size], -2, 4,
                                     dtype=tf.float32, seed=0)
        output_projection = [
            tf.random_normal([embedding_size, vocab_size], mean=2, stddev=1,
                             dtype=tf.float32, seed=0),
            tf.random_normal([vocab_size], mean=0, stddev=1,
                             dtype=tf.float32, seed=0),
        ]
        index_base = tf.reshape(
            tf.tile(tf.expand_dims(tf.range(batch_size) * beam_size, axis=1),
                    [1, beam_size]),
            [-1])

        log_beam_probs, beam_symbols = [], []

        def beam_search(prev, i):
            if output_projection is not None:
                # (batch_size*beam_size, embedding_size) -> (batch_size*beam_size, vocab_size)
                prev = tf.nn.xw_plus_b(prev, output_projection[0], output_projection[1])

            log_probs = tf.nn.log_softmax(prev)
            if i > 1:
                # total probability
                log_probs = tf.reshape(
                    tf.reduce_sum(tf.stack(log_beam_probs, axis=1), axis=1) + log_probs,
                    [-1, beam_size * vocab_size])
                # (batch_size*beam_size, vocab_size) -> (batch_size, beam_size*vocab_size)

            best_probs, indices = tf.nn.top_k(log_probs, beam_size)  # (batch_size, beam_size)
            indices = tf.squeeze(tf.reshape(indices, [-1, 1]))
            best_probs = tf.reshape(best_probs, [-1, 1])  # (batch_size*beam_size, 1)

            symbols = indices % vocab_size       # which word in vocabulary
            beam_parent = indices // vocab_size  # which hypothesis it came from
            beam_symbols.append(symbols)         # (batch_size*beam_size,) per step
            real_path = beam_parent + index_base

            # get rid of the previous probability
            if i > 1:
                pre_sum = tf.reduce_sum(tf.stack(log_beam_probs, axis=1), axis=1)
                pre_sum = tf.gather(pre_sum, real_path)
            else:
                pre_sum = 0
            log_beam_probs.append(best_probs - pre_sum)

            # adapt the previous symbols according to the current symbol
            if i > 1:
                # walk back over earlier steps (Python 2: range returns a list)
                for j in range(i)[:0:-1]:
                    beam_symbols[j - 1] = tf.gather(beam_symbols[j - 1], real_path)
                    log_beam_probs[j - 1] = tf.gather(log_beam_probs[j - 1], real_path)

            # (batch_size*beam_size, embedding_size)
            return tf.nn.embedding_lookup(embedding, symbols)

        # Setting up graph.
        init_input = tf.placeholder(tf.float32, shape=[batch_size, embedding_size])
        next_input = init_input
        for i in range(num_steps):
            next_input = beam_search(next_input, i + 1)

        seq_rank = tf.stack(values=beam_symbols, axis=1)  # (batch_size*beam_size, num_steps)
        seq_rank = tf.reshape(seq_rank, [batch_size, beam_size, num_steps])

        init_in = tf.random_uniform([batch_size], minval=0, maxval=vocab_size,
                                    dtype=tf.int32, seed=0)
        init_emb = tf.squeeze(tf.nn.embedding_lookup(embedding, init_in))

        session = tf.InteractiveSession()
        init_emb = init_emb.eval()
        seq_rank = session.run(seq_rank, feed_dict={init_input: init_emb})
        # top_k sorts in descending order, so beam index 0 is the best hypothesis
        best_seq = seq_rank[:, 0, :]
        for i in range(batch_size):
            print("batch %s" % i, end=": ")
            print(best_seq[i])

It is simplified from the beam search part of my seq2seq model. Python 2.7 and TF 1.4.
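As a design note, the demo keeps the beams aligned by re-gathering the earlier symbols with `real_path` at every step, instead of storing a separate `beam_path` to backtrack at the end as the question's code does. The index arithmetic both snippets rely on can be checked in isolation with NumPy (toy numbers, not from the original post):

    import numpy as np

    vocab_size, beam_size = 40, 4
    flat_scores = np.random.RandomState(0).randn(beam_size * vocab_size)
    # top-k over the flattened (beam_size * vocab_size) scores, descending
    indices = np.argsort(flat_scores)[-beam_size:][::-1]
    symbols = indices % vocab_size        # which word in the vocabulary
    beam_parent = indices // vocab_size   # which hypothesis it extends
    print(symbols, beam_parent)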