Tensorflow: reshaping a tensor

I am trying to use tf.nn.sparse_softmax_cross_entropy_with_logits and I have followed the answer by user Olivier Moindrot [here][1], but I am getting a dimension error.

I am building a segmentation network, so the input image is 200x200 and the output image is also 200x200. The classification is binary: foreground and background.

After building the CNN with pred = conv_net(x, weights, biases, keep_prob), pred is the output of the final fully connected layer.

The CNN has a couple of conv layers followed by a fully connected layer. The fully connected layer has 40000 outputs because it is 200x200 flattened.

Following the link above, I reshape pred as ...

(side note: I also tried tf.pack()'ing two pred's together, but I figured that was wrong)

    pred = tf.reshape(pred, [-1, 200, 200, 2])

... so that there are 2 classes. Continuing from the link above ...

    temp_pred = tf.reshape(pred, [-1, 2])
    temp_y = tf.reshape(y, [-1])

    cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(temp_pred, temp_y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
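
For reference, tf.nn.sparse_softmax_cross_entropy_with_logits expects logits of shape [N, num_classes] and integer labels of shape [N]. A minimal sketch of those shapes for this problem (the example_* names are only illustrative):

    import tensorflow as tf

    # Illustrative only: for a batch of 10 images of 200x200 pixels and 2 classes,
    # the loss expects N = 10*200*200 = 400000 rows of logits and 400000 labels.
    example_logits = tf.zeros([10 * 200 * 200, 2])                   # [N, num_classes]
    example_labels = tf.zeros([10 * 200 * 200], dtype=tf.int64)      # [N], values 0 or 1
    example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(example_logits, example_labels)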

I have the following placeholders and batch data ...

    x = tf.placeholder(tf.float32, [None, 200, 200])
    y = tf.placeholder(tf.int64, [None, 200, 200])

    (Pdb) batch_x.shape
    (10, 200, 200)
    (Pdb) batch_y.shape
    (10, 200, 200)

When I run a training session, I get the following dimension error:

 tensorflow.python.framework.errors.InvalidArgumentError: logits first dimension must match labels size. logits shape=[3200000,2] labels shape=[400000] 

My full code looks like this:

    import tensorflow as tf
    import pdb
    import numpy as np

    # Import MINST data
    # from tensorflow.examples.tutorials.mnist import input_data
    # mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

    # Parameters
    learning_rate = 0.001
    training_iters = 200000
    batch_size = 10
    display_step = 1

    # Network Parameters
    n_input = 200   # MNIST data input (img shape: 28*28)
    n_classes = 2   # MNIST total classes (0-9 digits)
    n_output = 40000
    #n_input = 200
    dropout = 0.75  # Dropout, probability to keep units

    # tf Graph input
    x = tf.placeholder(tf.float32, [None, n_input, n_input])
    y = tf.placeholder(tf.int64, [None, n_input, n_input])
    keep_prob = tf.placeholder(tf.float32)  # dropout (keep probability)

    # Create some wrappers for simplicity
    def conv2d(x, W, b, strides=1):
        # Conv2D wrapper, with bias and relu activation
        x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
        x = tf.nn.bias_add(x, b)
        return tf.nn.relu(x)

    def maxpool2d(x, k=2):
        # MaxPool2D wrapper
        return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
                              padding='SAME')

    # Create model
    def conv_net(x, weights, biases, dropout):
        # Reshape input picture
        x = tf.reshape(x, shape=[-1, 200, 200, 1])

        # Convolution Layer
        conv1 = conv2d(x, weights['wc1'], biases['bc1'])
        # Max Pooling (down-sampling)
        # conv1 = tf.nn.local_response_normalization(conv1)
        # conv1 = maxpool2d(conv1, k=2)

        # Convolution Layer
        conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
        # Max Pooling (down-sampling)
        # conv2 = tf.nn.local_response_normalization(conv2)
        # conv2 = maxpool2d(conv2, k=2)

        # Convolution Layer
        conv3 = conv2d(conv2, weights['wc3'], biases['bc3'])
        # # Max Pooling (down-sampling)
        # conv3 = tf.nn.local_response_normalization(conv3)
        # conv3 = maxpool2d(conv3, k=2)
        # return conv3

        # Fully connected layer
        # Reshape conv2 output to fit fully connected layer input
        fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
        fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
        fc1 = tf.nn.relu(fc1)
        # Apply Dropout
        fc1 = tf.nn.dropout(fc1, dropout)

        return tf.add(tf.matmul(fc1, weights['out']), biases['out'])
        # Output, class prediction
        # output = []
        # for i in xrange(2):
        #     # output.append(tf.nn.softmax(tf.add(tf.matmul(fc1, weights['out']), biases['out'])))
        #     output.append((tf.add(tf.matmul(fc1, weights['out']), biases['out'])))
        # # return output

    # Store layers weight & bias
    weights = {
        # 5x5 conv, 1 input, 32 outputs
        'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
        # 5x5 conv, 32 inputs, 64 outputs
        'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
        # 5x5 conv, 32 inputs, 64 outputs
        'wc3': tf.Variable(tf.random_normal([5, 5, 64, 128])),
        # fully connected, 7*7*64 inputs, 1024 outputs
        'wd1': tf.Variable(tf.random_normal([50*50*64, 1024])),
        # 1024 inputs, 10 outputs (class prediction)
        'out': tf.Variable(tf.random_normal([1024, n_output]))
    }

    biases = {
        'bc1': tf.Variable(tf.random_normal([32])),
        'bc2': tf.Variable(tf.random_normal([64])),
        'bc3': tf.Variable(tf.random_normal([128])),
        'bd1': tf.Variable(tf.random_normal([1024])),
        'out': tf.Variable(tf.random_normal([n_output]))
    }

    # Construct model
    pred = conv_net(x, weights, biases, keep_prob)
    pdb.set_trace()
    # pred = tf.pack(tf.transpose(pred,[1,2,0]))
    pred = tf.reshape(pred, [-1, n_input, n_input, 2])
    temp_pred = tf.reshape(pred, [-1, 2])
    temp_y = tf.reshape(y, [-1])

    # Define loss and optimizer
    cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(temp_pred, temp_y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Evaluate model
    # correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    temp_pred2 = tf.reshape(pred, [-1, n_input, n_input])
    correct_pred = tf.equal(tf.cast(y, tf.float32), tf.sub(temp_pred2, tf.cast(y, tf.float32)))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Initializing the variables
    init = tf.initialize_all_variables()

    # Launch the graph
    with tf.Session() as sess:
        sess.run(init)
        summ = tf.train.SummaryWriter('/tmp/logdir/', sess.graph_def)
        step = 1
        from tensorflow.contrib.learn.python.learn.datasets.scroll import scroll_data
        data = scroll_data.read_data('/home/kendall/Desktop/')
        # Keep training until reach max iterations
        while step * batch_size < training_iters:
            batch_x, batch_y = data.train.next_batch(batch_size)
            # Run optimization op (backprop)
            batch_x = batch_x.reshape((batch_size, n_input, n_input))
            batch_y = batch_y.reshape((batch_size, n_input, n_input))
            batch_y = np.int64(batch_y)
            # y = tf.reshape(y, [-1,n_input,n_input])
            pdb.set_trace()
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})
            if step % display_step == 0:
                # Calculate batch loss and accuracy
                pdb.set_trace()
                loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
                                                                  y: batch_y,
                                                                  keep_prob: 1.})
                print "Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
                      "{:.6f}".format(loss) + ", Training Accuracy= " + \
                      "{:.5f}".format(acc)
            step += 1
        print "Optimization Finished!"
        # Calculate accuracy for 256 mnist test images
        print "Testing Accuracy:", \
            sess.run(accuracy, feed_dict={x: data.test.images[:256],
                                          y: data.test.labels[:256],
                                          keep_prob: 1.})

[1]: http://stackoverflow.com/questions/35317029/how-to-implement-pixel-wise-classification-for-scene-labeling-in-tensorflow/37294185?noredirect=1#comment63253577_37294185

Let's forget about softmax and use the simpler tf.nn.sigmoid_cross_entropy_with_logits here:

  • with sigmoid, you only need one prediction per pixel
    • if pred[pixel] > 0.5, you predict 1
    • if pred[pixel] < 0.5, you predict 0
  • the shapes of the prediction and the target should then be [batch_size, 40000]
    pred = conv_net(x, weights, biases, keep_prob)    # shape [batch_size, 40000]
    flattened_y = tf.reshape(y, [-1, 40000])          # shape [batch_size, 40000]
    flattened_y = tf.cast(flattened_y, tf.float32)    # the sigmoid loss needs float targets (y is int64 above)
    loss = tf.nn.sigmoid_cross_entropy_with_logits(pred, flattened_y)
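
To get the per-pixel decision described in the bullets above, you can threshold the sigmoid of the logits. A minimal sketch, assuming pred and y are the tensors from the snippet above (pixel_probs, pixel_pred and pixel_acc are illustrative names):

    pixel_probs = tf.sigmoid(pred)                      # shape [batch_size, 40000], values in (0, 1)
    pixel_pred = tf.cast(pixel_probs > 0.5, tf.int64)   # 1 = foreground, 0 = background
    correct = tf.equal(pixel_pred, tf.reshape(y, [-1, 40000]))
    pixel_acc = tf.reduce_mean(tf.cast(correct, tf.float32))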

Sparse softmax is only helpful after the last layer, where you want to resize the image back to its original size (200*200). In that case, reshaping the way you did would keep the code error free. But in your case you don't have to use sparse softmax. To see why, check the dimensions of "pred".
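
For instance, a quick way to check those dimensions is to print the static shapes right after the reshapes in the question's code (a sketch; the shapes in the comments are what that graph reports):

    print pred.get_shape()       # (?, 200, 200, 2) after the reshape to [-1, 200, 200, 2]
    print temp_pred.get_shape()  # (?, 2)
    print temp_y.get_shape()     # (?,)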