TensorFlow multivariate linear regression not converging

I'm trying to train a multivariate linear regression model with regularization using TensorFlow. For some reason, I can't get the training part of the code below to compute the error I want to use for the gradient descent update. Am I doing something wrong in setting up my graph?

import tensorflow as tf
import numpy as np

def normalize_data(matrix):
    averages = np.average(matrix, 0)
    mins = np.min(matrix, 0)
    maxes = np.max(matrix, 0)
    ranges = maxes - mins
    return ((matrix - averages)/ranges)

def run_regression(X, Y, X_test, Y_test, lambda_value=0.1, normalize=False, batch_size=10):
    x_train = normalize_data(X) if normalize else X
    y_train = Y
    x_test = X_test
    y_test = Y_test
    session = tf.Session()
    # Calculate number of features for X and Y
    x_features_length = len(X[0])
    y_features_length = len(Y[0])
    # Build Tensorflow graph parts
    x = tf.placeholder('float', [None, x_features_length], name="X")
    y = tf.placeholder('float', [None, y_features_length], name="Y")
    theta = tf.Variable(tf.random_normal([x_features_length, y_features_length], stddev=0.01), name="Theta")
    lambda_val = tf.constant(lambda_value)
    # Trying to implement this way http://openclassroom.stanford.edu/MainFolder/DocumentPage.php?course=MachineLearning&doc=exercises/ex5/ex5.html
    y_predicted = tf.matmul(x, theta, name="y_predicted")
    regularization_cost_part = tf.cast(tf.mul(lambda_val, tf.reduce_sum(tf.pow(theta, 2)), name="regularization_param"), 'float')
    polynomial_cost_part = tf.reduce_sum(tf.pow(tf.sub(y_predicted, y), 2), name="polynomial_sum")
    # Set up some summary info to debug
    with tf.name_scope('cost') as scope:
        cost_func = tf.mul(tf.cast(1/(2*batch_size), 'float'), tf.cast(tf.add(polynomial_cost_part, regularization_cost_part), 'float'))
        cost_summary = tf.scalar_summary("cost", cost_func)
    training_func = tf.train.GradientDescentOptimizer(0.03).minimize(cost_func)
    with tf.name_scope("test") as scope:
        correct_prediction = tf.sub(tf.cast(1, 'float'), tf.reduce_mean(tf.sub(y_predicted, y)))
        accuracy = tf.cast(correct_prediction, "float")
        accuracy_summary = tf.scalar_summary("accuracy", accuracy)
    saver = tf.train.Saver()
    merged = tf.merge_all_summaries()
    writer = tf.train.SummaryWriter("/tmp/football_logs", session.graph_def)
    init = tf.initialize_all_variables()
    session.run(init)
    for i in range(0, (len(x_train)/batch_size)):
        session.run(training_func, feed_dict={x: x_train[i*batch_size:i*batch_size+batch_size],
                                              y: y_train[i*batch_size:i*batch_size+batch_size]})
        if i % batch_size == 0:
            result = session.run([merged, accuracy], feed_dict={x: x_test, y: y_test})
            writer.add_summary(result[0], i)
            print "step %d, training accuracy %g" % (i, result[1])
    print "test accuracy %g" % session.run(accuracy, feed_dict={x: x_test, y: y_test})
    save_path = saver.save(session, "/tmp/football.ckpt")
    print "Model saved in file: ", save_path
    session.close()

My output looks like this:

step 0, training accuracy 39.1802
step 10, training accuracy 39.1802
step 20, training accuracy 39.1802
...
step 210, training accuracy 39.1802
test accuracy 39.1802
Model saved in file: /tmp/football.ckpt

It does indeed seem to be a problem with the learning rate: 0.03 may be too high depending on what your data look like. You'll also probably want to build your graph separately from the session in a more explicit way, or even use the normal equations to reach the optimal solution without iterating at all, if your dataset has low/medium dimensionality. I posted some examples here that you may find useful! Also, the TF tutorials cover it well (search for "Complete program" on that page).
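Regarding the normal equations: a minimal NumPy sketch of the closed-form ridge solution theta = (X^T X + lambda*I)^(-1) X^T Y could look like the following. The helper name normal_equation and the regularize_bias flag are my own, not part of your code; the Stanford exercise linked in your code leaves the bias weight theta_0 out of the regularization term, if I remember right, which the flag mimics by zeroing the first diagonal entry (assuming the bias column x[0]=1 comes first, as in the dataset generated below):

import numpy as np

def normal_equation(X, Y, lambda_value=0.1, regularize_bias=False):
    # Closed-form ridge regression: theta = (X^T X + lambda*I)^(-1) X^T Y
    X = np.asarray(X, dtype=np.float64)
    Y = np.asarray(Y, dtype=np.float64)
    I = np.eye(X.shape[1])
    if not regularize_bias:
        I[0, 0] = 0.0  # leave the bias column (x[0] = 1) unpenalized
    A = X.T.dot(X) + lambda_value * I
    # np.linalg.solve is more stable than forming the explicit inverse
    return np.linalg.solve(A, X.T.dot(Y))

# e.g., with the synthetic data generated in the code below:
# theta = normal_equation(train_data, train_labels, lambda_value=0.1)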

But regarding your code, here is a version that worked for me: I swapped out some deprecated functions and, most importantly, set the learning rate to the much lower value alpha=1e-8, which (on the synthetic dataset also generated in the code) does seem to converge:

test accuracy 2176.11
test accuracy 1898.6
test accuracy 1663.69
test accuracy 1458.53
test accuracy 1287.57
test accuracy 1116.9
test accuracy 969.474
test accuracy 841.028
test accuracy 738.592
test accuracy 649.891
test accuracy 565.188
test accuracy 495.33
test accuracy 438.351
test accuracy 381.161
test accuracy 333.213
test accuracy 289.575
test accuracy 254.394
test accuracy 222.836
test accuracy 197.36
test accuracy 172.788
test accuracy 152.251
test accuracy 132.664
test accuracy 115.982
test accuracy 101.021
final test accuracy 90.2555

CODE:

import tensorflow as tf
import numpy as np

# generate some dataset
DIMENSIONS = 5
DS_SIZE = 5000
TRAIN_RATIO = 0.5  # 50% of the dataset is used for training
_train_size = int(DS_SIZE*TRAIN_RATIO)
_test_size = DS_SIZE - _train_size
f = lambda(x): sum(x)  # the "true" function: f = 0 + 1*x1 + 1*x2 + 1*x3 ...
noise = lambda: np.random.normal(0, 10)  # some noise
# training globals
LAMBDA = 1e6  # L2 regularization factor

# generate the dataset, the labels and split into train/test
ds = [[np.random.rand()*1000 for d in range(DIMENSIONS)] for _ in range(DS_SIZE)]
ds = [([1]+x, [f(x)+noise()]) for x in ds]  # add x[0]=1 dimension and labels
np.random.shuffle(ds)
train_data, train_labels = zip(*ds[0:_train_size])
test_data, test_labels = zip(*ds[_train_size:])

def normalize_data(matrix):
    averages = np.average(matrix, 0)
    mins = np.min(matrix, 0)
    maxes = np.max(matrix, 0)
    ranges = maxes - mins
    return ((matrix - averages)/ranges)

def run_regression(X, Y, X_test, Y_test, lambda_value=0.1, normalize=False, batch_size=10, alpha=1e-8):
    x_train = normalize_data(X) if normalize else X
    y_train = Y
    x_test = X_test
    y_test = Y_test
    session = tf.Session()
    # Calculate number of features for X and Y
    x_features_length = len(X[0])
    y_features_length = len(Y[0])
    # Build Tensorflow graph parts
    x = tf.placeholder('float', [None, x_features_length], name="X")
    y = tf.placeholder('float', [None, y_features_length], name="Y")
    theta = tf.Variable(tf.random_normal([x_features_length, y_features_length], stddev=0.01), name="Theta")
    lambda_val = tf.constant(lambda_value)
    # Trying to implement this way http://openclassroom.stanford.edu/MainFolder/DocumentPage.php?course=MachineLearning&doc=exercises/ex5/ex5.html
    y_predicted = tf.matmul(x, theta, name="y_predicted")
    #regularization_cost_part = tf.cast(tf.multiply(lambda_val, tf.reduce_sum(tf.pow(theta, 2)), name="regularization_param"), 'float')
    #polynomial_cost_part = tf.reduce_sum(tf.pow(tf.subtract(y_predicted, y), 2), name="polynomial_sum")
    # Set up some summary info to debug
    with tf.name_scope('cost') as scope:
        #cost_func = tf.multiply(tf.cast(1/(2*batch_size), 'float'), tf.cast(tf.add(polynomial_cost_part, regularization_cost_part), 'float'))
        cost_func = (tf.nn.l2_loss(y_predicted - y) + lambda_val*tf.nn.l2_loss(theta))/float(batch_size)
        #DEPRECATED*** cost_summary = tf.scalar_summary("cost", cost_func)
        cost_summary = tf.summary.scalar('cost', cost_func)  # Add a scalar summary for the snapshot loss.
    training_func = tf.train.GradientDescentOptimizer(alpha).minimize(cost_func)
    with tf.name_scope("test") as scope:
        correct_prediction = tf.subtract(tf.cast(1, 'float'), tf.reduce_mean(tf.subtract(y_predicted, y)))
        accuracy = tf.cast(correct_prediction, "float")
        #DEPRECATED*** accuracy_summary = tf.scalar_summary("accuracy", accuracy)
        #accuracy_summary = tf.summary.scalar("accuracy", accuracy)
    saver = tf.train.Saver()
    #DEPRECATED*** merged = tf.merge_all_summaries()
    merged = tf.summary.merge_all()
    #DEPRECATED*** writer = tf.train.SummaryWriter("/tmp/football_logs", session.graph_def)
    writer = tf.summary.FileWriter("/tmp/football_logs", session.graph)
    #DEPRECATED*** init = tf.initialize_all_variables()
    init = tf.global_variables_initializer()
    session.run(init)
    for i in range(1, (len(x_train)/batch_size)):
        session.run(training_func, feed_dict={x: x_train[i*batch_size:i*batch_size+batch_size],
                                              y: y_train[i*batch_size:i*batch_size+batch_size]})
        if i % batch_size == 0:
            print "test accuracy %g" % session.run(accuracy, feed_dict={x: x_test, y: y_test})
            #result = session.run([merged, accuracy], feed_dict={x: x_test, y: y_test})
            #writer.add_summary(result[0], i)
            #print "step %d, training accuracy %g"%(i, result[1])
            #writer.flush()
    print "final test accuracy %g" % session.run(accuracy, feed_dict={x: x_test, y: y_test})
    #save_path = saver.save(session, "/tmp/football.ckpt")
    #print "Model saved in file: ", save_path
    session.close()

run_regression(train_data, train_labels, test_data, test_labels, normalize=False, alpha=1e-8)
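One caveat about the numbers above, by the way: the accuracy node is just 1 minus the mean residual, so positive and negative errors cancel out and it tracks bias rather than error size. If you want a more conventional diagnostic, here is a minimal sketch of an RMSE op reusing the y_predicted and y tensors already in the graph (the node name rmse is my own choice, not part of the original code):

# a root-mean-squared-error op, built from the existing y_predicted and y tensors
rmse = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(y_predicted, y))), name="rmse")
# evaluated the same way as the accuracy node:
print "test RMSE %g" % session.run(rmse, feed_dict={x: x_test, y: y_test})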

As I said, you'll probably want to change the structure to favor readability and scalability, but I hope this helps!

Cheers,
andres