"""Logistic regression on the Kaggle Titanic training set (TensorFlow 1.x queue API).

Data source: https://www.kaggle.com/c/titanic/data
"""
import os
import time

import tensorflow as tf

# Initialize variables and model parameters; define the ops used by the
# training loop. W has one row per feature column assembled in inputs().
W = tf.Variable(tf.zeros([5, 1]), name="weights")
b = tf.Variable(0., name="bias")


def combine_inputs(X):
    """Return the linear combination ``X @ W + b`` (the raw logits)."""
    return tf.matmul(X, W) + b


def inference(X):
    """Return the model output on X: the sigmoid of the linear logits."""
    return tf.sigmoid(combine_inputs(X))


def loss(X, Y):
    """Return the mean sigmoid cross-entropy between the model on X and targets Y.

    The raw linear output is passed as ``logits`` and the expected output Y as
    ``labels``; the original code had these two keyword arguments swapped.
    """
    return tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=combine_inputs(X), labels=Y))


def read_csv(batch_size, file_name, record_defaults):
    """Build a shuffled batch of decoded CSV columns.

    Args:
        batch_size: number of rows per dequeued batch.
        file_name: CSV path, joined onto the current working directory.
        record_defaults: one default per column; it also fixes each
            column's dtype.

    Returns:
        The result of ``tf.train.shuffle_batch`` over the decoded columns.
    """
    filename_queue = tf.train.string_input_producer(
        [os.path.join(os.getcwd(), file_name)])

    # The first line of the file is skipped as a header.
    reader = tf.TextLineReader(skip_header_lines=1)
    key, value = reader.read(filename_queue)

    # decode_csv converts a string tensor (the text line) into a tuple of
    # tensor columns with the specified defaults, which also sets the data
    # type of each column.
    decoded = tf.decode_csv(value, record_defaults=record_defaults)

    # shuffle_batch actually reads the file and loads "batch_size" rows
    # into a single tensor per column.
    return tf.train.shuffle_batch(decoded,
                                  batch_size=batch_size,
                                  capacity=batch_size * 50,
                                  min_after_dequeue=batch_size)


def inputs(batch_size=100):
    """Read the training data X and its expected output Y.

    Args:
        batch_size: rows per batch. Defaults to 100, the value that was
            previously hard-coded in both the read and the reshape.

    Returns:
        ``(features, survived)``: features has one example per row and five
        columns (first/second/third class indicators, female indicator,
        age); survived is reshaped to ``[batch_size, 1]``.
    """
    passenger_id, survived, pclass, name, sex, age, sibsp, parch, ticket, fare, cabin, embarked = \
        read_csv(batch_size, "titanic/train.csv",
                 [[0.0], [0.0], [0], [""], [""], [0.0], [0.0], [0.0], [""], [0.0], [""], [""]])

    # Convert categorical data into 0/1 indicator columns.
    is_first_class = tf.to_float(tf.equal(pclass, [1]))
    is_second_class = tf.to_float(tf.equal(pclass, [2]))
    is_third_class = tf.to_float(tf.equal(pclass, [3]))

    gender = tf.to_float(tf.equal(sex, ["female"]))

    # Pack all the features into a single matrix, then transpose so there is
    # one example per row and one feature per column.
    features = tf.transpose(
        tf.stack([is_first_class, is_second_class, is_third_class, gender, age]))
    survived = tf.reshape(survived, [batch_size, 1])

    return features, survived


def train(total_loss):
    """Return the gradient-descent op that minimizes ``total_loss``."""
    learning_rate = 0.01
    return tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss)


def evaluate(sess, X, Y):
    """Print the fraction of thresholded (> 0.5) predictions that equal Y.

    X and Y are the queue-backed tensors from inputs(), so this is measured
    on a batch drawn from the training file, not on held-out data.
    """
    predicted = tf.cast(inference(X) > 0.5, tf.float32)
    print(sess.run(tf.reduce_mean(tf.cast(tf.equal(predicted, Y), tf.float32))))


# Launch the dataflow graph in a session and run the training pipeline.
with tf.Session() as sess:
    X, Y = inputs()

    total_loss = loss(X, Y)
    train_op = train(total_loss)

    # Initialize after the graph is built so that every variable that
    # exists at this point is covered.
    sess.run(tf.global_variables_initializer())

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        # Actual number of training iterations.
        training_steps = 1000
        for step in range(training_steps):
            sess.run([train_op])
            # For debugging and learning purposes, watch the loss decrease
            # during training. Each run dequeues a fresh batch.
            if step % 10 == 0:
                print("loss:", sess.run([total_loss]))

        evaluate(sess, X, Y)

        # NOTE(review): pause kept from the original script; its purpose is
        # not evident from the code.
        time.sleep(5)
    finally:
        # Always stop and join the queue-runner threads, even if training
        # raised. The enclosing `with` closes the session.
        coord.request_stop()
        coord.join(threads)