These notes record the process of implementing multivariate linear regression with TensorFlow 1.x and with TensorFlow 2.x.
The Boston housing dataset is used: 506 samples, with 12 house-related features as input and the house price as output.
The csv file is read with pandas; the data are normalized to remove differences in scale between features, and then split into a training set and a test set so that the training results can be evaluated.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# read the data: the first 12 columns are the features, the 13th is the price
df = pd.read_csv(r'database/boston.csv', header=0)
x_data = np.array(df.values)[:, :12]
y_data = np.array(df.values)[:, 12]
# normalize each feature by its range to remove scale differences
for i in range(12):
    x_data[:, i] = x_data[:, i] / (np.max(x_data[:, i]) - np.min(x_data[:, i]))
# split into training and test sets (70% / 30%)
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.3)
# reshape: features as (N, 12) matrices, labels as (N, 1) column vectors
x_train = x_train.reshape((-1, 12))
x_test = x_test.reshape((-1, 12))
y_train = y_train.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))
import tensorflow.compat.v1 as tf
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
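# run in TF1 compatibility mode: disable eager execution to use graphs and sessions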
tf.disable_eager_execution()
# use a name scope to group the model's nodes
with tf.name_scope('Model'):
    # placeholders for the inputs and labels, variables for the parameters
    x = tf.placeholder(tf.float32, [None, 12], name='X')
    y = tf.placeholder(tf.float32, [None, 1], name='Y')
    w = tf.Variable(tf.random.normal((12, 1)), name='w')
    b = tf.Variable(tf.random.normal((1, 1)), name='b')

    def model(x, w, b):
        return tf.matmul(x, w) + b

    # prediction node
    pred = model(x, w, b)
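In matrix form, the prediction node computes, for a batch of $N$ samples,
$$\hat{y} = Xw + b, \qquad X \in \mathbb{R}^{N \times 12},\; w \in \mathbb{R}^{12 \times 1},\; b \in \mathbb{R}^{1 \times 1},$$
with $b$ broadcast over the batch dimension.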
Training uses mini-batch gradient descent; the training set is shuffled after each epoch.
# training hyperparameters
train_epoch = 100
learning_rate = 0.01
batch_size = 100
batch_num = x_train.shape[0] // batch_size
# logging: record the losses every display_step training steps
step = 0
display_step = 5
loss_list_test = []
loss_list_train = []
# loss function: mean squared error
with tf.name_scope('LossFunction'):
    loss_function = tf.reduce_mean(tf.square(y - pred))
# define the optimizer: plain gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate)\
    .minimize(loss_function)
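The loss node implements the mean squared error over the fed batch:
$$L(w, b) = \frac{1}{N} \sum_{i=1}^{N} \left(y_i - \hat{y}_i\right)^2$$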
# create a session and initialize all variables
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
Training loop:
for epoch in range(train_epoch):
    for batch in range(batch_num):
        xi = x_train[batch * batch_size:(batch + 1) * batch_size]
        yi = y_train[batch * batch_size:(batch + 1) * batch_size]
        sess.run(optimizer, feed_dict={x: xi, y: yi})
        step = step + 1
        if step % display_step == 0:
            loss_train = sess.run(loss_function,
                                  feed_dict={x: x_train, y: y_train})
            loss_test = sess.run(loss_function,
                                 feed_dict={x: x_test, y: y_test})
            loss_list_train.append(loss_train)
            loss_list_test.append(loss_test)
    # print('w=', sess.run(w), '\n', 'b=', sess.run(b))
    # shuffle the training set after each epoch
    x_train, y_train = shuffle(x_train, y_train)
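# plot the loss curves: training set in blue, test set in red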
plt.plot(loss_list_train, 'b-')
plt.plot(loss_list_test, 'r-')
print('train_epoch=', train_epoch)
print('learning_rate=', learning_rate)
print('batch_size=', batch_size)
The test-set loss falls faster than the training-set loss.
Increasing the number of epochs and reducing the batch size drives the loss down further.
Setting the test-set proportion to 0.99 leaves only 5 samples for training; after a few epochs the training loss approaches 0 while the test loss rises, i.e. the model overfits.
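A minimal sketch of that experiment; only the split changes, and the training code above is re-run unchanged:
# overfitting demo: test_size=0.99 leaves only 5 of the 506 samples for training
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.99)
x_train = x_train.reshape((-1, 12))
x_test = x_test.reshape((-1, 12))
y_train = y_train.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))
# after retraining: training loss -> 0, test loss climbs (overfitting)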
Draw a random sample from the test set and predict its price:
# pick a random index among the first 50 test samples
i = np.random.randint(0, 50)
print('Sample %i:' % i)
print('Predicted:', sess.run(pred, feed_dict={x: x_test[i].reshape((1, 12))}))
print('Actual:', y_test[i])
sess.close()
To record training for TensorBoard, add the following. Note that tf.reset_default_graph() only has an effect before the graph is built, so it belongs at the top of the script rather than after variable initialization; the summary op and the file writer go after the loss function and the session have been created:
# set the log directory (tf.reset_default_graph() must run before the model is built)
tf.reset_default_graph()
log_dir = 'G://log'
# record the loss value as a scalar summary
sum_loss_op = tf.summary.scalar('loss', loss_function)
# merge all summaries into a single op
merged = tf.summary.merge_all()
# file writer for the log directory (also writes the computation graph)
write = tf.summary.FileWriter(log_dir, sess.graph)
Inside the training loop, run the summary op together with the loss and write the result to the log:
loss_train, sum_loss = sess.run([loss_function, sum_loss_op],
                                feed_dict={x: x_train, y: y_train})
write.add_summary(sum_loss, epoch)
After training, close the writer:
write.close()
In the Anaconda Prompt, change to the directory containing the logs, launch TensorBoard, and open the URL it prints.
The loss curve and the computation graph are then visible.
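For example, assuming the log directory set above:
tensorboard --logdir=G://log
By default TensorBoard serves at http://localhost:6006.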
import tensorflow as tf
import matplotlib.pyplot as plt
# linear model
def model(x, w, b):
    return tf.matmul(x, w) + b

# mean squared error loss
def loss_function(x, y, w, b):
    pred = model(x, w, b)
    loss = tf.reduce_mean(tf.square(y - pred))
    return loss

# gradients of the loss w.r.t. w and b via automatic differentiation
def grad(x, y, w, b):
    with tf.GradientTape() as tape:
        loss = loss_function(x, y, w, b)
    return tape.gradient(loss, [w, b])
w = tf.Variable(tf.random.normal((12,1)), dtype=tf.float32)
b = tf.Variable(tf.random.normal((1,1)), dtype=tf.float32)
For the matrix multiplication, the numpy arrays must be converted to float32 tensors:
x_train = tf.cast(x_train, tf.float32)
x_test = tf.cast(x_test, tf.float32)
y_train = tf.cast(y_train, tf.float32)
y_test = tf.cast(y_test, tf.float32)
# training hyperparameters
train_epoch = 100
learning_rate = 0.01
batch_size = 100
batch_num = x_train.shape[0] // batch_size
step = 0
display_step = 5
loss_list_train = []
loss_list_test = []
# create the SGD optimizer
optimizer = tf.keras.optimizers.SGD(learning_rate)
Training loop:
for epoch in range(train_epoch):
    for batch in range(batch_num):
        xi = x_train[batch * batch_size:(batch + 1) * batch_size]
        yi = y_train[batch * batch_size:(batch + 1) * batch_size]
        grads = grad(xi, yi, w, b)
        # apply the gradients to update w and b
        optimizer.apply_gradients(zip(grads, [w, b]))
        step = step + 1
        if step % display_step == 0:
            loss_list_train.append(loss_function(x_train, y_train, w, b))
            loss_list_test.append(loss_function(x_test, y_test, w, b))
    # shuffle the Tensor-typed training set with tf.random.shuffle:
    # concatenate x and y so they are shuffled together, then split again
    train_data = tf.concat([x_train, y_train], axis=1)
    train_data = tf.random.shuffle(train_data)
    x_train = train_data[:, :12]
    y_train = tf.reshape(train_data[:, 12], (-1, 1))
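# plot the training and test loss histories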
plt.plot(loss_list_train, 'b-')
plt.plot(loss_list_test, 'r-')
print('train_epoch=', train_epoch)
print('learning_rate=', learning_rate)
print('batch_size=', batch_size)
# pick a random index among the first 50 test samples
i = np.random.randint(0, 50)
pred = model(tf.reshape(x_test[i], (1, 12)), w, b).numpy()
print('Sample %i:' % i)
print('Predicted:', pred)
print('Actual:', y_test[i].numpy())
TensorFlow 1.x and TensorFlow 2.x differ in part of their syntax. When displaying results with TensorBoard, delete the old log files first; when doing matrix multiplication, pay attention to data types and dimensions. Since the dataset is small and the model is plain linear regression, its accuracy leaves room for improvement.
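As a minimal illustration of the data-type pitfall (hypothetical tensors, independent of the model above): tf.matmul requires both operands to share one dtype and to have compatible inner dimensions.
import tensorflow as tf
m1 = tf.constant([[1.0, 2.0]])                      # float32, shape (1, 2)
m2 = tf.constant([[3.0], [4.0]], dtype=tf.float64)  # float64, shape (2, 1)
# tf.matmul(m1, m2) raises InvalidArgumentError because the dtypes differ
out = tf.matmul(m1, tf.cast(m2, tf.float32))        # cast first, then multiply
print(out)  # tf.Tensor([[11.]], shape=(1, 1), dtype=float32)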