定义一个线性回归模型:linear_regression_model.py
import tensorflow as tf
import numpy as np
class linearRegressionModel:
    """Linear regression y = x @ w + b, trained with Adam on an MSE loss
    with L2 weight regularization (TensorFlow 1.x graph API)."""

    def __init__(self, x_dimen):
        # x_dimen: number of input features per sample.
        self.x_dimen = x_dimen
        self._index_in_epoch = 0  # cursor into the (shuffled) training data
        self.constructModel()
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

    # Weight initialization: small truncated-normal values.
    def weight_variable(self, shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    # Bias initialization: small positive constant.
    def bias_variable(self, shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def next_batch(self, batch_size):
        """Return the next (x, y) mini-batch of size batch_size.

        When the current epoch is exhausted, reshuffle the training data
        and restart from the beginning.
        """
        start = self._index_in_epoch
        self._index_in_epoch += batch_size
        if self._index_in_epoch > self._num_datas:
            # Epoch finished: shuffle features and labels with the same permutation.
            perm = np.arange(self._num_datas)
            np.random.shuffle(perm)
            self._datas = self._datas[perm]
            self._labels = self._labels[perm]
            start = 0
            self._index_in_epoch = batch_size
            assert batch_size <= self._num_datas  # batch must fit in one epoch
        end = self._index_in_epoch
        return self._datas[start:end], self._labels[start:end]

    def constructModel(self):
        """Build the graph: prediction, loss = MSE + 0.15 * L2(w), and the Adam step."""
        self.x = tf.placeholder(tf.float32, [None, self.x_dimen])
        self.y = tf.placeholder(tf.float32, [None, 1])
        self.w = self.weight_variable([self.x_dimen, 1])
        self.b = self.bias_variable([1])
        self.y_prec = tf.nn.bias_add(tf.matmul(self.x, self.w), self.b)
        mse = tf.reduce_mean(tf.squared_difference(self.y_prec, self.y))
        l2 = tf.reduce_mean(tf.square(self.w))  # L2 regularization on the weights
        self.loss = mse + 0.15 * l2
        self.train_step = tf.train.AdamOptimizer(0.1).minimize(self.loss)

    def train(self, x_train, y_train, x_test, y_test):
        """Run 5000 mini-batch Adam steps; every 10 steps report the test-set loss.

        Fix: the original printed the *training* batch loss under the label
        'test_loss' and never used x_test / y_test at all. We now evaluate
        the loss on the held-out test set, matching the printed label.
        """
        self._datas = x_train
        self._labels = y_train
        self._num_datas = x_train.shape[0]
        for i in range(5000):
            batch_x, batch_y = self.next_batch(100)
            self.sess.run(self.train_step,
                          feed_dict={self.x: batch_x, self.y: batch_y})
            if i % 10 == 0:
                test_loss = self.sess.run(self.loss,
                                          feed_dict={self.x: x_test, self.y: y_test})
                print('step %d,test_loss %f' % (i, test_loss))

    def predict_batch(self, arr, batch_size):
        # Generator: lazily yield successive slices of `arr` of length batch_size
        # (the final slice may be shorter).
        for i in range(0, len(arr), batch_size):
            yield arr[i:i + batch_size]

    def predict(self, x_predict):
        """Predict in batches of 100 and stack the pieces vertically into an (n, 1) array."""
        pred_list = []
        for x_test_batch in self.predict_batch(x_predict, 100):
            pred = self.sess.run(self.y_prec, {self.x: x_test_batch})
            pred_list.append(pred)
        # np.vstack stacks along axis 0 (vertically), giving shape (n, 1).
        return np.vstack(pred_list)
定义 run 文件:run.py
参考文档:
- sklearn.datasets.make_regression
- sklearn.metrics.r2_score
- sklearn.linear_model.LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from linear_regression_model import linearRegressionModel as lrm
if __name__ == '__main__':
    # Build a synthetic regression problem and split it 50/50 into train/test.
    x, y = make_regression(7000)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5)

    # The TF model expects column-vector targets of shape (n, 1).
    y_col_train = y_train.reshape(-1, 1)
    y_col_test = y_test.reshape(-1, 1)

    tf_model = lrm(x.shape[1])
    tf_model.train(x_train, y_col_train, x_test, y_col_test)
    y_predict = tf_model.predict(x_test)
    print('###########')
    print(y_predict)
    print('###########')

    # Compare the TensorFlow model against sklearn's closed-form
    # LinearRegression using the R^2 score (ravel -> 1-D for r2_score).
    print('Tensorflow R2:', r2_score(y_col_test.ravel(), y_predict.ravel()))
    sk_model = LinearRegression()
    y_predict = sk_model.fit(x_train, y_train).predict(x_test)
    print('Sklearn R2:', r2_score(y_test, y_predict))