Original article address
The code is as follows:
# This version is the basic class-based TensorFlow implementation
# Follow-up versions will gradually add TensorBoard usage inside the class, graph freezing and exporting the frozen graph, and distributed training
from sklearn.datasets import load_boston
import numpy as np
import pandas as pd
import os
import sys
import tensorflow as tf
def load_data():
    feature, label = load_boston(return_X_y=True)
    # Binarize the regression target into 0/1 class labels; the log loss used in the
    # model below expects labels in {0, 1}, so 0 is used here for the negative class.
    label = np.where(label > 10, 1, 0)
    train_data = {}
    train_data["y_train"] = label.reshape(len(label), 1)
    data = pd.DataFrame(feature)
    data.columns = ["c" + str(i) for i in range(data.shape[1])]
    co_feature = pd.DataFrame()  # continuous columns
    ca_feature = pd.DataFrame()  # categorical columns
    ca_col = []
    co_col = []
    feat_dict = {}
    cnt = 1
    for i in range(data.shape[1]):
        target = data.iloc[:, i]
        col = target.name
        l = len(set(target))
        if l > 10:
            # more than 10 distinct values: treat as continuous, standardize,
            # and assign a single feature index
            target = (target - target.mean()) / target.std()
            co_feature = pd.concat([co_feature, target], axis=1)
            feat_dict[col] = cnt
            cnt += 1
            co_col.append(col)
        else:
            # few distinct values: treat as categorical, one feature index per value
            us = target.unique()
            feat_dict[col] = dict(zip(us, range(cnt, len(us) + cnt)))
            ca_feature = pd.concat([ca_feature, target], axis=1)
            cnt += len(us)
            ca_col.append(col)
    feat_dim = cnt
    feature_value = pd.concat([co_feature, ca_feature], axis=1)
    feature_index = feature_value.copy()
    for i in feature_index.columns:
        if i in co_col:
            feature_index[i] = feat_dict[i]
        else:
            feature_index[i] = feature_index[i].map(feat_dict[i])
            feature_value[i] = 1.
    train_data["xi"] = feature_index.values.tolist()
    train_data['xv'] = feature_value.values.tolist()
    train_data['feat_dim'] = feat_dim
    return train_data
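# A rough sketch of the structure returned by load_data() (inferred from the code
# above, shown only for orientation):
#   train_data['y_train']  -> ndarray of shape (n_samples, 1) with 0/1 labels
#   train_data['xi'][k]    -> list with one feature index per field for sample k
#   train_data['xv'][k]    -> list with one feature value per field for sample k
#                             (standardized value for continuous fields, 1.0 for categorical)
#   train_data['feat_dim'] -> total number of feature indices (used below as feature_sizes)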
###########################################################################
import os
import sys
import tensorflow as tf
#BASE_PATH = os.path.dirname(os.path.dirname(__file__))
BASE_PATH = "H:/"
class Args:
    feature_sizes = 100
    field_size = 15
    embedding_size = 256
    deep_layers = [512, 256, 128]
    epoch = 3
    batch_size = 64
    # 1e-2 1e-3 1e-4
    learning_rate = 1.0
    # L2 regularization, to reduce overfitting
    l2_reg_rate = 0.01
    checkpoint_dir = os.path.join(BASE_PATH, 'data/saver/ckpt')
    is_training = True
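    # Note: feature_sizes and field_size above are only defaults; they are overwritten
    # from the loaded data in the __main__ block below.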
#####################################################################
class model:
    def __init__(self, args):
        self.feature_sizes = args.feature_sizes
        self.field_size = args.field_size
        self.embedding_size = args.embedding_size
        self.deep_layers = args.deep_layers
        self.l2_reg_rate = args.l2_reg_rate
        self.epoch = args.epoch
        self.batch_size = args.batch_size
        self.learning_rate = args.learning_rate
        self.deep_activation = tf.nn.relu
        self.weight = dict()
        self.checkpoint_dir = args.checkpoint_dir
        self.build_model()
    def build_model(self):
        ########################################### step 1: input placeholders
        # x
        self.feat_index = tf.placeholder(tf.int32, shape=[None, None], name='feature_index')
        self.feat_value = tf.placeholder(tf.float32, shape=[None, None], name='feature_value')
        # y
        self.label = tf.placeholder(tf.float32, shape=[None, None], name='label')
        ########################################### step 2: weight and bias definitions
        # all parameters are collected in self.weight
        self.weight['feature_weight'] = tf.Variable(tf.random_normal([self.feature_sizes, self.embedding_size], 0.0, 0.01), name='feature_weight')
        self.weight['feature_first'] = tf.Variable(tf.random_normal([self.feature_sizes, 1], 0.0, 1.0), name='feature_first')
        num_layer = len(self.deep_layers)
        input_size = self.field_size * self.embedding_size
        init_method = np.sqrt(2.0 / (input_size + self.deep_layers[0]))
        self.weight['layer_0'] = tf.Variable(np.random.normal(loc=0, scale=init_method, size=(input_size, self.deep_layers[0])), dtype=np.float32)
        self.weight['bias_0'] = tf.Variable(np.random.normal(loc=0, scale=init_method, size=(1, self.deep_layers[0])), dtype=np.float32)
        if num_layer != 1:
            for i in range(1, num_layer):
                init_method = np.sqrt(2.0 / (self.deep_layers[i - 1] + self.deep_layers[i]))
                self.weight['layer_' + str(i)] = tf.Variable(np.random.normal(loc=0, scale=init_method, size=(self.deep_layers[i - 1], self.deep_layers[i])), dtype=np.float32)
                self.weight['bias_' + str(i)] = tf.Variable(np.random.normal(loc=0, scale=init_method, size=(1, self.deep_layers[i])), dtype=np.float32)
        last_layer_size = self.deep_layers[-1] + self.field_size + self.embedding_size
        init_method = np.sqrt(np.sqrt(2.0 / (last_layer_size + 1)))
        self.weight['last_layer'] = tf.Variable(np.random.normal(loc=0, scale=init_method, size=(last_layer_size, 1)), dtype=np.float32)
        self.weight['last_bias'] = tf.Variable(tf.constant(0.01), dtype=np.float32)
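        # Shapes, for reference (derived from the definitions above):
        #   feature_weight: [feature_sizes, embedding_size]   embedding table shared by FM and deep parts
        #   feature_first:  [feature_sizes, 1]                FM first-order weights
        #   layer_i / bias_i: dense layers of the deep component
        #   last_layer: [deep_layers[-1] + field_size + embedding_size, 1] output layer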
        ################################ step 3:
        # network structure, defined up to the output
        # then the loss
        # then the optimizer
        self.embedding_index = tf.nn.embedding_lookup(self.weight['feature_weight'], self.feat_index)
        self.embedding_part = tf.multiply(self.embedding_index, tf.reshape(self.feat_value, [-1, self.field_size, 1]))
        self.embedding_first = tf.nn.embedding_lookup(self.weight['feature_first'], self.feat_index)
        self.embedding_first = tf.multiply(self.embedding_first, tf.reshape(self.feat_value, [-1, self.field_size, 1]))
        self.first_order = tf.reduce_sum(self.embedding_first, 2)
        self.sum_second_order = tf.reduce_sum(self.embedding_part, 1)
        self.sum_second_order_square = tf.square(self.sum_second_order)
        self.square_second_order = tf.square(self.embedding_part)
        self.square_second_order_sum = tf.reduce_sum(self.square_second_order, 1)
        self.second_order = 0.5 * tf.subtract(self.sum_second_order_square, self.square_second_order_sum)
        self.fm_part = tf.concat([self.first_order, self.second_order], axis=1)
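        # The second-order term above uses the standard FM identity
        #   sum_{i<j} <v_i, v_j> x_i x_j = 0.5 * ((sum_i v_i x_i)^2 - sum_i (v_i x_i)^2)
        # evaluated element-wise over the embedding dimension, so self.second_order has
        # shape [batch, embedding_size] and self.fm_part has shape
        # [batch, field_size + embedding_size].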
        self.deep_embedding = tf.reshape(self.embedding_part, [-1, self.field_size * self.embedding_size])
        for i in range(0, len(self.deep_layers)):
            self.deep_embedding = tf.add(tf.matmul(self.deep_embedding, self.weight["layer_%d" % i]), self.weight["bias_%d" % i])
            self.deep_embedding = self.deep_activation(self.deep_embedding)
        din_all = tf.concat([self.fm_part, self.deep_embedding], axis=1)
        self.out = tf.add(tf.matmul(din_all, self.weight['last_layer']), self.weight['last_bias'])
        self.out = tf.nn.sigmoid(self.out)
        # loss: binary cross-entropy over the sigmoid output, plus L2 regularization
        self.loss = -tf.reduce_mean(self.label * tf.log(self.out + 1e-24) + (1 - self.label) * tf.log(1 - self.out + 1e-24))
        self.loss += tf.contrib.layers.l2_regularizer(self.l2_reg_rate)(self.weight["last_layer"])
        for i in range(len(self.deep_layers)):
            self.loss += tf.contrib.layers.l2_regularizer(self.l2_reg_rate)(self.weight["layer_%d" % i])
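        # An alternative not used in the original code: keeping the pre-sigmoid logits and
        # calling tf.nn.sigmoid_cross_entropy_with_logits(labels=self.label, logits=logits)
        # would compute the same cross-entropy in a numerically more stable way.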
        # optimizer
        self.global_step = tf.Variable(0, trainable=False)  # non-trainable step counter (variables are either trainable or not)
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        trainable_params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, trainable_params)
        clip_gradients, _ = tf.clip_by_global_norm(gradients, 5)  # clip gradients by global norm
        self.train_op = opt.apply_gradients(zip(clip_gradients, trainable_params), global_step=self.global_step)
    def train(self, sess, feat_index, feat_value, label):
        # runs a single training step
        loss, _, step = sess.run([self.loss, self.train_op, self.global_step],
                                 feed_dict={self.feat_index: feat_index, self.feat_value: feat_value, self.label: label})
        return loss, step

    def fit(self):
        pass

    def predict(self, sess, feat_index, feat_value):
        result = sess.run([self.out], feed_dict={self.feat_index: feat_index, self.feat_value: feat_value})
        return result

    def save(self, sess, path):
        # the graph must already be built when saving this way
        saver = tf.train.Saver()
        saver.save(sess, save_path=path)

    def restore(self, sess, path):
        # the graph must already be built when restoring this way
        saver = tf.train.Saver()
        saver.restore(sess, save_path=path)

    def save_model(self):
        pass

    def load_model(self):
        pass
def get_batch(Xi, Xv, y, batch_size, index):
    start = index * batch_size
    end = (index + 1) * batch_size
    end = end if end < len(y) else len(y)
    return Xi[start:end], Xv[start:end], np.array(y[start:end])
if __name__ == '__main__':
    args = Args()
    data = load_data()
    args.feature_sizes = data['feat_dim']
    args.field_size = len(data['xi'][0])
    args.is_training = True
    with tf.Session() as sess:
        Model = model(args)
        # init variables
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        cnt = int(len(data['y_train']) / args.batch_size)
        print('batches per epoch: %s' % cnt)
        sys.stdout.flush()
        if args.is_training:
            for i in range(args.epoch):
                print('epoch %s:' % i)
                for j in range(0, cnt):
                    X_index, X_value, y = get_batch(data['xi'], data['xv'], data['y_train'], args.batch_size, j)
                    loss, step = Model.train(sess, X_index, X_value, y)
                    if j % 100 == 0:
                        print('training step %d, loss %s' % (j, loss))
            Model.save(sess, args.checkpoint_dir)
        else:
            Model.restore(sess, args.checkpoint_dir)
            for j in range(0, cnt):
                X_index, X_value, y = get_batch(data['xi'], data['xv'], data['y_train'], args.batch_size, j)
                result = Model.predict(sess, X_index, X_value)
                print(result)
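                # A minimal evaluation sketch, not part of the original post: threshold the
                # sigmoid outputs at 0.5 (an assumed cut-off) and report batch accuracy
                # against the 0/1 labels returned by get_batch.
                probs = result[0]
                preds = (probs > 0.5).astype(np.int32)
                print('batch accuracy: %.4f' % (preds == y).mean())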