This article introduces the use of Estimator through a simple example; the full code and data are available at https://www.jianshu.com/p/5495f87107e7
The source code below has been annotated with additional comments.
import tensorflow as tf
# Custom model function
'''
The model function covers training, evaluation, and prediction.
Depending on the mode passed in, it returns the corresponding model result (an EstimatorSpec).
In prediction mode, `predictions` defines the outputs of the model.
'''
def my_model_fn(features, labels, mode, params):
    # Input layer; feature_columns corresponds to Classifier(feature_columns=...)
    net = tf.feature_column.input_layer(features, params['feature_columns'])
    # Hidden layers; hidden_units corresponds to Classifier(hidden_units=[10,10]),
    # i.e. two hidden layers with 10 nodes each
    for units in params['hidden_units']:
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
    # Output layer; n_classes corresponds to the 3 iris species
    logits = tf.layers.dense(net, params['n_classes'], activation=None)
    # Prediction
    predicted_classes = tf.argmax(logits, 1)  # the index of the largest logit is the predicted class
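    # For intuition (made-up numbers): tf.argmax([[-1.3, 2.6, -0.9]], 1)
    # evaluates to [1], i.e. the second class is chosen.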
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'class_ids': predicted_classes[:, tf.newaxis],  # reshape into column form, e.g. [[3],[2]]
            'probabilities': tf.nn.softmax(logits),  # squash e.g. [-1.3,2.6,-0.9] into the 0~1 range as probabilities
            'logits': logits,  # e.g. [-1.3,2.6,-0.9]
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
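    # Worked softmax example (hand-computed, approximate):
    # softmax([-1.3, 2.6, -0.9]) ≈ [0.019, 0.952, 0.029], so class 1 gets ~95%.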
    # Loss function
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
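    # "sparse" here means the labels are plain integer class ids (0, 1 or 2),
    # not one-hot vectors; the softmax is applied to the logits internally.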
    # Training
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)  # optimizer used to drive the loss down
        train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())  # run the optimization
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
    # Evaluation
    accuracy = tf.metrics.accuracy(labels=labels,
                                   predictions=predicted_classes,
                                   name='acc_op')  # compute the accuracy
    metrics = {'accuracy': accuracy}  # format expected by eval_metric_ops
    tf.summary.scalar('accuracy', accuracy[1])  # only used later for TensorBoard charts
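    # tf.metrics.accuracy returns an (accuracy, update_op) pair; logging the
    # update_op (accuracy[1]) makes the running accuracy show up in TensorBoard.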
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)
import os
import pandas as pd
FUTURES = ['SepalLength', 'SepalWidth','PetalLength', 'PetalWidth', 'Species']
SPECIES = ['Setosa', 'Versicolor', 'Virginica']
dir_path = os.path.dirname(os.path.realpath(__file__))
train_path=os.path.join(dir_path,'iris_training.csv')
test_path=os.path.join(dir_path,'iris_test.csv')
train = pd.read_csv(train_path, names=FUTURES, header=0)
train_x, train_y = train, train.pop('Species')
test = pd.read_csv(test_path, names=FUTURES, header=0)
test_x, test_y = test, test.pop('Species')
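# After pop('Species'), train_x/test_x hold the four numeric feature columns
# and train_y/test_y the labels (in the standard TensorFlow iris CSVs the
# species are already encoded as integers 0-2; this is an assumption here).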
'''
Defining the features; tf.feature_column.numeric_column accepts parameters such as
key            the column name
shape
default_value
dtype
and so on
'''
feature_columns = []
for key in train_x.keys():
feature_columns.append(tf.feature_column.numeric_column(key=key))
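# For illustration, a column could also be declared with explicit parameters
# (hypothetical values, not used by the model below):
# tf.feature_column.numeric_column(key='SepalLength', shape=(1,),
#                                  default_value=0.0, dtype=tf.float32)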
tf.logging.set_verbosity(tf.logging.INFO)
models_path=os.path.join(dir_path,'models/')
# Create the custom classifier
'''
The constructor parameters include
model_fn
model_dir
params
config
warm_start_from
'''
classifier = tf.estimator.Estimator(
    model_fn=my_model_fn,
    model_dir=models_path,
    params={
        'feature_columns': feature_columns,
        'hidden_units': [10, 10],
        'n_classes': 3,
    })
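# For illustration, the config argument accepts a tf.estimator.RunConfig,
# e.g. to tune checkpointing (hypothetical values, not used in this example):
# tf.estimator.RunConfig(save_checkpoints_steps=500, keep_checkpoint_max=3)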
# Input function that feeds the training data
batch_size=100
def train_input_fn(features, labels, batch_size):
    # Slice the data into individual examples
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    # shuffle randomizes the order (a larger buffer mixes more thoroughly),
    # repeat cycles through the data indefinitely, batch groups examples into batches
    dataset = dataset.shuffle(1000).repeat().batch(batch_size)
    # Return an iterator over the data; each element is yielded once and then discarded
    return dataset.make_one_shot_iterator().get_next()
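# Rough arithmetic: with repeat() and batch_size=100, the 1000 training steps
# below consume 1000 * 100 = 100000 examples; assuming the standard 120-row
# iris training set, that is roughly 833 passes over the data.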
# Start training; just supply the input function and the number of steps
classifier.train(
    input_fn=lambda: train_input_fn(train_x, train_y, batch_size),
    steps=1000)
# Input function that feeds the test data
def eval_input_fn(features, labels, batch_size):
    features = dict(features)
    inputs = (features, labels)
    dataset = tf.data.Dataset.from_tensor_slices(inputs)
    dataset = dataset.batch(batch_size)
    return dataset.make_one_shot_iterator().get_next()
# Evaluate the quality of the model we trained
eval_result = classifier.evaluate(
    input_fn=lambda: eval_input_fn(test_x, test_y, batch_size))
print(eval_result)
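# eval_result is a dict holding the metrics defined in the model function,
# e.g. {'accuracy': ..., 'loss': ..., 'global_step': ...}.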
# Optionally, loop up to 100 times classifying new samples entered by the user
#for i in range(0, 100):
#    print('\nPlease enter features: SepalLength,SepalWidth,PetalLength,PetalWidth')
#    a, b, c, d = map(float, input().split(','))  # capture the numbers typed by the user
#    predict_x = {
#        'SepalLength': [a],
#        'SepalWidth': [b],
#        'PetalLength': [c],
#        'PetalWidth': [d],
#    }
#
#    # Run the prediction
#    predictions = classifier.predict(
#        input_fn=lambda: eval_input_fn(predict_x,
#                                       labels=[0,],
#                                       batch_size=batch_size))
#
#    # The result is iterable, even though here there is only one sample
#    for pred_dict in predictions:
#        class_id = pred_dict['class_ids'][0]
#        probability = pred_dict['probabilities'][class_id]
#        print(SPECIES[class_id], 100 * probability)
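# A minimal non-interactive prediction sketch: the sample values below are
# made up for illustration, and labels=[0] is a dummy (ignored in PREDICT mode).
predict_x = {
    'SepalLength': [5.1],
    'SepalWidth': [3.3],
    'PetalLength': [1.7],
    'PetalWidth': [0.5],
}
predictions = classifier.predict(
    input_fn=lambda: eval_input_fn(predict_x, labels=[0], batch_size=batch_size))
for pred_dict in predictions:
    class_id = pred_dict['class_ids'][0]
    probability = pred_dict['probabilities'][class_id]
    print(SPECIES[class_id], 100 * probability)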