# Parsing schema for every feature stored in the TFRecord, including the label.
feature_schema = {
    name: tf.io.FixedLenFeature(shape=(1,), dtype=dtype)
    for name, dtype in (
        ("sex", tf.int64),
        ("age", tf.int64),
        ("label", tf.float32),
    )
}
# train_files is the list of TFRecord files.
# NOTE(review): the returned dataset is discarded here — presumably it should
# be assigned (e.g. `dataset = ...`) so it can be fed to the estimator's
# input_fn; confirm against the original article.
tf.data.experimental.make_batched_features_dataset(
file_pattern=train_files,
features=feature_schema,
label_key="label")
Reference: TensorFlow series — reading TFRecord data
def get_feature_columns(args):
    """Build the model's categorical identity feature columns.

    `args` is accepted for interface compatibility but is not used here.
    Returns a list of [sex, age] identity columns (3 and 9 buckets,
    out-of-range values mapped to bucket 0).
    """
    column_specs = (("sex", 3), ("age", 9))
    return [
        tf.feature_column.categorical_column_with_identity(
            key=key, num_buckets=buckets, default_value=0)
        for key, buckets in column_specs
    ]
# Canned DNNClassifier estimator. The `...` are placeholders from the original
# snippet (hidden-layer sizes and the remaining constructor arguments).
dnn_model = estimator_lib.DNNClassifier(hidden_units=[...],
model_dir=FLAGS.model_dir,
feature_columns=get_feature_columns([...]),
...)
Same approach as above.
Same approach as above.
model_fn is the main body of a custom Estimator:
# feature_columns_new: the feature_column list defined in step 2.
# features: the tensors parsed from the TFRecord input.
# inputs_layers: the input to the neural-network layers.
def model_fn(features,labels,mode):
inputs_layers =tf.feature_column.input_layer(features,feature_columns_new)
# input_layer can be called multiple times if needed
...
# Note: feature / feature_column / feature_schema do NOT need to be passed
# as constructor arguments here — the custom model_fn resolves them itself.
dnn_model = MyEstimator(
model_dir=FLAGS.model_dir,
optimizer = ...,
hidden_units = [...]
)
# The call below routes train_spec's output directly into the custom
# estimator's model_fn.
tf.estimator.train_and_evaluate(estimator=dnn_model,train_spec=train_spec,eval_spec=eval_spec)
The code above targets TensorFlow 1.x; for TensorFlow 2.x, change it as follows:
# In TF2 the v1-style input_layer lives in the private feature_column module.
from tensorflow.python.feature_column import feature_column as fc_v1
rs = fc_v1.input_layer(features=feaDics,feature_columns=get_feature_columns_new())
# Alternatively, via the public compat API:
rs = tf.compat.v1.feature_column.input_layer(features=feaDics,feature_columns=get_feature_columns_new())
Same approach as above.
Same approach as above.
Reference: custom layers and custom models
# The standard way to implement a custom layer by subclassing:
class myInputLayer(tf.keras.layers.Layer):
    """Keras layer that converts two lists of feature columns into dense tensors.

    Variables required by the columns are created in `build` through a
    feature-column state manager; `call` transforms the raw input features
    and returns one (batch, num_elements) tensor per column.
    """

    def __init__(self, feature_columns_1,
                 feature_columns_2,
                 trainable=True,
                 name=None,
                 **kwargs):
        super(myInputLayer, self).__init__(trainable=trainable, name=name, **kwargs)
        self._feature_columns_1 = feature_columns_1
        self._feature_columns_2 = feature_columns_2
        # State manager that owns the variables created by the columns.
        self._state_manager = fc_v2._StateManagerImpl(self, trainable)

    def build(self, input_shape):
        # Create each column's variables under <layer name>/<column name>.
        with tf.variable_scope(self.name):
            for column in self._feature_columns_1 + self._feature_columns_2:
                with tf.variable_scope(column.name):
                    column.create_state(self._state_manager)
        super(myInputLayer, self).build(None)

    def call(self, inputs, **kwargs):
        # Cache shared intermediate transformations of the raw features.
        transformation_cache = fc_v2.FeatureTransformationCache(inputs)
        dense_tensors = []
        for column in self._feature_columns_1 + self._feature_columns_2:
            with tf.name_scope(column.name):
                dense = column.get_dense_tensor(transformation_cache,
                                                self._state_manager)
                num_elements = column.variable_shape.num_elements()
                batch_size = tf.shape(dense)[0]
                dense_tensors.append(
                    tf.reshape(dense, shape=(batch_size, num_elements)))
        return dense_tensors
def model_fn(features, labels, mode):
    """Custom-Estimator model_fn using myInputLayer.

    Bug fix: the original called
    `myInputLayer(feature_columns_new[0], features, feature_columns_new[1], ...)`,
    which passed `features` into the `feature_columns_2` constructor slot and
    shifted `feature_columns_new[1]` into `trainable`. `features` is the input
    to the layer *call*, not a constructor argument.
    """
    inputnet = myInputLayer(feature_columns_new[0],
                            feature_columns_new[1],
                            name="inputlayer")
    rs = inputnet(features)
    # rs is subsequently fed into the DNN layers