# Note: the batched tf.parse_example is used here, not tf.parse_single_example
def parse_exmp(serial_exmp):
    oriExample = tf.parse_example(serial_exmp, features={'user_data': tf.FixedLenFeature([43], tf.float32)})
    oriAllData = oriExample.get("user_data")
    feaDics = dict()
    retainLabel = oriAllData[:, 0:1]  # column 0 holds the label
    feaDics["sta_fea1"] = oriAllData[:, 1:18]
    feaDics["click_level"] = tf.cast(oriAllData[:, 18:19], dtype=tf.int64)
    rs = tf.feature_column.input_layer(features=feaDics, feature_columns=get_feature_columns_new())
    return rs
# train_files is a list of TFRecord files
# Note: batch() and prefetch() are required
input_files = tf.data.Dataset.list_files(train_files)
dataset = input_files.apply(tf.contrib.data.parallel_interleave(tf.data.TFRecordDataset, cycle_length=reader_num_threads)).batch(10).prefetch(5)
dataset = dataset.map(parse_exmp, num_parallel_calls=8)
test_op = dataset.make_initializable_iterator()
one_element = test_op.get_next()
with tf.train.MonitoredTrainingSession() as sess:
    sess.run(test_op.initializer)
    print(sess.run(one_element))
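For contrast with the batched approach above, here is a minimal per-record sketch of my own (not from the original) using tf.parse_single_example; since it parses one serialized Example at a time, map() must run before batch().
# Sketch (assumption, for contrast): per-record parsing with tf.parse_single_example (TF 1.x),
# assuming the same 43-float "user_data" layout as above.
def parse_single(serial_exmp):
    example = tf.parse_single_example(
        serial_exmp, features={'user_data': tf.FixedLenFeature([43], tf.float32)})
    return example["user_data"]

single_ds = tf.data.TFRecordDataset(train_files)
single_ds = single_ds.map(parse_single, num_parallel_calls=8).batch(10).prefetch(5)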
def parse_exmp_batched(serial_exmp):
    oriAllData = serial_exmp.get("user_data")
    feaDics = dict()
    retainLabel = oriAllData[:, 0:1]
    feaDics["sta_fea1"] = oriAllData[:, 1:18]  # required by the first column set returned below
    feaDics["click_level"] = tf.cast(oriAllData[:, 18:19], dtype=tf.int64)
    feaDics["valid_flg"] = tf.cast(oriAllData[:, 19:20], dtype=tf.int64)
    feaDics["actDay_fea1"] = oriAllData[:, 20:33]
    rs1 = tf.feature_column.input_layer(features=feaDics, feature_columns=get_feature_columns_new()[0])
    rs2 = tf.feature_column.input_layer(features=feaDics, feature_columns=get_feature_columns_new()[1])
    rs3 = tf.concat([rs1, rs2], -1)
    print(rs1)
    print(rs2)
    # return feaDics, {"label": tf.to_float(retainLabel)}
    return rs1, rs2, rs3
def train_input_fn():
    return tf.data.experimental.make_batched_features_dataset(
        file_pattern=train_files,
        batch_size=10,
        features=feature_schema,
        label_key=None,
        ...)
dataTest = train_input_fn()
dataset = dataTest.map(parse_exmp_batched, num_parallel_calls=8)
# The iterator and session code are the same as above
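The call above elides the remaining arguments. A fuller sketch follows; the argument names are real parameters of tf.data.experimental.make_batched_features_dataset, while the values are my assumptions.
# Sketch: a fuller call; num_epochs/shuffle/prefetch values are assumptions.
def train_input_fn():
    return tf.data.experimental.make_batched_features_dataset(
        file_pattern=train_files,
        batch_size=10,
        features=feature_schema,
        reader=tf.data.TFRecordDataset,
        label_key=None,
        num_epochs=1,
        shuffle=True,
        shuffle_buffer_size=10000,
        prefetch_buffer_size=5)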
def parse_exmp(serial_exmp):
    # feature_spec is the parsing spec, e.g. built with tf.feature_column.make_parse_example_spec
    oriExample = tf.io.parse_example(serial_exmp, features=feature_spec)
    oriAllData = oriExample.get("user_data")
    feaDics = dict()
    retainLabel = oriAllData[:, 0:1]
    feaDics["sta_fea1"] = oriAllData[:, 1:18]
    feaDics["click_level"] = tf.cast(oriAllData[:, 18:19], dtype=tf.int64)
    rs = tf.compat.v1.feature_column.input_layer(features=feaDics, feature_columns=get_feature_columns_new())
    print(rs)
    return rs
dataset = tf.data.Dataset.from_tensor_slices(train_files).interleave(
    map_func=lambda x: tf.data.TFRecordDataset(x),
    cycle_length=batch_size, block_length=1, num_parallel_calls=8).batch(10).prefetch(5)
dataset = dataset.map(parse_exmp, num_parallel_calls=8)
test_op = tf.compat.v1.data.make_one_shot_iterator(dataset)
one_element = test_op.get_next()
print(one_element)
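With eager execution on (the TF 2.x default), the compat iterator is optional; a minimal sketch of consuming the dataset directly, assuming the mapped function traces successfully:
# Sketch: idiomatic TF 2.x consumption, no explicit iterator object.
for batch in dataset.take(1):
    print(batch)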
Same as in TensorFlow 1.x.
Same as in TensorFlow 1.x.
Same as in 1.3.
Rewrite the input layer as a custom inputlayer layer:
def parse_exmp_batched(serial_exmp):
    oriAllData = serial_exmp.get("user_data")
    feaDics = dict()
    retainLabel = oriAllData[:, 0:1]
    feaDics["sta_fea1"] = oriAllData[:, 1:18]
    input_layers = myInputLayer(feature_columns_1=get_feature_columns_new()[0], feature_columns_2=get_feature_columns_new()[1], name="inputlayer")
    outputRs = input_layers(feaDics)
    print(outputRs)
    return outputRs
def train_input_fn():
    return tf.data.experimental.make_batched_features_dataset(
        file_pattern=train_files,
        batch_size=10,
        features=feature_schema,
        label_key=None,
        ...)
dataTest = train_input_fn()
dataset = dataTest.map(parse_exmp_batched, num_parallel_calls=8)
test_op = dataset.make_initializable_iterator()
one_element = test_op.get_next()
with tf.train.MonitoredTrainingSession() as sess:
    sess.run(test_op.initializer)
    print(sess.run(one_element))
from datetime import datetime, timedelta
import random
import tensorflow as tf
from tensorflow.python.feature_column import feature_column_v2 as fc_v2

class myInputLayer(tf.keras.layers.Layer):
    def __init__(self, feature_columns_1,
                 feature_columns_2,
                 trainable=True,
                 name=None,
                 **kwargs):
        ...
    def build(self, input_shape):
        ...
    def call(self, inputs, **kwargs):
        ...
        return ...
feature_schema = {
    "user_data": tf.io.FixedLenFeature(shape=(43,), dtype=tf.float32),
    # "label": tf.io.FixedLenFeature(shape=(1,), dtype=tf.float32)
}
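To show what a matching record looks like, here is a sketch of mine (dummy values, not the original writer) that serializes one Example in the 43-float layout implied by the slicing above: index 0 label, 1:18 sta_fea1, 18 click_level, 19 valid_flg, 20:33 actDay_fea1, remainder elided.
# Sketch: building one serialized Example that matches feature_schema (values are dummies).
values = [0.0] * 43
example = tf.train.Example(features=tf.train.Features(feature={
    "user_data": tf.train.Feature(float_list=tf.train.FloatList(value=values))}))
serialized = example.SerializeToString()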
def parse_exmp_batched(serial_exmp):
    oriAllData = serial_exmp.get("user_data")
    feaDics = dict()
    retainLabel = oriAllData[:, 0:1]
    feaDics["sta_fea1"] = oriAllData[:, 1:18]
    feaDics["click_level"] = tf.cast(oriAllData[:, 18:19], dtype=tf.int64)
    input_layers = myInputLayer(feature_columns_1=get_feature_columns_new()[0], feature_columns_2=get_feature_columns_new()[1], name="inputlayer")
    outputRs = input_layers(feaDics)
    return outputRs
train_files = [...]
def train_input_fn():
    return tf.data.experimental.make_batched_features_dataset(
        file_pattern=train_files,
        batch_size=10,
        features=feature_schema,
        label_key=None,
        ...)
dataTest = train_input_fn()
dataset = dataTest.map(parse_exmp_batched, num_parallel_calls=8)
test_op = tf.compat.v1.data.make_one_shot_iterator(dataset)
one_element = test_op.get_next()
print(one_element)
from datetime import datetime, timedelta
import random
from tensorflow.python.feature_column import feature_column_v2 as fc_v2
import tensorflow as tf
print(tf.executing_eagerly())

class myInputLayer(tf.keras.layers.Layer):
    def __init__(self, feature_columns_1,
                 feature_columns_2,
                 trainable=True,
                 name=None,
                 **kwargs):
        ...
    def build(self, input_shape):
        ...
        super(myInputLayer, self).build(None)
    def call(self, inputs, **kwargs):
        ...
        return ...
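The class bodies are elided above; one way to fill them in is sketched below using tf.keras.layers.DenseFeatures. This is an assumption of mine, not necessarily the original implementation, and shared-embedding columns may require tf.compat.v1.keras.layers.DenseFeatures instead.
# Sketch: a possible myInputLayer body (assumption, built on DenseFeatures).
class myInputLayer(tf.keras.layers.Layer):
    def __init__(self, feature_columns_1, feature_columns_2,
                 trainable=True, name=None, **kwargs):
        super(myInputLayer, self).__init__(trainable=trainable, name=name, **kwargs)
        # One DenseFeatures layer per column group, concatenated in call().
        self.dense_1 = tf.keras.layers.DenseFeatures(feature_columns_1)
        self.dense_2 = tf.keras.layers.DenseFeatures(feature_columns_2)
    def build(self, input_shape):
        super(myInputLayer, self).build(None)
    def call(self, inputs, **kwargs):
        # Each DenseFeatures maps the feature dict to a dense batch tensor;
        # concatenate the two groups along the feature axis.
        return tf.concat([self.dense_1(inputs), self.dense_2(inputs)], axis=-1)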
feature_schema = {
    "user_data": tf.io.FixedLenFeature(shape=(43,), dtype=tf.float32),
}
from typing import Dict, List

def get_feature_schema(excludeFea: List[str]) -> Dict:
    valid_fea_schema = {key: value for key, value in feature_schema.items() if key not in excludeFea}
    return valid_fea_schema
feature_schemas = get_feature_schema([])
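For instance, to drop the label entry once it is added to feature_schema:
# Example: exclude the "label" key from the parse schema.
schema_without_label = get_feature_schema(["label"])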
def get_feature_columns(args):
    fea_1 = tf.feature_column.numeric_column(key="user_data", shape=(43,), dtype=tf.float32)
    return [fea_1]
def get_feature_columns_new():
    ...
    act_first_fea1 = tf.feature_column.categorical_column_with_identity(key="act_first_fea1", num_buckets=2)
    act_last_fea1 = tf.feature_column.categorical_column_with_identity(key="act_last_fea1", num_buckets=2)
    click_level_emb = tf.feature_column.embedding_column(click_level, 5)
    valid_flg_emb = tf.feature_column.embedding_column(valid_flg, 3)
    act_first_fea1_emb, act_last_fea1_emb = tf.feature_column.shared_embeddings([act_first_fea1, act_last_fea1], 2, 'mean', initializer=None, trainable=True)
    return [sta_fea1, actDay_fea1, click_level_emb, valid_flg_emb], [
        act_first_fea1_emb, act_first_fea2_emb, act_first_fea3_emb, act_first_fea4_emb, act_first_fea5_emb,
        act_last_fea1_emb, act_last_fea2_emb, act_last_fea3_emb, act_last_fea4_emb, act_last_fea5_emb]
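The "..." above elides the definitions of sta_fea1, actDay_fea1, click_level and valid_flg; judging from the slicing in parse_exmp_batched they plausibly look like the sketch below (my assumption, not the original code; the num_buckets values in particular are guesses).
# Sketch: plausible definitions for the columns elided by "..." above.
sta_fea1 = tf.feature_column.numeric_column(key="sta_fea1", shape=(17,), dtype=tf.float32)        # slice 1:18
actDay_fea1 = tf.feature_column.numeric_column(key="actDay_fea1", shape=(13,), dtype=tf.float32)  # slice 20:33
click_level = tf.feature_column.categorical_column_with_identity(key="click_level", num_buckets=10)  # bucket count assumed
valid_flg = tf.feature_column.categorical_column_with_identity(key="valid_flg", num_buckets=2)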
feature_column = get_feature_columns("")
def parse_exmp(serial_exmp):
    feature_spec = tf.feature_column.make_parse_example_spec(feature_column)
    oriExample = tf.io.parse_example(serial_exmp, features=feature_spec)
    print(oriExample)
    oriAllData = oriExample.get("user_data")
    feaDics = dict()
    retainLabel = oriAllData[:, 0:1]
    feaDics["sta_fea1"] = oriAllData[:, 1:18]
    feaDics["click_level"] = tf.cast(oriAllData[:, 18:19], dtype=tf.int64)
    inputnet = myInputLayer(get_feature_columns_new()[0], get_feature_columns_new()[1], name="inputlayer")
    rs = inputnet(feaDics)
    return rs
train_files = [...]
batch_size = 10
dataset = tf.data.Dataset.from_tensor_slices(train_files).interleave(
    map_func=lambda x: tf.data.TFRecordDataset(x),
    cycle_length=batch_size, block_length=1, num_parallel_calls=8).batch(10).prefetch(5)
dataset = dataset.map(parse_exmp, num_parallel_calls=8)
test_op = tf.compat.v1.data.make_one_shot_iterator(dataset)
one_element = test_op.get_next()
print(one_element)