tensorflow eager 模式下打印dataset中的数据

TensorFlow eager 模式下打印 dataset 中的原始数据以及经过 feature column 处理后的数据

出于调试的需要，要对比 dataset 中的原始数据和经过 feature_column 处理后的数据，因此需要把这两部分数据都打印出来，代码如下：

import tensorflow as tf
import tensorflow.contrib.eager as tfe
tfe.enable_eager_execution()

def parse_line(line):
    """Parse one tab-separated text line into a ``(features, label)`` pair.

    ``CSV_COLUMNS``, ``CSV_COLUMN_DEFAULTS``, ``LABEL_COLUMN`` and ``process``
    are not defined in this snippet; the user has to provide them.

    Args:
        line: One line of tab-delimited text, passed to ``tf.decode_csv``.

    Returns:
        A tuple ``(features, label)``. ``features`` is the result of
        ``process`` applied to the dict mapping each name in ``CSV_COLUMNS``
        to its decoded column (with the label column removed); ``label`` is
        the decoded column stored under ``LABEL_COLUMN``.
    """
    # CSV_COLUMN_DEFAULTS (and the other constants) must be defined by the user.
    columns = tf.decode_csv(line, record_defaults=CSV_COLUMN_DEFAULTS, field_delim='\t')
    features = dict(zip(CSV_COLUMNS, columns))
    # Bind the popped value to the same name that is returned below; the
    # original stored it as `labels` and then returned the undefined `label`.
    label = features.pop(LABEL_COLUMN)
    # Extra user-defined processing of the individual features.
    features = process(features)
    return features, label

def _build_feature_a_columns():
    """Build the feature-column list used to transform ``feature_A_name``."""
    # FEATURE_A_VOCABULARY is the user-defined vocabulary list for feature A;
    # like CSV_COLUMNS it is not defined in this snippet.
    feature_A = tf.feature_column.categorical_column_with_vocabulary_list(
        'feature_A_name', FEATURE_A_VOCABULARY, dtype=tf.string, default_value=-1)
    feature_A_weight = tf.feature_column.weighted_categorical_column(
        feature_A, weight_feature_key='feature_A_weight_name')
    feature_A_column = tf.feature_column.indicator_column(feature_A_weight)
    return [feature_A_column]


def _print_before_and_after(features, columns):
    """Print ``feature_A_name`` as stored in ``features`` and after ``columns`` are applied."""
    print("before feature column: {}".format(features['feature_A_name']))
    inputs = tf.feature_column.input_layer(features, columns)
    print("after feature column: {}".format(inputs))


if __name__ == "__main__":
    dataset = tf.data.TextLineDataset("data_name")
    # Debug aid: print(dataset.output_shapes)

    # The column definitions do not depend on the data, so build them once
    # instead of on every iteration.
    columns = _build_feature_a_columns()

    # Variant 1: parse every line and inspect the elements one at a time.
    # The raw `dataset` is left untouched so variant 2 can map it itself.
    # NOTE(review): feature columns may require a batch dimension; if
    # input_layer rejects these unbatched features, use variant 2 - confirm.
    for ele in tfe.Iterator(dataset.map(parse_line)):
        features, label = ele
        _print_before_and_after(features, columns)

    # Variant 2 (alternative): parse, then batch before iterating.
    dataset = dataset.map(parse_line)
    dataset = dataset.batch(16)  # choose the batch size as needed
    for ele in tfe.Iterator(dataset):
        features, label = ele
        # Same inspection as in variant 1.
        _print_before_and_after(features, columns)

你可能感兴趣的:(tensorflow)