Demo：深度学习中使用数据集（tf.data）进行训练和测试

一个完整的例子：使用数据集（Dataset）完成数据输入和预处理，并在此基础上进行训练和测试。

import tensorflow as tf
# Glob the TFRecord shards used for training and for testing.
# NOTE(review): in TF 1.x match_filenames_once presumably keeps the matched
# list in a local variable, so local variables must be initialized before the
# datasets built from these tensors are read -- confirm against the TF docs.
train_files = tf.train.match_filenames_once("path/to/train-file-*")
test_files = tf.train.match_filenames_once("path/to/test-file-*")

# Parse one serialized example read from a TFRecord file.
def parser(record):
    """Decode a serialized example into an image tensor and its label.

    Args:
        record: A scalar string tensor holding one serialized example with
            the features 'image', 'label', 'height', 'width' and 'channels'.

    Returns:
        A tuple ``(decode_image, label)``: the uint8 pixel tensor reshaped to
        ``[height, width, channels]`` and the int64 label.
    """
    features = tf.parse_single_example(
        record,
        features={
            'image': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'channels': tf.FixedLenFeature([], tf.int64),
        },
    )

    # Rebuild the flat pixel vector from the raw image bytes.
    decode_image = tf.decode_raw(features['image'], tf.uint8)

    # height/width/channels are tensors whose values are only known at run
    # time, so they cannot be passed to set_shape (which needs static
    # dimensions). Restore the image layout with a dynamic reshape instead.
    image_shape = tf.stack(
        [features['height'], features['width'], features['channels']]
    )
    decode_image = tf.reshape(decode_image, image_shape)

    label = features['label']
    return decode_image, label

# Input pipeline settings.
# Target height and width handed to the image preprocessing / resize steps.
image_size = 299
# Number of examples grouped into one batch.
batch_size = 100
# Buffer size passed to Dataset.shuffle for the training data.
shuffle_buffer = 10000

# Training input pipeline: read TFRecords, parse, preprocess, shuffle, batch
# and repeat for a fixed number of epochs.
def _to_train_example(image, label):
    """Apply the training-time image preprocessing to one (image, label) pair."""
    return preprocess_for_train(image, image_size, image_size, None), label


NUM_EPOCHS = 10

dataset = (
    tf.data.TFRecordDataset(train_files)
    .map(parser)
    .map(_to_train_example)
    .shuffle(shuffle_buffer)
    .batch(batch_size)
    .repeat(NUM_EPOCHS)
)

# Initializable iterator over the training batches; its initializer must be
# run in the session before the first batch is requested.
iterator = dataset.make_initializable_iterator()
image_batch, label_batch = iterator.get_next()

# Network forward pass, loss, and the gradient-descent training op.
# inference() and calc_loss() are not defined in this part of the file.
learning_rate = 0.01
logit = inference(image_batch)
loss = calc_loss(logit, label_batch)
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_step = optimizer.minimize(loss)

# Test input pipeline: parse each record and resize the image to the network
# input size. No shuffling or training-time preprocessing is applied here.
test_dataset = tf.data.TFRecordDataset(test_files)
test_dataset = test_dataset.map(parser).map(
    lambda image, label: (
        # The API is tf.image.resize_images; the previous spelling
        # "resize_iamges" raised AttributeError when the graph was built.
        tf.image.resize_images(image, [image_size, image_size]),
        label,
    )
)
test_dataset = test_dataset.batch(batch_size)

# Initializable iterator over the test batches.
test_iterator = test_dataset.make_initializable_iterator()
test_image_batch, test_label_batch = test_iterator.get_next()

# The predicted class is the index of the largest logit.
# NOTE(review): inference() is invoked a second time here; presumably it has
# to reuse the variables created for the training graph -- confirm that its
# implementation shares variables between the two calls.
test_logit = inference(test_image_batch)
predictions = tf.argmax(test_logit, axis=-1, output_type=tf.int32)

# Run training to exhaustion, then collect predictions on the test data.
def _run_until_exhausted(sess, fetches):
    """Yield sess.run(fetches) until the input raises OutOfRangeError."""
    while True:
        try:
            yield sess.run(fetches)
        except tf.errors.OutOfRangeError:
            return


with tf.Session() as sess:
    # Initialize global and local variables in a single run call.
    sess.run((tf.global_variables_initializer(), tf.local_variables_initializer()))

    # Start the training iterator and train until its data is used up.
    sess.run(iterator.initializer)
    for _ in _run_until_exhausted(sess, train_step):
        pass

    # Start the test iterator and gather predictions with their labels.
    sess.run(test_iterator.initializer)
    test_results = []
    test_labels = []
    for pred, label in _run_until_exhausted(sess, [predictions, test_label_batch]):
        test_results.extend(pred)
        test_labels.extend(label)

# Compute the accuracy as the fraction of predictions equal to their labels.
correct = [float(y == y_) for (y, y_) in zip(test_results, test_labels)]
# Fixed misspelled names ("accurcy", "coorect") that raised NameError.
# With no test examples the ratio is undefined, so report NaN instead of
# dividing by zero.
accuracy = sum(correct) / len(correct) if correct else float("nan")
print("Test accuracy is: ", accuracy)

 

你可能感兴趣的：(Machine learning, R/Python)