[实战Google深度学习框架]Tensorflow(5)图像数据处理

本篇blog主要以code+markdown的形式介绍tf这本实战书。(建议使用jupyter来学习)
 

第七章 图像数据处理

  • 7.1 TFRecord输入数据格式

  • 7.2 图像数据处理

  • 7.3 多线程输入数据处理框架

  • 7.4 数据集(Dataset)

7.1 TFRecord输入数据格式

tf需要一种统一的数据格式以方便处理

7.1.1 TFRecord格式介绍

tf.train.Exanple定义存储格式

message Example{
    Feature features = 1;
}

message Features{
    map feature = 1;
}

meassage Feature {
    oneof king{
        BytesList byte_list = 1;
        FloatList float_list = 2;
        Int64List int64_list = 3;
    }
};

7.1.2 TFRecord样例程序

  • 将MNIST数据转化为TFRecord格式
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

# 定义函数转化变量类型。
# 生成整数型属性
def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

# 生成字符串型属性
def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

# 读取mnist数据。
mnist = input_data.read_data_sets("../../datasets/MNIST_data",dtype=tf.uint8, one_hot=True)
images = mnist.train.images
labels = mnist.train.labels
pixels = images.shape[1]
num_examples = mnist.train.num_examples

# 输出TFRecord文件的地址。
filename = "Records/output.tfrecords"
writer = tf.python_io.TFRecordWriter(filename)
for index in range(num_examples):
    image_raw = images[index].tostring()

    example = tf.train.Example(features=tf.train.Features(feature={
        'pixels': _int64_feature(pixels),
        'label': _int64_feature(np.argmax(labels[index])),
        'image_raw': _bytes_feature(image_raw)
    }))
    writer.write(example.SerializeToString())
writer.close()
print("TFRecord文件已保存。")
  • 读取TFRecord文件
# 读取文件。
# 创建reader读取样例
reader = tf.TFRecordReader()
# 创建队列来维护文件列表
filename_queue = tf.train.string_input_producer(["Records/output.tfrecords"])
_,serialized_example = reader.read(filename_queue)

# 解析读取的样例。
features = tf.parse_single_example(
    serialized_example,
    features={
        'image_raw':tf.FixedLenFeature([],tf.string),
        'pixels':tf.FixedLenFeature([],tf.int64),
        'label':tf.FixedLenFeature([],tf.int64)
    })

# decode_raw将字符串转换成图像对应的像素数组
images = tf.decode_raw(features['image_raw'],tf.uint8)
labels = tf.cast(features['label'],tf.int32)
pixels = tf.cast(features['pixels'],tf.int32)

sess = tf.Session()

# 启动多线程处理输入数据。
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess,coord=coord)

# 每次读取完一个数据继续读取
for i in range(10):
    image, label, pixel = sess.run([images, labels, pixels])

7.2 图像数据处理

7.2.1 TensorFlow图像处理函数

  • 对jepg格式的图像进行编码
# 7.2 图像处理
import tensorflow as tf
import matplotlib.pyplot as plt
image_raw_data = tf.gfile.FastGFile("/path/to/picture",'r').read()
with tf.Session() as sess:
    # 使用tf.iamge.decode_jepg解码
    img_data = tf.image.decode_jepg(image_raw_data)
    print(img_data.eval())
    
    plt.imshow(img_data.eval())
    plt.show()
    
    # 将一个图像的三位矩阵按照jepg格式存入文件,并打开
    # 看得到一个与原始图像一样的图片
    encode_image = tf.image.encode_jepg(img_data)
    with tf.gfile.GFile("/path/to/output","wb") as f:
        f.write(encode_image.eval())

对图像进行处理

  • 改变图片像素大小 tf.image.resize_images()
# tf.image.resize_images()
img_data = tf.image.convert_image_dtype(img_data, dtype=tf.float32)

resized = tf.image.resize_images(img_data, [300, 300], method=0)
  • 裁剪填充图片 tf.image.resize_image_with_crop_or_pad()
# 裁剪和填充
with tf.Session() as sess:    
    croped = tf.image.resize_image_with_crop_or_pad(img_data, 1000, 1000)
    padded = tf.image.resize_image_with_crop_or_pad(img_data, 3000, 3000)
    plt.imshow(croped.eval())
    plt.show()
    plt.imshow(padded.eval())
    plt.show()
  • 截取中间50% tf.image.central_crop()
# 截取中间50%
with tf.Session() as sess:   
    central_cropped = tf.image.central_crop(img_data, 0.5)
    plt.imshow(central_cropped.eval())
    plt.show()
  • 翻转图片 tf.image.transpose_image()
# 翻转
with tf.Session() as sess: 
    # 上下翻转
    #flipped1 = tf.image.flip_up_down(img_data)
    # 左右翻转
    #flipped2 = tf.image.flip_left_right(img_data)
    
    #对角线翻转
    transposed = tf.image.transpose_image(img_data)
    plt.imshow(transposed.eval())
    plt.show()
    
    # 以一定概率上下翻转图片。
    #flipped = tf.image.random_flip_up_down(img_data)
    # 以一定概率左右翻转图片。
    #flipped = tf.image.random_flip_left_right(img_data)
  • 色彩调整 tf.image.random_brightness()
# 色彩调整
with tf.Session() as sess:     
    # 将图片的亮度-0.5。
    #adjusted = tf.image.adjust_brightness(img_data, -0.5)
    
    # 将图片的亮度-0.5
    #adjusted = tf.image.adjust_brightness(img_data, 0.5)
    
    # 在[-max_delta, max_delta)的范围随机调整图片的亮度。
    adjusted = tf.image.random_brightness(img_data, max_delta=0.5)
    
    # 将图片的对比度-5
    #adjusted = tf.image.adjust_contrast(img_data, -5)
    
    # 将图片的对比度+5
    #adjusted = tf.image.adjust_contrast(img_data, 5)
    
    # 在[lower, upper]的范围随机调整图的对比度。
    #adjusted = tf.image.random_contrast(img_data, lower, upper)

    plt.imshow(adjusted.eval())
    plt.show()
  • 添加色相和饱和度tf.image.adjust_hue()

# 添加色相和饱和度
with tf.Session() as sess:         
    adjusted = tf.image.adjust_hue(img_data, 0.1)
    #adjusted = tf.image.adjust_hue(img_data, 0.3)
    #adjusted = tf.image.adjust_hue(img_data, 0.6)
    #adjusted = tf.image.adjust_hue(img_data, 0.9)
    
    # 在[-max_delta, max_delta]的范围随机调整图片的色相。max_delta的取值在[0, 0.5]之间。
    #adjusted = tf.image.random_hue(image, max_delta)
    
    # 将图片的饱和度-5。
    #adjusted = tf.image.adjust_saturation(img_data, -5)
    # 将图片的饱和度+5。
    #adjusted = tf.image.adjust_saturation(img_data, 5)
    # 在[lower, upper]的范围随机调整图的饱和度。
    #adjusted = tf.image.random_saturation(img_data, lower, upper)
    
    # 将代表一张图片的三维矩阵中的数字均值变为0,方差变为1。
    #adjusted = tf.image.per_image_whitening(img_data)
    
    plt.imshow(adjusted.eval())
    plt.show()
  • 添加标注框并裁减 tf.expand_dims()
# 添加标注框并裁减
with tf.Session() as sess:         

    boxes = tf.constant([[[0.05, 0.05, 0.9, 0.7], [0.35, 0.47, 0.5, 0.56]]])

    begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
        tf.shape(img_data), bounding_boxes=boxes)


    batched = tf.expand_dims(tf.image.convert_image_dtype(img_data, tf.float32), 0) 
    image_with_box = tf.image.draw_bounding_boxes(batched, bbox_for_draw)
    
    distorted_image = tf.slice(img_data, begin, size)
    plt.imshow(distorted_image.eval())
    plt.show()

 

7.2.2 图像预处理完整样例

# 7.2.2 图像预处理完整样例
def distort_color(image, color_ordering=0):
    if color_ordering == 0:
        image = tf.image.random_brightness(image, max_delta=32./255.) # 随机亮度
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5) # 随机饱和度
        image = tf.image.random_hue(image, max_delta=0.2) # 随机色相
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5) # 随机对比度
    else:
        image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        image = tf.image.random_brightness(image, max_delta=32./255.)
        image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        image = tf.image.random_hue(image, max_delta=0.2)

    return tf.clip_by_value(image, 0.0, 1.0)

def preprocess_for_train(image, height, width, bbox):
    # 查看是否存在标注框。
    if bbox is None:
        bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
    if image.dtype != tf.float32:
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        
    # 随机的截取图片中一个块。
    bbox_begin, bbox_size, _ = tf.image.sample_distorted_bounding_box(
        tf.shape(image), bounding_boxes=bbox)
    bbox_begin, bbox_size, _ = tf.image.sample_distorted_bounding_box(
        tf.shape(image), bounding_boxes=bbox)
    distorted_image = tf.slice(image, bbox_begin, bbox_size)

    # 将随机截取的图片调整为神经网络输入层的大小。
    distorted_image = tf.image.resize_images(distorted_image, [height, width], method=np.random.randint(4))
    distorted_image = tf.image.random_flip_left_right(distorted_image)
    distorted_image = distort_color(distorted_image, np.random.randint(2))
    return distorted_image

# 读取图片
image_raw_data = tf.gfile.FastGFile("../../datasets/cat.jpg", "r").read()
with tf.Session() as sess:
    img_data = tf.image.decode_jpeg(image_raw_data)
    boxes = tf.constant([[[0.05, 0.05, 0.9, 0.7], [0.35, 0.47, 0.5, 0.56]]])
    for i in range(9):
        result = preprocess_for_train(img_data, 299, 299, boxes)
        plt.imshow(result.eval())
        plt.show()

 

7.3 多线程输入数据处理框架

队列是多线程输入的基础

7.3.1 队列与多线程

# 7.3 多线程输入
import tensorflow as tf
q = tf.FIFOQueue(2, "int32")
init = q.enqueue_many(([0, 10],))
x = q.dequeue()
y = x + 1
q_inc = q.enqueue([y])
with tf.Session() as sess:
    init.run()
    for _ in range(5):
        v, _ = sess.run([x, q_inc])
        print(v)
  • tf.Coordinator类协同多个线程一起停止
  1. should_stop函数返回值为True时当前线程退出
  2. request_stop函数调用一个函数时通知其他线程退出
import numpy as np
import threading
import time

def MyLoop(coord, worker_id):
    # 使用tf.Coordinator类提供的协同工具以判断线程是否需要停止
    while not coord.should_stop():
        if np.random.rand() < 0.1:
            print("Stoping from id: %d\n" % worker_id,)
            coord.request_stop()
        else:
            print("Working on id: %d\n" % worker_id, )
        # 暂停一秒
        time.sleep(1)

# 声明一个tf.train.Coordinator类来协同多个线程
coord = tf.train.Coordinator()
# 声明创建5个线程 注意py3取消了xrange 用原来range代替效果相同
threads = [threading.Thread(target=MyLoop, args=(coord, i, )) for i in range(5)]
# 等待所有线程退出
for t in threads:t.start()
coord.join(threads)
  • tf.QueueRunner启动多个线程来操作同一个队列
# tf.QueueRunner启动多个线程来操作同一个队列
import tensorflow as tf

# 声明一个先进先出队列,最多元素为100个,类型为实数
queue = tf.FIFOQueue(100, "float")
# 定义队列的入队操作
enqueue_op = queue.enqueue([tf.random_normal([1])])

# 使用tf.train.QueueRunner来创建多个线程运行队列的入队操作
# 第一个参数为被操作的队列
# 启动5个线程,运行enqueue_op操作
qr = tf.train.QueueRunner(queue, [enqueue_op] * 5)

# 加入计算图
tf.train.add_queue_runner(qr)
out_tensor = queue.dequeue()

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for _ in range(3): 
        print(sess.run(out_tensor)[0])
    
    coord.request_stop()
    coord.join(threads)

 

7.3.2 输入文件队列

# 7.3.2 输入文件队列
import tensorflow as tf
def _int64_feature(value):
    return tf.train.Feature(int64_list = tf.train.Int64List(value=[value]))

# 模拟数据情况,num_shards定义总共写入了多少文件
num_shards = 2
instances_per_shard = 2
for i in range(num_shards):
    filename = ('../../datasets/data.tfrecords-%.5d-of-%.5d' % (i, num_shards))
    writer = tf.python_io.TFRecordWriter(filename)
    
    # 将数据封装成Example结构写入TFRecord文件
    for j in range(instances_per_shard):
        example = tf.train.Example(features=tf.train.Features(feature={
            'i':_int64_feature(i),
            'j':_int64_feature(j)}))
        writer.write(example.SerializeToString())
    writer.close()
  • tf.train.match_filenames_once函数获取符合一个正则表达式的所有文件
  • tf.train.string_input_producer函数进行有效管理
# 通过tf.train.match_filenames_once获取文件列表
files = tf.train.match_filenames_once("../../datasets/data.tfrecords-*")

# 通过tf.train.string_input_producer函数创建输入队列,即为文件列表
filename_queue = tf.train.string_input_producer(files, shuffle=False)

reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)

features = tf.parse_single_example(
    serialized_example,
    features={
        'i': tf.FixedLenFeature([],tf.int64),
        'j': tf.FixedLenFeature([],tf.int64)
    })

with tf.Session() as sess:
    tf.local_variables_initializer().run()
    print(sess.run(files))
    
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    
    for i in range(6):
        print(sess.run([features['i'], features['j']]))
        
    coord.request_stop()
    coord.join(threads)

输出

[0, 0]
[0, 1]
[1, 0]
[1, 1]
[0, 0]
[0, 1]

 

7.3.3 组合训练数据(batching)

example, label = features['i'], features['j']
batch_size = 2
capacity = 1000 + 3 * batch_size
example_batch, label_batch = tf.train.batch([example, label], batch_size=batch_size, capacity=capacity)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for i in range(3):
        cur_example_batch, cur_label_batch = sess.run([example_batch, label_batch])
        print(cur_example_batch, cur_label_batch)
    coord.request_stop()
    coord.join(threads)

 

7.3.4 输入数据处理框架

# 7.3.4 输入数据处理框架
files = tf.train.match_filenames_once("../../datasets/data.tfrecords")
filename_queue = tf.train.string_input_producer(files, shuffle=False)

# 读取文件。

reader = tf.TFRecordReader()
_,serialized_example = reader.read(filename_queue)

# 解析读取的样例。
features = tf.parse_single_example(
    serialized_example,
    features={
        'image_raw':tf.FixedLenFeature([],tf.string),
        'pixels':tf.FixedLenFeature([],tf.int64),
        'label':tf.FixedLenFeature([],tf.int64)
    })

decoded_images = tf.decode_raw(features['image_raw'],tf.uint8)
retyped_images = tf.cast(decoded_images, tf.float32)
labels = tf.cast(features['label'],tf.int32)
#pixels = tf.cast(features['pixels'],tf.int32)
images = tf.reshape(retyped_images, [784])

min_after_dequeue = 10000
batch_size = 100
capacity = min_after_dequeue + 3 * batch_size

image_batch, label_batch = tf.train.shuffle_batch([images, labels], 
                                                    batch_size=batch_size, 
                                                    capacity=capacity, 
                                                    min_after_dequeue=min_after_dequeue)

def inference(input_tensor, weights1, biases1, weights2, biases2):
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2

# 模型相关的参数
INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500
REGULARAZTION_RATE = 0.0001   
TRAINING_STEPS = 5000        

weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))

weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

y = inference(image_batch, weights1, biases1, weights2, biases2)
    
# 计算交叉熵及其平均值
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=label_batch)
cross_entropy_mean = tf.reduce_mean(cross_entropy)
    
# 损失函数的计算
regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
regularaztion = regularizer(weights1) + regularizer(weights2)
loss = cross_entropy_mean + regularaztion

# 优化损失函数
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
    
# 初始化回话并开始训练过程。
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # 循环的训练神经网络。
    for i in range(TRAINING_STEPS):
        if i % 1000 == 0:
            print("After %d training step(s), loss is %g " % (i, sess.run(loss)))
                  
        sess.run(train_step) 
    coord.request_stop()
    coord.join(threads)

 

 

你可能感兴趣的:(实战google深度学习框架,tensorflow)