【Tool】TensorFlow Basics I: Data Loading

Tags: DeepLearning Tool


When using TensorFlow for machine learning and deep learning tasks, we constantly need to load data, whether from official datasets or our own. This post summarizes the ways TensorFlow can read data.

Reading data directly with file queues

  1. For a list of filenames:
    tf.train.string_input_producer()
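The general pattern: build a queue of filenames, attach a reader that pulls one record per call, then decode the record. A minimal sketch with hypothetical JPEG paths (the full CIFAR-10 example below uses tf.FixedLengthRecordReader for fixed-size binary records instead):

import tensorflow as tf

# hypothetical image paths, just to illustrate the pattern
filename_queue = tf.train.string_input_producer(['a.jpg', 'b.jpg'], shuffle=True)
reader = tf.WholeFileReader()                    # reads one whole file per call
_, file_contents = reader.read(filename_queue)
image = tf.image.decode_jpeg(file_contents, channels=3)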
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt


def read_from_string_list(data_dir, is_train, batch_size):
    """
    Func:
        Read Image Data from bin file
    Args:
        data_dir: the directory of CIFAR10
        batch_size:      
    Returns:
        label: 1D tensor, tf.int32
        image: 4D tensor, [batch_size, height, width, 3], tf.float32
    """
    
    img_width = 32
    img_height = 32
    img_depth = 3
    label_bytes = 1
    image_bytes = img_width*img_height*img_depth
    
    
    with tf.name_scope('input'):
        
        if is_train:
            filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % ii) for ii in np.arange(1, 6)]
        else:
            filenames = [os.path.join(data_dir, 'test_batch.bin')]
              
        filename_queue = tf.train.string_input_producer(filenames) # create the filename queue

        reader = tf.FixedLengthRecordReader(label_bytes + image_bytes)

        key, value = reader.read(filename_queue)

        record_bytes = tf.decode_raw(value, tf.uint8)
        
        label = tf.slice(record_bytes, [0], [label_bytes])   
        label = tf.cast(label, tf.int32)

        image_raw = tf.slice(record_bytes, [label_bytes], [image_bytes])     
        image_raw = tf.reshape(image_raw, [img_depth, img_height, img_width])     
        image = tf.transpose(image_raw, (1,2,0)) # convert from D/H/W to H/W/D       
        image = tf.cast(image, tf.float32)
        
        # preprocessing: 
        image = tf.image.per_image_standardization(image) # normalization

        # shuffle data 
        images, label_batch = tf.train.shuffle_batch(
                                [image, label], 
                                batch_size = batch_size,
                                num_threads= 8,
                                capacity = 2000,
                                min_after_dequeue = 1500,
                                allow_smaller_final_batch=False)

        # make one-hot vectors; see softmax_cross_entropy vs. sparse_softmax_cross_entropy
        n_classes = 10
        label_batch = tf.one_hot(label_batch, depth= n_classes)

    return images, tf.reshape(label_batch, [batch_size, n_classes])
    

if __name__ == "__main__":

    data_dir = "/Users/yuhua.cheng/Documents/study/ML_tensorflow/cifar-10-batches-py/"
    images, labels = read_from_string_list(data_dir, True, 49)
    num_batches = 50
    
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            for i in range(num_batches):
                img, lbl = sess.run([images, labels])
                print("images shape:", img.shape)
                print("labels shape:", lbl.shape)
        except tf.errors.OutOfRangeError:
            print('done!')
        finally:
            coord.request_stop()
        coord.join(threads)
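Note that string_input_producer cycles over the filenames indefinitely by default, so the OutOfRangeError branch above never actually fires. To stop after a fixed number of passes, set num_epochs and initialize local variables. A sketch under that assumption, reusing the filenames list from the function above:

filename_queue = tf.train.string_input_producer(filenames, num_epochs=10)

with tf.Session() as sess:
    # the epoch counter is stored as a local variable, so this init is required
    sess.run(tf.local_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    # ... OutOfRangeError is now raised after 10 passes over the files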
  2. For data already loaded in memory (e.g., NumPy arrays):
    tf.train.slice_input_producer()
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt


def read_from_file_list(data_dir, is_train, batch_size):
    """
    Func:
        Read Image Data
    Args:
        data_dir: the directory of CIFAR10
        batch_size:      
    Returns:
        label: 1D tensor, tf.int32
        image: 4D tensor, [batch_size, height, width, 3], tf.float32
    """
    
    img_width = 32
    img_height = 32
    img_depth = 3
    label_bytes = 1
    image_bytes = img_width*img_height*img_depth
    
    
    with tf.name_scope('input'):
        
        if is_train:
            images = np.load("/Users/yuhua.cheng/Desktop/portrait_segmentation/myMobileUNet/data/portrait/X_val.npy")
            labels = np.load("/Users/yuhua.cheng/Desktop/portrait_segmentation/myMobileUNet/data/portrait/Y_val.npy")
        else:
            images = np.load("/Users/yuhua.cheng/Desktop/portrait_segmentation/myMobileUNet/data/portrait/X_test.npy")
            labels = np.load("/Users/yuhua.cheng/Desktop/portrait_segmentation/myMobileUNet/data/portrait/Y_test.npy")
              
        # slice_input_producer builds a queue that yields one (image, label) pair at a time
        input_slices = tf.train.slice_input_producer([images, labels], shuffle=True)

        image_batch, label_batch = tf.train.batch(input_slices, batch_size=batch_size)

    return image_batch, label_batch
    

if __name__ == "__main__":

    data_dir = "/Users/yuhua.cheng/Documents/study/ML_tensorflow/cifar-10-batches-py/"
    image_batch, label_batch = read_from_file_list(data_dir, True, 100)
    num_batches = 50
    
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            for i in range(num_batches):
                img, lbl = sess.run([image_batch, label_batch])
                print("images shape:", img.shape)
                print("labels shape:", lbl.shape)
        except tf.errors.OutOfRangeError:
            print('done!')
        finally:
            coord.request_stop()
        coord.join(threads)
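Because slice_input_producer emits one example at a time, per-example preprocessing can sit between the queue and the batching op. A sketch (the [0, 1] scaling is illustrative, not part of the original code):

image, label = tf.train.slice_input_producer([images, labels], shuffle=True)
image = tf.cast(image, tf.float32) / 255.0   # illustrative: scale pixels to [0, 1]
image_batch, label_batch = tf.train.batch([image, label],
                                          batch_size=batch_size,
                                          num_threads=4,
                                          capacity=1000)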

Using the tf.data.Dataset API

tf.data expresses the input pipeline as a chain of dataset transformations. The example below trains a small CNN on MNIST, feeding batches through an initializable iterator.

import tensorflow as tf 
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Create model
def conv_net(x, n_classes, dropout, reuse, is_training):
    # Define a scope for reusing the variables
    with tf.variable_scope('ConvNet', reuse=reuse):
        # MNIST data input is a 1-D vector of 784 features (28*28 pixels)
        # Reshape to match picture format [Height x Width x Channel]
        # Tensor input become 4-D: [Batch Size, Height, Width, Channel]
        x = tf.reshape(x, shape=[-1, 28, 28, 1])

        # Convolution Layer with 32 filters and a kernel size of 5
        conv1 = tf.layers.conv2d(x, 32, 5, activation=tf.nn.relu)
        # Max Pooling (down-sampling) with strides of 2 and kernel size of 2
        conv1 = tf.layers.max_pooling2d(conv1, 2, 2)

        # Convolution Layer with 64 filters and a kernel size of 3
        conv2 = tf.layers.conv2d(conv1, 64, 3, activation=tf.nn.relu)
        # Max Pooling (down-sampling) with strides of 2 and kernel size of 2
        conv2 = tf.layers.max_pooling2d(conv2, 2, 2)

        # Flatten the data to a 1-D vector for the fully connected layer
        fc1 = tf.contrib.layers.flatten(conv2)

        # Fully connected layer
        fc1 = tf.layers.dense(fc1, 1024)
        # Apply Dropout (if is_training is False, dropout is not applied)
        fc1 = tf.layers.dropout(fc1, rate=dropout, training=is_training)

        # Output layer, class prediction
        out = tf.layers.dense(fc1, n_classes)
        # Because 'softmax_cross_entropy_with_logits' already applies softmax,
        # we only apply softmax to the testing network
        out = tf.nn.softmax(out) if not is_training else out
        
    return out

# parameters
learning_rate = 0.01
num_steps = 1000
batch_size = 128
display_step = 100

n_input = 784
n_classes = 10
dropout = 0.75

# Use two placeholders instead of embedding the arrays in the graph; this
# avoids loading all data into the graph and the 2 GB limit on a single tensor.
_data = tf.placeholder(tf.float32, [None, n_input])
_labels = tf.placeholder(tf.float32, [None, n_classes])

# Create a dataset tensor from the images and the labels
dataset = tf.data.Dataset.from_tensor_slices((_data, _labels))
# Create batches of data
dataset = dataset.batch(batch_size)
# Create an initializable iterator to go over the dataset
iterator = dataset.make_initializable_iterator()

# Neural Net Input
X, Y = iterator.get_next()

# Create a graph for training
logits_train = conv_net(X, n_classes, dropout, reuse=False, is_training=True)
# Create another graph for testing that reuses the same weights, but has
# different behavior for 'dropout' (not applied).
logits_test = conv_net(X, n_classes, dropout, reuse=True, is_training=False)

# Define loss and optimizer (with train logits, for dropout to take effect)
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits_train, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate model (with test logits, for dropout to be disabled)
correct_pred = tf.equal(tf.argmax(logits_test, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))


with tf.Session() as sess:

    # Initialize the iterator
    sess.run(iterator.initializer, feed_dict={_data: mnist.train.images,
                                          _labels: mnist.train.labels})

    sess.run(tf.global_variables_initializer())
    for step in range(1, num_steps + 1):
    
        try:
            # Run optimization
            sess.run(train_op)
        except tf.errors.OutOfRangeError:
            # Reload the iterator when it reaches the end of the dataset
            sess.run(iterator.initializer, 
                 feed_dict={_data: mnist.train.images,
                            _labels: mnist.train.labels})
            sess.run(train_op)

        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy
            # (note that this consumes a new batch of data)
            loss, acc = sess.run([loss_op, accuracy])
            
            print("Step " + str(step) + ", Minibatch Loss= " + \
              "{:.4f}".format(loss) + ", Training Accuracy= " + \
              "{:.3f}".format(acc))

    print("Optimization Finished!")

Converting to TFRecords and reading them back

TFRecords is a binary file format well suited to storing and processing large-scale datasets. Each record here holds a serialized tf.train.Example protocol buffer, which maps string keys to typed feature lists.

from PIL import Image
import numpy as np
import skimage.io as io
import tensorflow as tf
import matplotlib.pyplot as plt


def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

filename_pairs = [("/Users/yuhua.cheng/Documents/dataset/segmentation/data/images_data_crop/00001.jpg",\
"/Users/yuhua.cheng/Documents/dataset/segmentation/data/jpg_mask/00001_mask.jpg"), 
("/Users/yuhua.cheng/Documents/dataset/segmentation/data/images_data_crop/00002.jpg",\
"/Users/yuhua.cheng/Documents/dataset/segmentation/data/jpg_mask/00002_mask.jpg"), 
("/Users/yuhua.cheng/Documents/dataset/segmentation/data/images_data_crop/00003.jpg",\
"/Users/yuhua.cheng/Documents/dataset/segmentation/data/jpg_mask/00003_mask.jpg")]

tfrecords_filename = 'pascal_voc_segmentation.tfrecords'

writer = tf.python_io.TFRecordWriter(tfrecords_filename)

# Let's collect the real images to later on compare
# to the reconstructed ones
original_images = []

for img_path, annotation_path in filename_pairs:

    img = np.array(Image.open(img_path))
    annotation = np.array(Image.open(annotation_path))
    height = img.shape[0]
    width = img.shape[1]

    # Put in the original images into array
    # Just for future check for correctness
    original_images.append((img, annotation))

    img_raw = img.tobytes()
    annotation_raw = annotation.tobytes()

    example = tf.train.Example(features=tf.train.Features(feature={
        'height': _int64_feature(height),
        'width': _int64_feature(width),
        'image_raw': _bytes_feature(img_raw),
        'mask_raw': _bytes_feature(annotation_raw)}))
    writer.write(example.SerializeToString())

writer.close()

reconstructed_images = []

record_iterator = tf.python_io.tf_record_iterator(path=tfrecords_filename)
for string_record in record_iterator:
    
    example = tf.train.Example()
    example.ParseFromString(string_record)
    
    height = int(example.features.feature['height']
                                 .int64_list
                                 .value[0])
    
    width = int(example.features.feature['width']
                                .int64_list
                                .value[0])
    
    img_string = (example.features.feature['image_raw']
                                  .bytes_list
                                  .value[0])
    
    annotation_string = (example.features.feature['mask_raw']
                                .bytes_list
                                .value[0])
    
    img_1d = np.frombuffer(img_string, dtype=np.uint8)
    reconstructed_img = img_1d.reshape((height, width, -1)) # -1 infers the depth
    
    annotation_1d = np.frombuffer(annotation_string, dtype=np.uint8)
    
    # Annotations don't have depth (3rd dimension)
    reconstructed_annotation = annotation_1d.reshape((height, width))
    
    reconstructed_images.append((reconstructed_img, reconstructed_annotation))

for i, images in enumerate(reconstructed_images):
    image, annotation = images
    plt.subplot(3, 2, 2*i+1)
    plt.imshow(image)
    plt.subplot(3, 2, 2*i+2)
    plt.imshow(annotation)
    
plt.show()

Reading TFRecords data in a TensorFlow session

The records written above can be read back through a queue-based pipeline:

import tensorflow as tf
import matplotlib.pyplot as plt
IMAGE_HEIGHT = 384
IMAGE_WIDTH = 384

tfrecords_filename = 'pascal_voc_segmentation.tfrecords'

def read_and_decode(filename_queue):
    
    reader = tf.TFRecordReader()

    _, serialized_example = reader.read(filename_queue)

    features = tf.parse_single_example(
      serialized_example,
      # Defaults are not specified since both keys are required.
      features={
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
        'mask_raw': tf.FixedLenFeature([], tf.string)
        })

    # Convert the raw byte strings into flat uint8 tensors
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    annotation = tf.decode_raw(features['mask_raw'], tf.uint8)
    
    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)
    
    image_shape = tf.stack([height, width, 3])
    annotation_shape = tf.stack([height, width, 1])
    
    image = tf.reshape(image, image_shape)
    annotation = tf.reshape(annotation, annotation_shape)
    
    # Random transformations (augmentation) can be inserted here, right
    # before the images are cropped/padded to the predefined size.
    
    resized_image = tf.image.resize_image_with_crop_or_pad(image=image,
                                           target_height=IMAGE_HEIGHT,
                                           target_width=IMAGE_WIDTH)
    
    resized_annotation = tf.image.resize_image_with_crop_or_pad(image=annotation,
                                           target_height=IMAGE_HEIGHT,
                                           target_width=IMAGE_WIDTH)
    
    
    images, annotations = tf.train.shuffle_batch( [resized_image, resized_annotation],
                                                 batch_size=2,
                                                 capacity=30,
                                                 num_threads=2,
                                                 min_after_dequeue=10)
    
    return images, annotations


filename_queue = tf.train.string_input_producer(
    [tfrecords_filename])

# Even when reading in multiple threads, share the filename
# queue.
image, annotation = read_and_decode(filename_queue)

# The op for initializing the variables.
init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())

with tf.Session() as sess:
    
    sess.run(init_op)
    
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    
    # Let's read off 5 batches just for example
    for i in range(5):
    
        img, anno = sess.run([image, annotation])
        print(img[0, :, :, :].shape)
        
        print('batch:',i)
        
        # We selected the batch size of two
        # So we should get two image pairs in each batch
        # Let's make sure it is random
        plt.subplot(1,2,1)
        plt.imshow(img[0, :, :, :])
        plt.subplot(1,2,2)
        plt.imshow(anno[0, :, :, 0])

        plt.figure()
        
        plt.subplot(1,2,1)
        plt.imshow(img[1, :, :, :])
        plt.subplot(1,2,2)
        plt.imshow(anno[1, :, :, 0])
        plt.show()
        
    
    coord.request_stop()
    coord.join(threads)
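The same file can also be consumed with the tf.data API instead of queues. A sketch using the same feature keys as above (tf.data.TFRecordDataset is available from TF 1.4 on):

def _parse(serialized_example):
    features = tf.parse_single_example(
        serialized_example,
        features={
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'image_raw': tf.FixedLenFeature([], tf.string),
            'mask_raw': tf.FixedLenFeature([], tf.string)
        })
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)
    image = tf.reshape(image, tf.stack([height, width, 3]))
    # crop/pad to a fixed size so examples can be batched together
    image = tf.image.resize_image_with_crop_or_pad(image, IMAGE_HEIGHT, IMAGE_WIDTH)
    return image

dataset = tf.data.TFRecordDataset(tfrecords_filename).map(_parse).batch(2)
images = dataset.make_one_shot_iterator().get_next()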
