Tags: DeepLearning Tool
When doing machine learning and deep learning with TensorFlow we constantly need to load data, whether it comes from the official datasets or from our own. This post summarizes the ways TensorFlow can read data.
Reading directly with file queues
- If you have a list of filenames, use tf.train.string_input_producer():
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt
def read_from_string_list(data_dir, is_train, batch_size):
    """
    Func:
        Read image data from the CIFAR-10 binary (.bin) files.
    Args:
        data_dir: the directory holding the CIFAR-10 binary batches
        batch_size: number of samples per batch
    Returns:
        image: 4D tensor, [batch_size, height, width, 3], tf.float32
        label: 2D tensor, [batch_size, n_classes], tf.float32 (one-hot)
    """
    img_width = 32
    img_height = 32
    img_depth = 3
    label_bytes = 1
    image_bytes = img_width * img_height * img_depth

    with tf.name_scope('input'):
        # the binary version of CIFAR-10 ships as data_batch_N.bin files
        if is_train:
            filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % ii)
                         for ii in np.arange(1, 6)]
        else:
            filenames = [os.path.join(data_dir, 'test_batch.bin')]

        filename_queue = tf.train.string_input_producer(filenames)  # create the filename queue
        reader = tf.FixedLengthRecordReader(label_bytes + image_bytes)
        key, value = reader.read(filename_queue)
        record_bytes = tf.decode_raw(value, tf.uint8)

        label = tf.slice(record_bytes, [0], [label_bytes])
        label = tf.cast(label, tf.int32)

        image_raw = tf.slice(record_bytes, [label_bytes], [image_bytes])
        image_raw = tf.reshape(image_raw, [img_depth, img_height, img_width])
        image = tf.transpose(image_raw, (1, 2, 0))  # convert from D/H/W to H/W/D
        image = tf.cast(image, tf.float32)

        # preprocessing: per-image normalization
        image = tf.image.per_image_standardization(image)

        # shuffle the data into batches
        images, label_batch = tf.train.shuffle_batch(
            [image, label],
            batch_size=batch_size,
            num_threads=8,
            capacity=2000,
            min_after_dequeue=1500,
            allow_smaller_final_batch=False)

        # make one-hot vectors; cf. softmax_cross_entropy vs. sparse_softmax_cross_entropy
        n_classes = 10
        label_batch = tf.one_hot(label_batch, depth=n_classes)

        return images, tf.reshape(label_batch, [batch_size, n_classes])
if __name__ == "__main__":
    # note: FixedLengthRecordReader needs the binary version of CIFAR-10
    data_dir = "/Users/yuhua.cheng/Documents/study/ML_tensorflow/cifar-10-batches-bin/"
    images, labels = read_from_string_list(data_dir, True, 49)
    num_steps = 50
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            for i in range(num_steps):
                image_vals, label_vals = sess.run([images, labels])
                print("images shape:", image_vals.shape)
                print("labels shape:", label_vals.shape)
        except tf.errors.OutOfRangeError:
            print('done!')
        finally:
            coord.request_stop()
            coord.join(threads)
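Note that without a num_epochs argument, tf.train.string_input_producer cycles through the file list indefinitely, so the OutOfRangeError branch above never actually fires. A minimal sketch of the finite-epoch variant (num_epochs=2 is an arbitrary choice; the epoch counter is kept in a local variable, so local_variables_initializer is required):

filename_queue = tf.train.string_input_producer(filenames, num_epochs=2)
# ... build the rest of the pipeline exactly as above ...
with tf.Session() as sess:
    sess.run(tf.group(tf.global_variables_initializer(),
                      tf.local_variables_initializer()))
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    # the training loop is unchanged; tf.errors.OutOfRangeError now
    # fires once the two epochs are exhausted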
- If the data itself is already loaded (for example as NumPy arrays), use tf.train.slice_input_producer():
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt
def read_from_file_list(data_dir, is_train, batch_size):
    """
    Func:
        Read image and label arrays from .npy files.
    Args:
        data_dir: unused here; the .npy paths are hard-coded below
        batch_size: number of samples per batch
    Returns:
        image_batch: 4D tensor, [batch_size, height, width, 3], tf.float32
        label_batch: batched label tensor with matching first dimension
    """
    with tf.name_scope('input'):
        if is_train:
            images = np.load("/Users/yuhua.cheng/Desktop/portrait_segmentation/myMobileUNet/data/portrait/X_val.npy")
            labels = np.load("/Users/yuhua.cheng/Desktop/portrait_segmentation/myMobileUNet/data/portrait/Y_val.npy")
        else:
            images = np.load("/Users/yuhua.cheng/Desktop/portrait_segmentation/myMobileUNet/data/portrait/X_test.npy")
            labels = np.load("/Users/yuhua.cheng/Desktop/portrait_segmentation/myMobileUNet/data/portrait/Y_test.npy")
        # create a queue of (image, label) slices from the in-memory arrays
        file_queue = tf.train.slice_input_producer([images, labels], shuffle=True)
        image_batch, label_batch = tf.train.batch(file_queue, batch_size=batch_size)
        return image_batch, label_batch
if __name__ == "__main__":
    data_dir = "/Users/yuhua.cheng/Documents/study/ML_tensorflow/cifar-10-batches-py/"
    image_batch, label_batch = read_from_file_list(data_dir, True, 100)
    num_steps = 50
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            for i in range(num_steps):
                image_vals, label_vals = sess.run([image_batch, label_batch])
                print("images shape:", image_vals.shape)
                print("labels shape:", label_vals.shape)
                print(i)
        except tf.errors.OutOfRangeError:
            print('done!')
        finally:
            coord.request_stop()
            coord.join(threads)
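tf.train.slice_input_producer also handles the case where you only have file paths rather than decoded arrays. A minimal sketch (the paths, image size, and batch size below are hypothetical):

import tensorflow as tf

image_paths = ['/path/to/img_0.jpg', '/path/to/img_1.jpg']  # hypothetical paths
labels = [0, 1]

# slice the two lists in step; shuffling keeps each (path, label) pair aligned
path, label = tf.train.slice_input_producer(
    [tf.convert_to_tensor(image_paths), tf.convert_to_tensor(labels)],
    shuffle=True)
# decode each image on the fly instead of holding everything in memory
image = tf.image.decode_jpeg(tf.read_file(path), channels=3)
# batching requires a statically known shape, hence the fixed resize
image = tf.image.resize_images(image, [224, 224])
image_batch, label_batch = tf.train.batch([image, label], batch_size=32)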
Using the tf.data.Dataset API
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
# Create model
def conv_net(x, n_classes, dropout, reuse, is_training):
    # Define a scope for reusing the variables
    with tf.variable_scope('ConvNet', reuse=reuse):
        # MNIST input is a 1-D vector of 784 features (28*28 pixels).
        # Reshape to picture format [Height x Width x Channel]; the
        # input tensor becomes 4-D: [Batch Size, Height, Width, Channel].
        x = tf.reshape(x, shape=[-1, 28, 28, 1])
        # Convolution layer with 32 filters and a kernel size of 5
        conv1 = tf.layers.conv2d(x, 32, 5, activation=tf.nn.relu)
        # Max pooling (down-sampling) with a kernel size of 2 and strides of 2
        conv1 = tf.layers.max_pooling2d(conv1, 2, 2)
        # Convolution layer with 64 filters and a kernel size of 3
        conv2 = tf.layers.conv2d(conv1, 64, 3, activation=tf.nn.relu)
        # Max pooling (down-sampling) with a kernel size of 2 and strides of 2
        conv2 = tf.layers.max_pooling2d(conv2, 2, 2)
        # Flatten the data to a 1-D vector for the fully connected layer
        fc1 = tf.contrib.layers.flatten(conv2)
        # Fully connected layer (in the contrib folder for now)
        fc1 = tf.layers.dense(fc1, 1024)
        # Apply dropout (if is_training is False, dropout is not applied)
        fc1 = tf.layers.dropout(fc1, rate=dropout, training=is_training)
        # Output layer, class prediction
        out = tf.layers.dense(fc1, n_classes)
        # Because softmax_cross_entropy_with_logits already applies softmax,
        # we only apply softmax to the testing network
        out = tf.nn.softmax(out) if not is_training else out
    return out
# Parameters
learning_rate = 0.01
num_steps = 1000
batch_size = 128
display_step = 100
n_input = 784
n_classes = 10
dropout = 0.75

# Use two placeholders rather than embedding the arrays in the graph: this
# avoids loading all of the data into the graph as constants, and sidesteps
# the 2 GB limit on the size of a single tensor.
_data = tf.placeholder(tf.float32, [None, n_input])
_labels = tf.placeholder(tf.float32, [None, n_classes])
# Create a dataset tensor from the images and the labels
dataset = tf.data.Dataset.from_tensor_slices((_data, _labels))
# Create batches of data
dataset = dataset.batch(batch_size)
# Create an initializable iterator to go over the dataset
iterator = dataset.make_initializable_iterator()
# Neural net input
X, Y = iterator.get_next()
# Create a graph for training
logits_train = conv_net(X, n_classes, dropout, reuse=False, is_training=True)
# Create another graph for testing that reuses the same weights, but has
# different behavior for dropout (not applied)
logits_test = conv_net(X, n_classes, dropout, reuse=True, is_training=False)

# Define loss and optimizer (with train logits, for dropout to take effect)
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits_train, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluate the model (with test logits, so dropout is disabled)
correct_pred = tf.equal(tf.argmax(logits_test, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

with tf.Session() as sess:
    # Initialize the iterator with the training data
    sess.run(iterator.initializer, feed_dict={_data: mnist.train.images,
                                              _labels: mnist.train.labels})
    sess.run(tf.global_variables_initializer())
    for step in range(1, num_steps + 1):
        try:
            # Run optimization
            sess.run(train_op)
        except tf.errors.OutOfRangeError:
            # Re-initialize the iterator when it reaches the end of the dataset
            sess.run(iterator.initializer,
                     feed_dict={_data: mnist.train.images,
                                _labels: mnist.train.labels})
            sess.run(train_op)
        if step % display_step == 0 or step == 1:
            # Calculate batch loss and accuracy
            # (note that this consumes a new batch of data)
            logits, loss, acc = sess.run([logits_test, loss_op, accuracy])
            print("Step " + str(step) + ", Minibatch Loss= " +
                  "{:.4f}".format(loss) + ", Training Accuracy= " +
                  "{:.3f}".format(acc))
    print("Optimization Finished!")
Converting to TFRecords and reading them back
TFRecords is a binary file format that is well suited to storing and processing large-scale datasets.
from PIL import Image
import numpy as np
import skimage.io as io
import tensorflow as tf
import matplotlib.pyplot as plt
def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
filename_pairs = [("/Users/yuhua.cheng/Documents/dataset/segmentation/data/images_data_crop/00001.jpg",\
"/Users/yuhua.cheng/Documents/dataset/segmentation/data/jpg_mask/00001_mask.jpg"),
("/Users/yuhua.cheng/Documents/dataset/segmentation/data/images_data_crop/00002.jpg",\
"/Users/yuhua.cheng/Documents/dataset/segmentation/data/jpg_mask/00002_mask.jpg"),
("/Users/yuhua.cheng/Documents/dataset/segmentation/data/images_data_crop/00003.jpg",\
"/Users/yuhua.cheng/Documents/dataset/segmentation/data/jpg_mask/00003_mask.jpg")]
tfrecords_filename = 'pascal_voc_segmentation.tfrecords'
writer = tf.python_io.TFRecordWriter(tfrecords_filename)
# Collect the original images so we can compare them
# to the reconstructed ones later
original_images = []
for img_path, annotation_path in filename_pairs:
    img = np.array(Image.open(img_path))
    annotation = np.array(Image.open(annotation_path))
    height = img.shape[0]
    width = img.shape[1]
    # keep the original arrays around for the correctness check below
    original_images.append((img, annotation))
    img_raw = img.tobytes()
    annotation_raw = annotation.tobytes()
    example = tf.train.Example(features=tf.train.Features(feature={
        'height': _int64_feature(height),
        'width': _int64_feature(width),
        'image_raw': _bytes_feature(img_raw),
        'mask_raw': _bytes_feature(annotation_raw)}))
    writer.write(example.SerializeToString())
writer.close()
reconstructed_images = []
record_iterator = tf.python_io.tf_record_iterator(path=tfrecords_filename)
for string_record in record_iterator:
    example = tf.train.Example()
    example.ParseFromString(string_record)
    height = int(example.features.feature['height'].int64_list.value[0])
    width = int(example.features.feature['width'].int64_list.value[0])
    img_string = example.features.feature['image_raw'].bytes_list.value[0]
    annotation_string = example.features.feature['mask_raw'].bytes_list.value[0]
    img_1d = np.frombuffer(img_string, dtype=np.uint8)
    reconstructed_img = img_1d.reshape((height, width, -1))  # -1 infers the depth
    annotation_1d = np.frombuffer(annotation_string, dtype=np.uint8)
    # annotations don't have a depth (3rd) dimension
    reconstructed_annotation = annotation_1d.reshape((height, width))
    reconstructed_images.append((reconstructed_img, reconstructed_annotation))

for i, images in enumerate(reconstructed_images):
    print(i)
    image, annotation = images
    plt.subplot(3, 2, 2*i + 1)
    plt.imshow(image)
    plt.subplot(3, 2, 2*i + 2)
    plt.imshow(annotation)
plt.show()
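The original_images list collected while writing is meant for a correctness check. A minimal sketch of that comparison (raw bytes are written and read back, so the round trip should be exact):

for (orig_img, orig_anno), (rec_img, rec_anno) in zip(original_images,
                                                      reconstructed_images):
    # raw bytes round-trip losslessly, so equality must be exact
    assert np.allclose(orig_img, rec_img)
    assert np.allclose(orig_anno, rec_anno)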
Reading TFRecords data in a TensorFlow session
import tensorflow as tf
import matplotlib.pyplot as plt

IMAGE_HEIGHT = 384
IMAGE_WIDTH = 384
tfrecords_filename = 'pascal_voc_segmentation.tfrecords'
def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        # Defaults are not specified since all four keys are required.
        features={
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'image_raw': tf.FixedLenFeature([], tf.string),
            'mask_raw': tf.FixedLenFeature([], tf.string)
        })
    # Convert the scalar string tensors back into uint8 tensors
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    annotation = tf.decode_raw(features['mask_raw'], tf.uint8)
    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)
    image_shape = tf.stack([height, width, 3])
    annotation_shape = tf.stack([height, width, 1])
    image = tf.reshape(image, image_shape)
    annotation = tf.reshape(annotation, annotation_shape)
    # Random transformations can be put here, right before the images are
    # cropped or padded to the predefined size.
    resized_image = tf.image.resize_image_with_crop_or_pad(image=image,
                                                           target_height=IMAGE_HEIGHT,
                                                           target_width=IMAGE_WIDTH)
    resized_annotation = tf.image.resize_image_with_crop_or_pad(image=annotation,
                                                                target_height=IMAGE_HEIGHT,
                                                                target_width=IMAGE_WIDTH)
    images, annotations = tf.train.shuffle_batch([resized_image, resized_annotation],
                                                 batch_size=2,
                                                 capacity=30,
                                                 num_threads=2,
                                                 min_after_dequeue=10)
    return images, annotations
filename_queue = tf.train.string_input_producer([tfrecords_filename])
# Even when reading in multiple threads, share the same filename queue.
image, annotation = read_and_decode(filename_queue)

# The op for initializing global and local variables
init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())
with tf.Session() as sess:
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    # Read off 5 batches just as an example
    for i in range(5):
        img, anno = sess.run([image, annotation])
        print(img[0, :, :, :].shape)
        print('batch:', i)
        # We chose a batch size of two, so each batch holds two image
        # pairs; plot both to check that the shuffling is random.
        plt.subplot(1, 2, 1)
        plt.imshow(img[0, :, :, :])
        plt.subplot(1, 2, 2)
        plt.imshow(anno[0, :, :, 0])
        plt.figure()
        plt.subplot(1, 2, 1)
        plt.imshow(img[1, :, :, :])
        plt.subplot(1, 2, 2)
        plt.imshow(anno[1, :, :, 0])
        plt.show()
    coord.request_stop()
    coord.join(threads)
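For reference, the same records can also be consumed without queue runners through the tf.data API. A minimal sketch that reuses the feature spec above:

def _parse(serialized_example):
    features = tf.parse_single_example(
        serialized_example,
        features={
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'image_raw': tf.FixedLenFeature([], tf.string),
            'mask_raw': tf.FixedLenFeature([], tf.string)
        })
    # decode the raw byte strings back into uint8 tensors
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    annotation = tf.decode_raw(features['mask_raw'], tf.uint8)
    return image, annotation

dataset = tf.data.TFRecordDataset([tfrecords_filename]).map(_parse)
iterator = dataset.make_one_shot_iterator()
next_image, next_annotation = iterator.get_next()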