GitHub - TFRecord study notes
Jikexueyuan - Data processing
Zhihu - TensorFlow's data reading mechanism explained in ten figures (with code)
TensorFlow usage notes (3) - Reading data (1)
Stanford - TensorFlow Input Pipeline
Python tensorflow.WholeFileReader() Examples
[TensorFlow] Data processing (input file queues)
A small pitfall when reading data in TensorFlow
CSDN - TensorFlow study notes (5): TensorFlow variable sharing and data reading
TFRecordReader "OutOfRangeError": a summary of causes
How to write into and read from a TFRecords file in TensorFlow
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 29 20:49:30 2018
@author: brucelau
"""
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import time
from os import listdir
import os
import matplotlib.pyplot as plt
#%% TFRecord Test: Making TFRecord Data File
# Helper functions that wrap raw values into tf.train.Feature protos
def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
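# Quick sanity check (my own addition, not from the references above): build one
# Example with the helpers and print it, to see the proto layout that will be
# written into the .tfrecords files below.
_demo_example = tf.train.Example(features=tf.train.Features(feature={
    'row': _bytes_feature(np.arange(10, dtype=np.int64).tostring()),
    'label': _int64_feature(0)}))
print(_demo_example)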
# Make toy data: 100 rows of 10 values each, plus 100 integer labels
data_x = np.arange(1, 1001, dtype=np.int64).reshape((-1, 10))  # explicit int64 so tf.decode_raw(..., tf.int64) below matches on every platform
data_y = np.arange(100)
# Loop over 10 output files, writing 10 examples into each
if not os.path.exists('TestRecords'):
    os.makedirs('TestRecords')  # TFRecordWriter does not create the directory itself
for name_idx in range(1, 11):
    fn = 'TestRecords/data.tfrecords-%.2d' % name_idx
    writer = tf.python_io.TFRecordWriter(fn)
    for file_num in range(1, 11):
        item = (name_idx - 1) * 10 + file_num - 1
        image_raw = data_x[item].tostring()
        example = tf.train.Example(features=tf.train.Features(feature={
            'row': _bytes_feature(image_raw),
            'label': _int64_feature(data_y[item])}))
        writer.write(example.SerializeToString())
    writer.close()
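# Optional verification (own addition): count the serialized Examples per file
# with tf.python_io.tf_record_iterator; each data.tfrecords-XX written above
# should hold exactly 10 records.
for name_idx in range(1, 11):
    fn = 'TestRecords/data.tfrecords-%.2d' % name_idx
    n_records = sum(1 for _ in tf.python_io.tf_record_iterator(fn))
    print(fn, n_records)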
##%% TFRecord Test: Loading TFRecord Data File
#import tensorflow as tf
#reader = tf.TFRecordReader()
##files = tf.train.match_filenames_once(['TestRecords/data.tfrecords-01'])
#filename_queue = tf.train.string_input_producer(['TestRecords/data.tfrecords-01'])
#_, serialized_example = reader.read(filename_queue)
#features = tf.parse_single_example(
#    serialized_example,
#    features={
#        'row': tf.FixedLenFeature([], tf.string),
#        'label': tf.FixedLenFeature([], tf.int64)})
#
## tf.decode_raw parses the raw byte string back into the original numeric array
#images = tf.decode_raw(features['row'], tf.int64)
#labels = tf.cast(features['label'], tf.int32)
#
#with tf.Session() as sess:
#    sess.run(tf.local_variables_initializer())
#    sess.run(tf.global_variables_initializer())
#    # Start the threads that feed the input queues (Section 7.3 of the book
#    # introduces TensorFlow multi-threading in more detail)
#    coord = tf.train.Coordinator()
#    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
#
#    # Each sess.run reads one example from the TFRecord file; once all
#    # examples have been consumed, reading starts again from the beginning
#    for i in range(2):
#        image, label = sess.run([images, labels])
#    coord.request_stop()
#    coord.join(threads)
#print(image)
#print(label)
#%%
files = tf.train.match_filenames_once(['TestRecords/data.tfrecords-*'])
## Why can't the following list be passed into tf.train.string_input_producer()?
#import tensorflow as tf
#from os import listdir
#file = listdir('TestRecords/')
#file.sort()
#files = tf.get_variable(name='test7',initializer=file)
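# Note on the question above (own addition): tf.train.string_input_producer also
# accepts a plain Python list of file-name strings, so the listdir() result can be
# passed in directly without wrapping it in a tf.get_variable. A minimal sketch,
# kept commented so it does not add another queue to the graph:
#file_list = sorted('TestRecords/' + f for f in listdir('TestRecords'))
#queue_from_list = tf.train.string_input_producer(file_list, shuffle=False)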
#%% Loop generator: one sample at a time, following the file order
filename_queue = tf.train.string_input_producer(files, shuffle=False, num_epochs=2)
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
    serialized_example,
    features={
        'row': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64)})
decoded_data = tf.decode_raw(features['row'], tf.int64)  # decode with the dtype the data was written in
labels = tf.cast(features['label'], tf.int64)
c = []
with tf.Session() as sess:
    # Initialization (local variables are needed because num_epochs is set in the producer)
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    # Inspect the matched file names
    f1 = sess.run(files)
    f2 = [i.decode('ascii') for i in f1]
    f2.sort()
    print(f1)
    print(f2)
    # Create the coordinator and start the queue-runner threads
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # 200 iterations = 2 epochs * 100 examples; asking for more would raise an
    # OutOfRangeError because num_epochs=2 above
    for i in range(200):
        # Fetch both tensors in a single sess.run so they come from the same example;
        # running them in separate calls would pull them from different reads
        de_data, la = sess.run([decoded_data, labels])
        print(la)
        c.append(la)
    coord.request_stop()
    coord.join(threads)
plt.plot(c)
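#%% Own addition: the same single-example pipeline expressed with tf.data
# (available from TF 1.4 onward) as a queue-free alternative. This is only a
# sketch using the same feature spec as above; it is not part of the original notes.
def _parse_fn(serialized):
    parsed = tf.parse_single_example(
        serialized,
        features={'row': tf.FixedLenFeature([], tf.string),
                  'label': tf.FixedLenFeature([], tf.int64)})
    return tf.decode_raw(parsed['row'], tf.int64), parsed['label']

dataset = tf.data.TFRecordDataset(sorted(tf.gfile.Glob('TestRecords/data.tfrecords-*')))
dataset = dataset.map(_parse_fn).repeat(2)              # two passes, matching num_epochs=2 above
next_row, next_label = dataset.make_one_shot_iterator().get_next()
with tf.Session() as sess:
    print(sess.run([next_row, next_label]))             # first example: row 1..10, label 0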
#%% Batch generator: batches drawn from several files simultaneously
filename_queue = tf.train.string_input_producer(files, shuffle=False, num_epochs=2)
# Note: shuffle=True here would only shuffle the order of the TFRecord *files*; if a
# single file holds all the data, the order of the produced examples is unchanged
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
    serialized_example,
    features={
        'row': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64)})
decoded_data = tf.decode_raw(features['row'], tf.int64)  # decode
decoded_data = tf.reshape(decoded_data, [10])  # the static shape must be set, otherwise tf.train.shuffle_batch raises an error
labels = tf.cast(features['label'], tf.int64)
batch_size = 10
capacity = 1000 + 3 * batch_size
example_batch, label_batch = tf.train.shuffle_batch([decoded_data, labels],
                                                    batch_size=batch_size,
                                                    capacity=capacity,
                                                    min_after_dequeue=30)
c = []
with tf.Session() as sess:
    # Initialization
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    # Inspect the matched file names
    f1 = sess.run(files)
    f2 = [i.decode('ascii') for i in f1]
    f2.sort()
    # print(f1)
    # print(f2)
    # Create the coordinator and start the queue-runner threads
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # 20 batches of 10 = 200 examples = 2 epochs of the 100-example data set
    for i in range(20):
        cur_example_batch, cur_label_batch = sess.run([example_batch, label_batch])
        print(cur_label_batch)
        c.append(cur_label_batch)
    coord.request_stop()
    coord.join(threads)
# After sorting, each label 0..99 should appear twice (two epochs), giving a staircase plot
C = np.array(c).reshape(-1)
C.sort()
plt.plot(C)
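#%% Own addition: for comparison, tf.train.batch is the plain FIFO counterpart of
# tf.train.shuffle_batch and keeps the example order instead of shuffling. A sketch
# reusing the tensors from the batch-generator cell above; running it follows the
# same Coordinator / start_queue_runners pattern as before.
ordered_batch, ordered_labels = tf.train.batch([decoded_data, labels],
                                               batch_size=batch_size,
                                               capacity=capacity)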