《TensorFlow 与卷积神经网络:从算法入门到项目实战》学习笔记
Brief:
模型训练的第一步就是数据的读取,在卷积神经网络中,读取的数据主要是图片及其对应的标签。
TensorFlow 在各个设备(CPU,GPU等)之间传递数据时,使用队列。但是在CPU和GPU之间传递数据是非常缓慢的,为了避免数据传递带来的耗时问题,采用了异步传递的解决方案,CPU不断往队列传入数据,GPU不断从队列中读取数据。
常见的队列有: 先进先出队列 tf.FIFOQueue、优先级队列 tf.PriorityQueue、随机出队队列 tf.RandomShuffleQueue。
队列包括三个操作: 创建队列、入队(enqueue)、出队(dequeue)。
函数原型:
tf.FIFOQueue(capacity,
    dtypes,
    shapes=None,
    names=None,
    shared_name=None,
    name='fifo_queue')
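参数: capacity 为队列可容纳的最大元素个数; dtypes 为每个元素中各分量的数据类型列表; shapes 为各分量的形状列表(与 dtypes 等长); names 为各分量的名称列表(指定后需以字典形式入队,出队也返回字典); shared_name 非空时队列可按该名称在多个会话间共享; name 为该队列操作的名称。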
举例:
import tensorflow as tf
import numpy as np
def init_data():
    """Build the sample records that the FIFO queue demo enqueues.

    Returns:
        A list of 10 ``(float_data, str_data)`` tuples. For index ``i``,
        ``float_data`` is ``[i, i, i]`` and ``str_data`` is
        ``['str_<i>', 'str<i>']`` (the second string has no underscore).
    """
    return [
        ([i, i, i], ['str_' + str(i), 'str' + str(i)])
        for i in range(10)
    ]
if __name__ == '__main__':
    data_list = init_data()

    # FIFO queue with room for 10 elements; every element is a pair of
    # (float32 vector of length 3, string vector of length 2).
    queue = tf.FIFOQueue(capacity=10,
                         dtypes=[tf.float32, tf.string],
                         shapes=[(3,), (2,)])

    # Placeholders through which the Python data is fed to the enqueue op.
    fp_data_tf = tf.placeholder(shape=(3,), dtype=tf.float32)
    str_data_tf = tf.placeholder(shape=(2,), dtype=tf.string)
    enqueue_op = queue.enqueue((fp_data_tf, str_data_tf))
    dequeue_data = queue.dequeue()

    with tf.Session() as sess:
        # Fill the queue, one element per run of the enqueue op.
        for fp_data, str_data in data_list:
            sess.run(enqueue_op, feed_dict={
                fp_data_tf: fp_data, str_data_tf: str_data})

        # Dequeue exactly as many elements as were enqueued; a further
        # dequeue on the then-empty queue would block.
        for _ in range(len(data_list)):
            print(sess.run(dequeue_data))

# Output (elements come out in the order they were enqueued):
# [array([0., 0., 0.], dtype=float32), array([b'str_0', b'str0'], dtype=object)]
# [array([1., 1., 1.], dtype=float32), array([b'str_1', b'str1'], dtype=object)]
# [array([2., 2., 2.], dtype=float32), array([b'str_2', b'str2'], dtype=object)]
# [array([3., 3., 3.], dtype=float32), array([b'str_3', b'str3'], dtype=object)]
# [array([4., 4., 4.], dtype=float32), array([b'str_4', b'str4'], dtype=object)]
# [array([5., 5., 5.], dtype=float32), array([b'str_5', b'str5'], dtype=object)]
# [array([6., 6., 6.], dtype=float32), array([b'str_6', b'str6'], dtype=object)]
# [array([7., 7., 7.], dtype=float32), array([b'str_7', b'str7'], dtype=object)]
# [array([8., 8., 8.], dtype=float32), array([b'str_8', b'str8'], dtype=object)]
# [array([9., 9., 9.], dtype=float32), array([b'str_9', b'str9'], dtype=object)]
函数原型:
tf.PriorityQueue(capacity,
    types,
    shapes=None,
    names=None,
    shared_name=None,
    name='priority_queue')
注意,这里和 tf.FIFOQueue 函数参数的主要区别是: 这里的数据类型关键字是 "types",不是 "dtypes"(原因不明),其他参数含义都一样。另外,入队时要在数据最前面额外加一个 priority(int64 标量),出队时 priority 数值最小的元素最先出队。
举例:
import tensorflow as tf
import numpy as np
def init_data():
    """Build the sample records that the priority queue demo enqueues.

    Returns:
        A list of 10 ``(priority, float_data, str_data)`` tuples. For index
        ``i``, ``priority`` is ``10 - i`` (so later records get smaller
        priority values), ``float_data`` is ``[i, i, i]`` and ``str_data``
        is ``['str_<i>', 'str<i>']``.
    """
    return [
        (10 - i, [i, i, i], ['str_' + str(i), 'str' + str(i)])
        for i in range(10)
    ]
if __name__ == '__main__':
    data_list = init_data()

    # `types` and `shapes` describe only the payload components; the
    # priority is supplied as an extra leading component when enqueuing.
    queue = tf.PriorityQueue(capacity=10,
                             types=[tf.float32, tf.string],
                             shapes=[(3,), (2,)])

    # The priority is fed as a scalar int64, followed by the two payload parts.
    priority_tf = tf.placeholder(shape=(), dtype=tf.int64)
    fp_data_tf = tf.placeholder(shape=(3,), dtype=tf.float32)
    str_data_tf = tf.placeholder(shape=(2,), dtype=tf.string)
    enqueue_op = queue.enqueue((priority_tf, fp_data_tf, str_data_tf))
    dequeue_data = queue.dequeue()

    with tf.Session() as sess:
        for priority, fp_data, str_data in data_list:
            sess.run(enqueue_op, feed_dict={
                priority_tf: priority,
                fp_data_tf: fp_data,
                str_data_tf: str_data})

        # Dequeue exactly as many elements as were enqueued; a further
        # dequeue on the then-empty queue would block.
        for _ in range(len(data_list)):
            print(sess.run(dequeue_data))

# Output (the element with the smallest priority value is dequeued first):
# [1, array([9., 9., 9.], dtype=float32), array([b'str_9', b'str9'], dtype=object)]
# [2, array([8., 8., 8.], dtype=float32), array([b'str_8', b'str8'], dtype=object)]
# [3, array([7., 7., 7.], dtype=float32), array([b'str_7', b'str7'], dtype=object)]
# [4, array([6., 6., 6.], dtype=float32), array([b'str_6', b'str6'], dtype=object)]
# [5, array([5., 5., 5.], dtype=float32), array([b'str_5', b'str5'], dtype=object)]
# [6, array([4., 4., 4.], dtype=float32), array([b'str_4', b'str4'], dtype=object)]
# [7, array([3., 3., 3.], dtype=float32), array([b'str_3', b'str3'], dtype=object)]
# [8, array([2., 2., 2.], dtype=float32), array([b'str_2', b'str2'], dtype=object)]
# [9, array([1., 1., 1.], dtype=float32), array([b'str_1', b'str1'], dtype=object)]
# [10, array([0., 0., 0.], dtype=float32), array([b'str_0', b'str0'], dtype=object)]
函数原型:
tf.RandomShuffleQueue(capacity,
    min_after_dequeue,
    dtypes,
    shapes=None,
    names=None,
    seed=None,
    shared_name=None,
    name='random_shuffle_queue')
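参数 (其他参数意同 tf.FIFOQueue): min_after_dequeue 为出队操作完成后队列中必须保留的最少元素个数,用来保证元素被充分打乱; 如果出队会使剩余元素少于该值,出队操作会一直阻塞,直到有新的元素入队或队列被关闭(队列关闭后该限制不再生效)。seed 为随机种子。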
import tensorflow as tf
import numpy as np
def init_data():
    """Build the sample records that the random shuffle queue demo enqueues.

    Returns:
        A list of 10 ``(float_data, str_data)`` tuples. For index ``i``,
        ``float_data`` is ``[i, i, i]`` and ``str_data`` is
        ``['str_<i>', 'str<i>']`` (the second string has no underscore).
    """
    return [
        ([i, i, i], ['str_' + str(i), 'str' + str(i)])
        for i in range(10)
    ]
if __name__ == '__main__':
    data_list = init_data()

    # Shuffling queue: while it is open, a dequeue must leave at least
    # `min_after_dequeue` elements behind, otherwise it blocks.
    queue = tf.RandomShuffleQueue(capacity=10,
                                  min_after_dequeue=5,
                                  dtypes=[tf.float32, tf.string],
                                  shapes=[(3,), (2,)])

    fp_data_tf = tf.placeholder(shape=(3,), dtype=tf.float32)
    str_data_tf = tf.placeholder(shape=(2,), dtype=tf.string)
    enqueue_op = queue.enqueue((fp_data_tf, str_data_tf))
    dequeue_data = queue.dequeue()
    close_op = queue.close()

    with tf.Session() as sess:
        for fp_data, str_data in data_list:
            sess.run(enqueue_op, feed_dict={
                fp_data_tf: fp_data, str_data_tf: str_data})

        # Without closing, only 10 - 5 = 5 elements could be dequeued and the
        # sixth dequeue would block forever, because nothing else is enqueued.
        # Closing the queue lifts the min_after_dequeue restriction so the
        # remaining elements can be drained.
        sess.run(close_op)

        for _ in range(len(data_list)):
            print(sess.run(dequeue_data))

# Example output (the order is random and differs between runs; only the
# first five of the ten printed lines are shown, the other five elements
# follow in the same format):
# [array([8., 8., 8.], dtype=float32), array([b'str_8', b'str8'], dtype=object)]
# [array([0., 0., 0.], dtype=float32), array([b'str_0', b'str0'], dtype=object)]
# [array([1., 1., 1.], dtype=float32), array([b'str_1', b'str1'], dtype=object)]
# [array([6., 6., 6.], dtype=float32), array([b'str_6', b'str6'], dtype=object)]
# [array([4., 4., 4.], dtype=float32), array([b'str_4', b'str4'], dtype=object)]