Reading method 1: load the entire sequence into the computation graph at once.
import tensorflow as tf
import numpy as np

x = {"a": [i for i in range(5)],
     "b": np.random.uniform(size=(5, 2))}
# Create a dataset containing 5 elements, e.g.:
# {'a': 0, 'b': array([0.31102309, 0.28081324])}
# {'a': 1, 'b': array([0.64559238, 0.9602511 ])}
# {'a': 2, 'b': array([0.5191022 , 0.29045949])}
# {'a': 3, 'b': array([0.80690428, 0.02572865])}
# {'a': 4, 'b': array([0.33659348, 0.39553411])}
dataset = tf.data.Dataset.from_tensor_slices(x)
# Instantiate a one-shot iterator from the dataset:
# it can only be traversed from start to end once.
iterator = dataset.make_one_shot_iterator()
# Fetch one element from the iterator
one_element = iterator.get_next()
sess = tf.Session()
try:
    while True:
        print(sess.run(one_element))
except tf.errors.OutOfRangeError:  # once all elements are consumed, the next fetch raises OutOfRangeError
    print("end")
Reading method 2: read the data with tf.placeholder. All of the data is still loaded into memory at once, but only a portion of it is fed into the computation graph at a time, which reduces the graph's memory footprint when the dataset is large.
import tensorflow as tf
import numpy as np

path = r"E:\tf_project\练习\exchangeData2.txt"
data = np.loadtxt(path, delimiter=",", dtype=np.float32)
# print(data)
features = data[:, 0]
labels = data[:, 1]
# Placeholders keep the actual arrays out of the graph definition;
# the data is fed in only when the iterator is initialized.
features_placeholder = tf.placeholder(features.dtype, features.shape)
labels_placeholder = tf.placeholder(labels.dtype, labels.shape)
dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()
sess = tf.Session()
sess.run(iterator.initializer, feed_dict={features_placeholder: features,
                                          labels_placeholder: labels})
try:
    while True:
        value = sess.run(next_element)
        print(value)
except tf.errors.OutOfRangeError:
    print("end")
Contents of exchangeData2.txt:
6.5379,1000
6.5428,1010
6.5559,2000
6.5321,3000
6.5062,2000
6.5062,1210
6.5062,2060
6.5062,3000
6.4909,2000
6.5029,1000
6.4933,4000
6.4874,5000
6.4874,1000
6.4874,2000
6.4973,3000
6.5262,6000
6.5054,9000
6.5045,3000
6.4606,4000
6.4606,5000
6.4349,6000
6.4415,1100
6.4329,2700
6.4174,3500
6.3989,4100
6.3989,7700
6.4034,6200
6.4017,1200
6.3550,2800
6.3188,1900
6.3198,1100
6.3198,1200
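Because the iterator in method 2 is initializable, it can be re-initialized to make additional passes over the same in-memory arrays. Below is a minimal sketch of this (the batch size and epoch count are assumptions for illustration, not from the example above):
import tensorflow as tf
import numpy as np

path = r"E:\tf_project\练习\exchangeData2.txt"
data = np.loadtxt(path, delimiter=",", dtype=np.float32)
features, labels = data[:, 0], data[:, 1]
features_placeholder = tf.placeholder(features.dtype, features.shape)
labels_placeholder = tf.placeholder(labels.dtype, labels.shape)
dataset = tf.data.Dataset.from_tensor_slices((features_placeholder, labels_placeholder))
dataset = dataset.batch(4)  # assumed batch size, for illustration only
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()
sess = tf.Session()
for epoch in range(2):  # two passes over the same in-memory arrays
    sess.run(iterator.initializer, feed_dict={features_placeholder: features,
                                              labels_placeholder: labels})
    try:
        while True:
            batch_features, batch_labels = sess.run(next_element)
            print(epoch, batch_features, batch_labels)
    except tf.errors.OutOfRangeError:
        pass  # this epoch is exhausted; re-initialize the iterator for the next one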
Reading method 3: read data from a text file.
import tensorflow as tf
import numpy as np

tf.reset_default_graph()
#file_path = r"E:\tf_project\NMT\zh-en\train.tags.zh-en.en"
file_path = r"E:\tf_project\练习\word_src.txt"
dataset = tf.data.TextLineDataset(file_path)
# Split each line into a vector of whitespace-separated tokens
dataset = dataset.map(lambda string: tf.string_split([string]).values)
# Pair each token vector with its length
dataset = dataset.map(lambda x: (x, tf.size(x)))
iterator = dataset.make_one_shot_iterator()
# Create get_next() once, outside the loop, so a new op is not added
# to the graph on every iteration
next_element = iterator.get_next()
sess = tf.Session()
try:
    i = 0
    while i < 10:
        print(sess.run(next_element))
        i += 1
except tf.errors.OutOfRangeError:
    print("end")
Contents of word_src.txt:
it is the time
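Since each line in method 3 produces a variable-length token vector, batching requires padding. Below is a minimal sketch of this (the batch size and padded shapes are assumptions for illustration, not from the example above), using padded_batch to pad the token dimension within each batch:
import tensorflow as tf

tf.reset_default_graph()
file_path = r"E:\tf_project\练习\word_src.txt"
dataset = tf.data.TextLineDataset(file_path)
dataset = dataset.map(lambda string: tf.string_split([string]).values)
dataset = dataset.map(lambda x: (x, tf.size(x)))
# Assumed for illustration: batches of 2; pad the token vector to the longest
# sequence in the batch, while the scalar length needs no padding
dataset = dataset.padded_batch(2, padded_shapes=(tf.TensorShape([None]), tf.TensorShape([])))
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()
sess = tf.Session()
try:
    while True:
        print(sess.run(next_element))
except tf.errors.OutOfRangeError:
    print("end")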