Dataset
tf.data.Dataset.from_tensor_slices()
- dataset.make_one_shot_iterator()
iterator = dataset.make_one_shot_iterator() instantiates an iterator from the dataset. This iterator is a "one shot iterator", meaning it can read through the dataset exactly once, from start to finish. one_element = iterator.get_next() fetches one element from the iterator. Since this is non-Eager (graph) mode, one_element is only a Tensor, not an actual value; the value is only produced when sess.run(one_element) is called. Once every element of the dataset has been read, another sess.run(one_element) raises a tf.errors.OutOfRangeError exception, which matches the behavior of the queue-based way of reading data. In a real program, you can catch this exception to detect that the data has been exhausted (a sketch follows the code below). A one-shot iterator initializes itself automatically, but once consumed it cannot be initialized again.
import numpy as np
import tensorflow as tf

# Build a small dataset so the snippet runs on its own (illustrative values).
dataset = tf.data.Dataset.from_tensor_slices(np.array([1.0, 2.0, 3.0, 4.0, 5.0]))
iterator = dataset.make_one_shot_iterator()
one_element = iterator.get_next()
with tf.Session() as sess:
    for i in range(5):
        print(sess.run(one_element))
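As noted above, reading past the end raises tf.errors.OutOfRangeError. A minimal sketch of catching it to detect the end of the data (reusing dataset and one_element from the block above; a fresh session restarts the one-shot iterator's state):

with tf.Session() as sess:
    try:
        while True:
            print(sess.run(one_element))
    except tf.errors.OutOfRangeError:
        # All elements have been consumed.
        print("dataset exhausted")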
- make_initializable_iterator
An initializable iterator improves on the one-shot iterator in that it can be used multiple times: it must be re-initialized after each full pass, and re-initializing lets you swap the data fed into a single dataset under a single iterator (a minimal sketch follows).
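A minimal sketch of the pattern, following the standard TF 1.x placeholder-fed usage (the names limit and next_element are illustrative, not from the original): the same iterator is initialized twice with different feed values, and each initialization restarts the read from the beginning.

import tensorflow as tf

limit = tf.placeholder(tf.int64, shape=[])
dataset = tf.data.Dataset.range(limit)
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
    # First pass: yields 0, 1, 2.
    sess.run(iterator.initializer, feed_dict={limit: 3})
    for _ in range(3):
        print(sess.run(next_element))
    # Re-initialize with a different feed: yields 0 .. 4.
    sess.run(iterator.initializer, feed_dict={limit: 5})
    for _ in range(5):
        print(sess.run(next_element))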
Hands-on example
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras
print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)
1.15.0
sys.version_info(major=3, minor=7, micro=6, releaselevel='final', serial=0)
matplotlib 3.1.3
numpy 1.18.1
pandas 1.0.1
sklearn 0.22.1
tensorflow 1.15.0
tensorflow.python.keras.api._v1.keras 2.2.4-tf
fashion_mnist = keras.datasets.fashion_mnist
(x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data()
x_valid, x_train = x_train_all[:5000], x_train_all[5000:]
y_valid, y_train = y_train_all[:5000], y_train_all[5000:]
print(x_valid.shape, y_valid.shape)
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)
(5000, 28, 28) (5000,)
(55000, 28, 28) (55000,)
(10000, 28, 28) (10000,)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(
    x_train.astype(np.float32).reshape(-1, 1)).reshape(-1, 28 * 28)
x_valid_scaled = scaler.transform(
    x_valid.astype(np.float32).reshape(-1, 1)).reshape(-1, 28 * 28)
x_test_scaled = scaler.transform(
    x_test.astype(np.float32).reshape(-1, 1)).reshape(-1, 28 * 28)
y_train = np.asarray(y_train, dtype = np.int64)
y_valid = np.asarray(y_valid, dtype = np.int64)
y_test = np.asarray(y_test, dtype = np.int64)
def make_dataset(images, labels, epochs, batch_size, shuffle = True):
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    if shuffle:
        # Shuffle with a buffer of 10000 examples before repeating/batching.
        dataset = dataset.shuffle(10000)
    dataset = dataset.repeat(epochs).batch(batch_size)
    return dataset
batch_size = 20
epochs = 10
dataset = make_dataset(x_train_scaled, y_train,
                       epochs=epochs, batch_size = batch_size)
dataset_iter = dataset.make_one_shot_iterator()
x, y = dataset_iter.get_next()
with tf.Session() as sess:
    x_val, y_val = sess.run([x, y])
    print(x_val.shape)
    print(y_val.shape)
WARNING:tensorflow:From :16: DatasetV1.make_one_shot_iterator (from tensorflow.python.data.ops.dataset_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.
(20, 784)
(20,)
batch_size = 20
epochs = 10
images_placeholder = tf.placeholder(tf.float32, [None, 28*28])
labels_placeholder = tf.placeholder(tf.int64, [None, ])
dataset = make_dataset(images_placeholder, labels_placeholder, epochs=epochs, batch_size = batch_size)
dataset_iter = dataset.make_initializable_iterator()
x, y = dataset_iter.get_next()
with tf.Session() as sess:
    # Initialize the iterator with the training split.
    sess.run(dataset_iter.initializer,
             feed_dict = {
                 images_placeholder: x_train_scaled,
                 labels_placeholder: y_train,
             })
    x_val, y_val = sess.run([x, y])
    print(x_val.shape)
    print(y_val.shape)
    # Re-initialize the same iterator with the validation split.
    sess.run(dataset_iter.initializer,
             feed_dict = {
                 images_placeholder: x_valid_scaled,
                 labels_placeholder: y_valid,
             })
    x_val, y_val = sess.run([x, y])
    print(x_val.shape)
    print(y_val.shape)
hidden_units = [100, 100]
class_num = 10
input_for_next_layer = x
for hidden_unit in hidden_units:
    input_for_next_layer = tf.layers.dense(input_for_next_layer,
                                           hidden_unit,
                                           activation=tf.nn.relu)
logits = tf.layers.dense(input_for_next_layer, class_num)
loss = tf.losses.sparse_softmax_cross_entropy(labels = y,
                                              logits = logits)
prediction = tf.argmax(logits, 1)
correct_prediction = tf.equal(prediction, y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
WARNING:tensorflow:From :14: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.Dense instead.
WARNING:tensorflow:From E:\Anaconda\anaconda\envs\tensorflow1\lib\site-packages\tensorflow_core\python\layers\core.py:187: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `layer.__call__` method instead.
WARNING:tensorflow:From E:\Anaconda\anaconda\envs\tensorflow1\lib\site-packages\tensorflow_core\python\ops\losses\losses_impl.py:121: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
print(x)
print(logits)
Tensor("IteratorGetNext:0", shape=(?, 784), dtype=float32)
Tensor("dense_2/BiasAdd:0", shape=(?, 10), dtype=float32)
init = tf.global_variables_initializer()
train_steps_per_epoch = x_train.shape[0] // batch_size
with tf.Session() as sess:
    sess.run(init)
    # x and y come from the initializable iterator above, so it must be
    # fed and initialized with the training split before training.
    sess.run(dataset_iter.initializer,
             feed_dict = {
                 images_placeholder: x_train_scaled,
                 labels_placeholder: y_train,
             })
    for epoch in range(epochs):
        for step in range(train_steps_per_epoch):
            loss_val, accuracy_val, _ = sess.run([loss, accuracy, train_op])
            print('\r[Train] epoch: %d, step:%d, loss: %3.5f, accuracy: %2.2f'
                  % (epoch, step, loss_val, accuracy_val), end="")
[Train] epoch: 9, step:2749, loss: 0.09922, accuracy: 0.95
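The same initializable iterator makes validation straightforward: re-initialize it with the validation split and run the accuracy node. A hedged sketch of this continuation, placed inside the same tf.Session block as the training loop above (valid_steps is an illustrative name, not from the original):

    # Point the iterator at the validation split.
    sess.run(dataset_iter.initializer,
             feed_dict = {
                 images_placeholder: x_valid_scaled,
                 labels_placeholder: y_valid,
             })
    valid_steps = x_valid.shape[0] // batch_size
    accuracies = []
    for _ in range(valid_steps):
        accuracies.append(sess.run(accuracy))
    print('\n[Valid] accuracy: %2.2f' % np.mean(accuracies))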