1、keras.datasets
2、tf.data.Dataset.from_tensor_slices
shuffle
map
batch
repeat
1、boston housing # 波士顿房价预测
2、mnist/fashion mnist
3、cifar10/cifar100
4、imdb # 用户评语
# ****************** mnist
# NOTE: this snippet relies on `tf` and `keras` already being bound; no import
# statement is visible in these notes.
# load_data() hands the train and test splits back as NumPy arrays.
(x, y), (x_test, y_test) = keras.datasets.mnist.load_data()
# Show the value range of the training images: minimum first, then maximum.
for pixel_extreme in (x.min(), x.max()):
    print(pixel_extreme)
# One-hot encode the integer labels with a depth of 10, then show one row.
y_onehot = tf.one_hot(indices=y, depth=10)
print(y_onehot[1])
# ***************** cifar10/100
# Per these notes, CIFAR-10 and CIFAR-100 contain the same pictures with
# different annotations: 10 coarse classes versus 100 fine classes.
(x, y), (x_test, y_test) = keras.datasets.cifar10.load_data()
print(x.shape)
# Display one training image.
plt.imshow(x[2])
plt.show()
# Read a local image file and display it without axes.
apple = plt.imread("./一颗苹果.jpg")
# A bare `apple.shape` only echoes in a notebook; print it so a script shows it.
print(apple.shape)
plt.imshow(apple)
plt.axis("off")
# Without this call the second figure is never rendered when run as a script.
plt.show()
print(y[0])
# Load the IMDB dataset (listed in these notes as user reviews) and look at
# the shape of the training inputs, the first sample and the first label.
imdb_train, imdb_test = keras.datasets.imdb.load_data()
x, y = imdb_train
x_test, y_test = imdb_test
for shown in (x.shape, x[0], y[0]):
    print(shown)
# Fixed the `x_tses` typo: with it, `x_test` silently kept the data from the
# previously loaded dataset instead of the CIFAR-10 test images.
(x, y), (x_test, y_test) = keras.datasets.cifar10.load_data()
# from_tensor_slices wraps the array in a dataset that can be iterated.
db1 = tf.data.Dataset.from_tensor_slices(x)
ite = iter(db1)
print(next(ite).shape)
# Features and labels are passed together as a tuple; a list [x, y] cannot be
# used here.
db2 = tf.data.Dataset.from_tensor_slices((x, y))
ite2 = iter(db2)
# Index 1 of each element is the label part of the (image, label) pair.
print(next(ite2)[1].shape)
# ****************** db.shuffle
(x, y), (x_test, y_test) = keras.datasets.mnist.load_data()
db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
# shuffle() returns a new dataset and leaves the original untouched, so the
# result has to be kept; calling it without assignment has no effect.
# The larger the buffer size, the more thoroughly the elements are mixed.
db = db.shuffle(10000)
# ******************** db.map
# map() is used for data preprocessing: it calls the given function on every
# element of the dataset.
def preprocess(x, y):
    """Prepare one (image, label) element for training.

    Args:
        x: Image tensor; cast to float32 and scaled by 1/255.
        y: Integer class label; cast to int32 and one-hot encoded (depth 10).

    Returns:
        Tuple ``(x, y)`` of the scaled image and the one-hot label.
    """
    # Scale by the fixed constant 255 rather than by each image's own maximum:
    # per-image scaling gives every sample a different scale and divides by
    # zero for an all-black image.
    x = tf.cast(x, dtype=tf.float32) / 255.0
    y = tf.cast(y, dtype=tf.int32)
    y = tf.one_hot(y, depth=10)
    return x, y


(x, y), (x_test, y_test) = keras.datasets.mnist.load_data()
db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db1 = db.map(preprocess)
# Pull a single preprocessed element and show the image and label shapes.
result = next(iter(db1))
print(result[0].shape)
print(result[1].shape)
# ******************* db.batch
(x, y), (x_test, y_test) = keras.datasets.mnist.load_data()
db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
# batch(100): every element of the new dataset holds 100 samples.
db1 = db.batch(100)
batch_iterator = iter(db1)
res = next(batch_iterator)
# Show the shape of the image batch, then the shape of the label batch.
print(res[0].shape, res[1].shape, sep="\n")
# ******************* db.repeat
(x, y), (x_test, y_test) = keras.datasets.mnist.load_data()
db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db1 = db.batch(100)
# Without a count the dataset repeats forever, so next() never runs out.
db2 = db1.repeat()
# Repeat the dataset twice. The original called reduce(2), which is a
# different method (it needs an initial state and a reduce function).
db3 = db1.repeat(2)
# ****************** Worked example
def mnist_data(batch_size=100, shuffle_buffer=10000):
    """Build shuffled, preprocessed, batched MNIST train and test datasets.

    Args:
        batch_size: Number of samples per batch (default 100).
        shuffle_buffer: Buffer size passed to ``shuffle`` (default 10000).

    Returns:
        Tuple ``(db_train, db_test)``. Both datasets end with ``repeat()``
        called without a count, so they never run out; the consumer decides
        how many batches to draw.
    """

    def preprocess(x, y):
        """Scale the image by 1/255 as float32 and one-hot encode the label."""
        x = tf.cast(x, dtype=tf.float32) / 255
        y = tf.one_hot(tf.cast(y, dtype=tf.int32), depth=10)
        return x, y

    (x, y), (x_test, y_test) = keras.datasets.mnist.load_data()
    db_train = tf.data.Dataset.from_tensor_slices((x, y)).shuffle(shuffle_buffer)
    db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test)).shuffle(
        shuffle_buffer
    )
    # Shuffle first, then preprocess each element, batch, and repeat.
    db_train = db_train.map(preprocess).batch(batch_size).repeat()
    db_test = db_test.map(preprocess).batch(batch_size).repeat()
    return db_train, db_test
# Build the datasets only when this file is executed as a script.
if __name__ == "__main__":
    mnist_data()
本文为参考龙龙老师的“深度学习与TensorFlow 2入门实战”课程书写的学习笔记
by CyrusMay 2022 04 16