环境:tensorflow 2.0.0rc1
目的:处理图像数据,构建简单的分类器。
数据:root/train 文件夹下有两个类别对应的图像文件夹,分别为 ants 和 bees。
实现:使用 tf.keras.preprocessing.image.ImageDataGenerator 以及 tf.data.Dataset 两种方式构建数据生成器。
# -*- coding:utf-8 -*-
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import os
import glob
import random
import pathlib
# Target image size (height, width) shared by the data pipelines and models.
IMAGE_SHAPE = [300, 300]
# Dataset root; expected to contain train/ and val/ sub-directories, each
# holding one folder per class (ants, bees).
root = r'D:\DataSet\hymenoptera_data'
class DataLoader():
    """Load labelled JPEG images from a directory tree into tf.data pipelines.

    ``root`` must contain one sub-directory per class; class indices are
    assigned by sorting the sub-directory names alphabetically.
    """

    def __init__(self, root, batch_size=4):
        self.root = root
        self.batch_size = batch_size
        data_root = pathlib.Path(root)
        # Collect every jpg two levels deep (class_dir/image.jpg); shuffle
        # once so the dataset does not yield one class after the other.
        self.all_img_paths = [str(p) for p in data_root.glob('*/*.jpg')]
        random.shuffle(self.all_img_paths)
        self.label_names = sorted(item.name for item in data_root.glob('*/')
                                  if item.is_dir())
        label_to_index = {name: index
                          for index, name in enumerate(self.label_names)}
        # Integer label per image, derived from its parent directory name.
        self.all_img_labels = [label_to_index[pathlib.Path(path).parent.name]
                               for path in self.all_img_paths]

    @staticmethod
    def preprocess_image(image):
        """Decode raw JPEG bytes into a normalized float tensor.

        BUG FIX: ``tf.image.decode_image`` returns a tensor of unknown
        static rank, which makes ``resize_with_crop_or_pad`` fail inside a
        ``Dataset.map``. ``decode_jpeg`` is safe here because ``__init__``
        only globs ``*.jpg`` files.
        """
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.resize_with_crop_or_pad(
            image, IMAGE_SHAPE[0], IMAGE_SHAPE[1])
        image = tf.cast(image, tf.float64)
        image /= 255.0  # scale pixel values to [0, 1]
        return image

    @staticmethod
    def load_and_preprocess_image(path):
        """Read the file at ``path`` and return the preprocessed image."""
        image = tf.io.read_file(path)
        return DataLoader.preprocess_image(image)

    def get_ds(self):
        """Return an infinite, batched (image, label) tf.data.Dataset."""
        path_ds = tf.data.Dataset.from_tensor_slices(self.all_img_paths)
        label_ds = tf.data.Dataset.from_tensor_slices(self.all_img_labels)
        image_ds = path_ds.map(self.load_and_preprocess_image)
        ds = tf.data.Dataset.zip((image_ds, label_ds))
        # repeat() before batch() so every batch is full.
        return ds.repeat().batch(self.batch_size)

    def show(self, ds, num=2):
        """Print the class name and display the first image of ``num`` batches."""
        for data, label in ds.take(num):
            print(self.label_names[label.numpy()[0]])
            plt.imshow(data.numpy()[0, :, :, :])
            plt.show()

    def write_record(self, record_name):
        """Serialize the raw (still encoded) image bytes into a TFRecord file.

        NOTE(review): labels are NOT stored in the record; ``read_record``
        relies on this instance's ``all_img_labels`` order, and paths are
        shuffled per instance, so a record written by one DataLoader is not
        safe to read with a different instance.
        """
        ds_image = tf.data.Dataset.from_tensor_slices(
            self.all_img_paths).map(tf.io.read_file)
        record = tf.data.experimental.TFRecordWriter(record_name)
        record.write(ds_image)
        print('record saved in {}'.format(record_name))

    def read_record(self, record_name):
        """Load images from a TFRecord and zip them with in-memory labels."""
        image_ds = tf.data.TFRecordDataset(
            record_name).map(self.preprocess_image)
        label_ds = tf.data.Dataset.from_tensor_slices(self.all_img_labels)
        ds = tf.data.Dataset.zip((image_ds, label_ds))
        return ds.repeat().batch(self.batch_size)
def img_loader():
    """Build the training data generator with keras' ImageDataGenerator.

    Returns a directory iterator yielding (images, one-hot labels) batches,
    with pixel values rescaled to [0, 1] and images resized to IMAGE_SHAPE.
    """
    img_generator = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1 / 255)
    img_data = img_generator.flow_from_directory(os.path.join(root, 'train'),
                                                 target_size=IMAGE_SHAPE,
                                                 batch_size=2)
    # The original also built a val generator that was never used or
    # returned (an extra directory scan for nothing) — removed.
    # Peek at a single batch to confirm the shapes coming out.
    for data, label in img_data:
        print(data.shape)
        print(label.shape)
        break
    return img_data
def build_model():
    """Build a small sequential CNN for two-class 300x300 RGB images."""
    model = tf.keras.models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu',
                            input_shape=(300, 300, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(2, activation='softmax'))
    # BUG FIX: summary() was originally called before the Flatten/Dense head
    # was added, printing an incomplete model; summarize after all layers.
    model.summary()
    return model
def build_func_model():
    """Functional-API version of the small two-class CNN classifier."""
    img_input = keras.Input(shape=(300, 300, 3), name='img')
    # Three conv stages with pooling between the first two pairs.
    net = layers.Conv2D(32, (3, 3), activation='relu')(img_input)
    net = layers.MaxPooling2D((2, 2))(net)
    net = layers.Conv2D(64, (3, 3), activation='relu')(net)
    net = layers.MaxPooling2D((2, 2))(net)
    net = layers.Conv2D(64, (3, 3), activation='relu')(net)
    # Classifier head: flatten -> dense -> 2-way softmax.
    net = layers.Flatten()(net)
    net = layers.Dense(64, activation='relu')(net)
    probs = layers.Dense(2, activation='softmax')(net)
    return keras.Model(inputs=img_input, outputs=probs, name='demo')
def resnet():
    """ResNet50 backbone (randomly initialized) with a two-class softmax head.

    See https://keras.io/zh/applications/
    """
    # NOTE: the ``classes`` argument is ignored by keras.applications when
    # include_top=False, so it is not passed; the 2-class head is added
    # manually below instead.
    base_model = keras.applications.resnet50.ResNet50(
        input_shape=(300, 300, 3), weights=None, include_top=False)
    x = base_model.output
    x = keras.layers.GlobalAveragePooling2D()(x)
    outputs = keras.layers.Dense(2, activation='softmax')(x)
    return keras.models.Model(inputs=base_model.input, outputs=outputs)
def train():
    """Train the CNN twice: from an ImageDataGenerator, then from tf.data."""
    # --- Phase 1: keras ImageDataGenerator (one-hot labels) ---
    model = build_model()
    # BUG FIX: tf.keras.metrics.Accuracy() compares raw prediction vectors to
    # labels element-wise and reports ~0 for softmax outputs; the string
    # 'accuracy' lets Keras pick the metric matching the loss.
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # fit() accepts generators in TF 2.x; fit_generator is deprecated.
    model.fit(img_loader(), epochs=2, steps_per_epoch=10)

    # --- Phase 2: tf.data.Dataset (integer labels -> sparse loss) ---
    initial_learning_rate = 0.001
    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate,
        decay_steps=10000,
        decay_rate=0.96,
        staircase=True)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    tensorboard_cbk = keras.callbacks.TensorBoard(log_dir=r'F:\tmp')
    callbacks = [keras.callbacks.ModelCheckpoint(
        filepath='mymodel_{epoch}.h5'), tensorboard_cbk]
    dataloader = DataLoader(os.path.join(root, 'train'), batch_size=2)
    # dataloader.write_record('train.record')
    ds = dataloader.read_record('train.record')
    dataloader.show(ds)
    # BUG FIX: the callbacks list was built but never passed to fit().
    model.fit(ds, epochs=2, steps_per_epoch=10, callbacks=callbacks)
    # Train again directly from the in-memory path dataset.
    ds = dataloader.get_ds()
    model.fit(ds, epochs=2, steps_per_epoch=10, callbacks=callbacks)
# Run training only when executed as a script, not on import.
if __name__ == "__main__":
    train()
参考:TensorFlow 2.0 测试版(beta)官方文档。