# Pokémon dataset (宝可梦数据集)
# pokemon.py
import os, glob
import random, csv
import tensorflow as tf
def load_csv(root, filename, name2label):
    """Return the image paths and integer labels listed in ``root/filename``.

    If the CSV index does not exist yet, it is created first: every
    ``*.png``, ``*.jpg`` and ``*.jpeg`` file found directly inside
    ``root/<class name>/`` is collected, the list is shuffled once, and one
    ``path,label`` row per image is written. Because the shuffled order is
    persisted in the file, later calls return the images in the same order.

    Args:
        root: Dataset root directory containing one sub-directory per class.
        filename: Name of the CSV index file, relative to ``root``.
        name2label: Mapping from class (sub-directory) name to integer label.

    Returns:
        A tuple ``(images, labels)`` of two equally long lists: the image
        paths as stored in the CSV and their integer labels.
    """
    csv_path = os.path.join(root, filename)

    if not os.path.exists(csv_path):
        images = []
        for name in name2label:
            # e.g. 'pokemon\\mewtwo\\00001.png'
            for pattern in ('*.png', '*.jpg', '*.jpeg'):
                images += glob.glob(os.path.join(root, name, pattern))

        random.shuffle(images)
        with open(csv_path, mode='w', newline='') as f:
            writer = csv.writer(f)
            for img in images:
                # The parent directory of an image is its class name.
                name = os.path.basename(os.path.dirname(img))
                # Row layout: 'pokemon\\bulbasaur\\00000000.png', 0
                writer.writerow([img, name2label[name]])
            print('written into csv file:', filename)

    # Read the (possibly just written) index back from disk.
    images, labels = [], []
    # newline='' lets the csv module handle line endings itself.
    with open(csv_path, newline='') as f:
        for row in csv.reader(f):
            if not row:
                # Tolerate blank lines, e.g. a trailing newline added by hand.
                continue
            img, label = row
            label = int(label)
            images.append(img)
            labels.append(label)
    return images, labels
def load_pokemon(root, mode='train'):
    """Load one split of the image dataset stored under ``root``.

    Every sub-directory of ``root`` is treated as one class; classes are
    numbered 0, 1, 2, ... in sorted name order. The image/label lists come
    from ``load_csv`` (index file ``images.csv``) and are cut by position:
    the first 60% for training, the next 20% for validation, and the last
    20% for testing.

    Args:
        root: Dataset root directory.
        mode: ``'train'`` or ``'val'``; any other value selects the test
            split.

    Returns:
        ``(images, labels, name2label)``: the image paths and integer labels
        of the requested split, and the class-name -> label mapping.
    """
    # Build the class-name -> integer encoding table.
    class_dirs = [
        entry
        for entry in sorted(os.listdir(os.path.join(root)))
        if os.path.isdir(os.path.join(root, entry))
    ]
    name2label = {entry: index for index, entry in enumerate(class_dirs)}

    # images: [file1, file2, ...], labels: [3, 1, ...]
    images, labels = load_csv(root, 'images.csv', name2label)

    def _split(items):
        """Return the slice of ``items`` that belongs to ``mode``."""
        total = len(items)
        if mode == 'train':  # first 60%
            return items[:int(0.6 * total)]
        if mode == 'val':  # 60% -> 80%
            return items[int(0.6 * total):int(0.8 * total)]
        return items[int(0.8 * total):]  # 80% -> 100%

    return _split(images), _split(labels), name2label
# Per-channel mean and standard deviation of the ImageNet images.
# These must be real module-level constants: they are evaluated as default
# argument values when normalize/denormalize are defined.
img_mean = tf.constant([0.485, 0.456, 0.406])
img_std = tf.constant([0.229, 0.224, 0.225])


def normalize(x, mean=img_mean, std=img_std):
    """Standardize ``x`` channel-wise as ``(x - mean) / std``.

    Args:
        x: Image tensor whose last dimension holds the 3 colour channels,
            e.g. shape [224, 224, 3].
        mean: Per-channel mean, shape [3]; broadcast over ``x``.
        std: Per-channel standard deviation, shape [3]; broadcast over ``x``.

    Returns:
        The standardized tensor, same shape as ``x``.
    """
    x = (x - mean) / std
    return x


def denormalize(x, mean=img_mean, std=img_std):
    """Invert ``normalize`` (``x * std + mean``), e.g. for visualization.

    Args:
        x: Tensor previously produced by ``normalize``.
        mean: Per-channel mean that was used for normalization.
        std: Per-channel standard deviation that was used for normalization.

    Returns:
        The tensor mapped back to its pre-normalization value range.
    """
    x = x * std + mean
    return x
def preprocess(x, y):
    """Turn an image path and its integer label into one model input pair.

    The file is read and decoded to 3 channels, resized to 244x244, randomly
    cropped to 224x224, scaled to [0, 1] and standardized with ``normalize``.

    Args:
        x: Path of the image file.
        y: Integer class label of the image.

    Returns:
        ``(image, label)``: the preprocessed float32 image tensor of shape
        [224, 224, 3] and the label converted to a tensor.
    """
    raw_bytes = tf.io.read_file(x)
    # NOTE(review): the CSV index also lists .png files; confirm decode_jpeg
    # handles them in the TensorFlow version in use.
    image = tf.image.decode_jpeg(raw_bytes, channels=3)
    # Resize a little larger than the target so the random crop below has
    # room to move; resize straight to 224 if no cropping is wanted.
    image = tf.image.resize(image, [244, 244])

    # Data augmentation. Optional extras:
    #   image = tf.image.random_flip_up_down(image)     # random vertical flip
    #   image = tf.image.random_flip_left_right(image)  # random horizontal flip
    image = tf.image.random_crop(image, [224, 224, 3])

    # [0, 255] -> [0, 1]
    image = tf.cast(image, dtype=tf.float32) / 255.
    # Standardize with the per-channel mean and standard deviation.
    image = normalize(image)
    label = tf.convert_to_tensor(y)
    return image, label
def main():
    """Smoke-test the loader: print the size of each split and the classes.

    Uses the ``'pokemon'`` directory as dataset root. The first run also
    creates the ``images.csv`` index inside it (see ``load_csv``).
    """
    table = {}
    for mode in ('train', 'val', 'test'):
        images, labels, table = load_pokemon('pokemon', mode)
        print(mode, 'images:', len(images), 'labels:', len(labels))
    print('classes:', table)


if __name__ == '__main__':
    main()
# resnet.py
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
# Seed TensorFlow's and NumPy's global random generators with a fixed value
# (for reproducibility).
tf.random.set_seed(22)
np.random.seed(22)
# Raise the log threshold of TensorFlow's native backend.
# NOTE(review): this is assigned after `import tensorflow`; confirm it still
# takes effect, or move it above the import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# The code below is written against the TensorFlow 2.x API.
assert tf.__version__.startswith('2.')
class ResnetBlock(keras.Model):
    """Residual block: two 3x3 convolutions plus a shortcut connection.

    Each convolution is followed by ReLU and then batch normalization. When
    ``strides != 1`` the shortcut is a strided 1x1 convolution (also followed
    by ReLU and batch normalization) so that it matches the main path;
    otherwise the input itself is added, which requires the input to already
    have ``channels`` channels.

    Args:
        channels: Number of output channels of both convolutions.
        strides: Stride of the first convolution and of the shortcut.
    """

    def __init__(self, channels, strides=1):
        super().__init__()
        self.channels = channels
        self.strides = strides

        # Explicit padding given as one [before, after] pair per input
        # dimension: one pixel on each side of the two middle dimensions.
        # NOTE(review): confirm the installed Keras accepts list-valued padding.
        self.conv1 = layers.Conv2D(
            channels, 3, strides=strides,
            padding=[[0, 0], [1, 1], [1, 1], [0, 0]])
        self.bn1 = layers.BatchNormalization()
        self.conv2 = layers.Conv2D(
            channels, 3, strides=1,
            padding=[[0, 0], [1, 1], [1, 1], [0, 0]])
        self.bn2 = layers.BatchNormalization()

        if strides != 1:
            # Projection shortcut used when the main path downsamples.
            self.down_conv = layers.Conv2D(
                channels, 1, strides=strides, padding='valid')
            self.down_bn = layers.BatchNormalization()

    def call(self, inputs, training=None):
        """Apply the block to ``inputs``.

        Args:
            inputs: Input feature map.
            training: Forwarded to the batch-normalization layers.

        Returns:
            ``relu(main_path(inputs) + shortcut(inputs))``.
        """
        # Main path: (conv -> relu -> bn) twice.
        out = self.bn1(tf.nn.relu(self.conv1(inputs)), training=training)
        out = self.bn2(tf.nn.relu(self.conv2(out)), training=training)

        # Shortcut path.
        if self.strides == 1:
            shortcut = inputs
        else:
            shortcut = self.down_bn(
                tf.nn.relu(self.down_conv(inputs)), training=training)

        return tf.nn.relu(out + shortcut)
class ResNet(keras.Model):
    """Small ResNet-style classifier built from ``ResnetBlock`` units.

    Layout: a strided 3x3 stem convolution, eight residual blocks whose
    width grows from ``2 * initial_filters`` to ``16 * initial_filters``,
    batch normalization, global pooling and a dense layer producing one
    raw score (logit) per class.

    Args:
        num_classes: Number of output units of the final dense layer.
        initial_filters: Number of filters of the stem convolution; block
            widths are multiples of it.
        **kwargs: Forwarded to ``keras.Model``.
    """

    def __init__(self, num_classes, initial_filters=16, **kwargs):
        super().__init__(**kwargs)
        self.stem = layers.Conv2D(initial_filters, 3, strides=3, padding='valid')

        # (width multiplier, stride) of each residual block, in order.
        # A layers.Dropout(rate=0.5) could be inserted after the second
        # block to reduce overfitting.
        block_specs = [
            (2, 3), (2, 1),
            (4, 3), (4, 1),
            (8, 2), (8, 1),
            (16, 2), (16, 1),
        ]
        self.blocks = keras.models.Sequential([
            ResnetBlock(initial_filters * multiplier, strides=stride)
            for multiplier, stride in block_specs
        ])

        self.final_bn = layers.BatchNormalization()
        # NOTE(review): the attribute is named avg_pool but the layer is
        # global *max* pooling; confirm which one is intended.
        self.avg_pool = layers.GlobalMaxPool2D()
        self.fc = layers.Dense(num_classes)

    def call(self, inputs, training=None):
        """Return the class logits for a batch of images.

        Args:
            inputs: Batch of input images.
            training: Forwarded to the sub-layers.

        Returns:
            Output of the final dense layer (no softmax applied).
        """
        features = tf.nn.relu(self.stem(inputs, training=training))
        features = self.blocks(features, training=training)
        features = self.final_bn(features, training=training)
        pooled = self.avg_pool(features)
        return self.fc(pooled)
def main():
    """Build a 5-class ResNet for 224x224 RGB input and print its summary."""
    model = ResNet(5)
    model.build(input_shape=(4, 224, 224, 3))
    model.summary()


if __name__ == '__main__':
    main()
# train.py
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers,optimizers,losses
from tensorflow.keras.callbacks import EarlyStopping
from pokemon import load_pokemon, normalize, denormalize
from resnet import ResNet
# Seed TensorFlow's and NumPy's global random generators with a fixed value
# (for reproducibility).
tf.random.set_seed(22)
np.random.seed(22)
# Raise the log threshold of TensorFlow's native backend.
# NOTE(review): this is assigned after `import tensorflow`; confirm it still
# takes effect, or move it above the import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# The code below is written against the TensorFlow 2.x API.
assert tf.__version__.startswith('2.')
def preprocess(x, y):
    """Turn an image path and its integer label into one model input pair.

    The file is read and decoded to 3 channels, resized to 244x244, randomly
    cropped to 224x224, scaled to [0, 1] and standardized with ``normalize``.

    Args:
        x: Path of the image file.
        y: Integer class label of the image.

    Returns:
        ``(image, label)``: the preprocessed float32 image tensor of shape
        [224, 224, 3] and the label converted to a tensor.
    """
    raw_bytes = tf.io.read_file(x)
    # NOTE(review): the dataset may also contain non-JPEG files; confirm
    # decode_jpeg handles them in the TensorFlow version in use.
    image = tf.image.decode_jpeg(raw_bytes, channels=3)
    # Resize a little larger than the target so the random crop below has
    # room to move; resize straight to 224 if no cropping is wanted.
    image = tf.image.resize(image, [244, 244])

    # Data augmentation. Optional extras:
    #   image = tf.image.random_flip_up_down(image)     # random vertical flip
    #   image = tf.image.random_flip_left_right(image)  # random horizontal flip
    image = tf.image.random_crop(image, [224, 224, 3])

    # [0, 255] -> [0, 1]
    image = tf.cast(image, dtype=tf.float32) / 255.
    # Standardize with the per-channel mean and standard deviation.
    image = normalize(image)
    label = tf.convert_to_tensor(y)
    return image, label
batchsz = 256


def _build_split(mode, shuffle=False):
    """Load one split of the 'pokemon' dataset and wrap it in a tf.data pipeline.

    Returns ``(images, labels, table, dataset)`` where ``dataset`` yields
    preprocessed batches of ``batchsz`` examples. With ``shuffle=True`` the
    examples are shuffled (buffer of 1000) before preprocessing.
    """
    paths, targets, mapping = load_pokemon('pokemon', mode=mode)
    dataset = tf.data.Dataset.from_tensor_slices((paths, targets))
    if shuffle:
        dataset = dataset.shuffle(1000)
    dataset = dataset.map(preprocess).batch(batchsz)
    return paths, targets, mapping, dataset


# Training set (shuffled).
images, labels, table, db_train = _build_split('train', shuffle=True)
# Validation set.
images2, labels2, table, db_val = _build_split('val')
# Test set.
images3, labels3, table, db_test = _build_split('test')
# For a very small dataset, a lighter network such as this one may be worth
# trying instead:
#
# resnet = keras.Sequential([
#     layers.Conv2D(16, 5, 3),
#     layers.MaxPool2D(3, 3),
#     layers.ReLU(),
#     layers.Conv2D(64, 5, 3),
#     layers.MaxPool2D(2, 2),
#     layers.ReLU(),
#     layers.Flatten(),
#     layers.Dense(64),
#     layers.ReLU(),
#     layers.Dense(5)
# ])

# One output logit per class found by load_pokemon (instead of a fixed 5).
resnet = ResNet(len(table))
resnet.build(input_shape=(4, 224, 224, 3))
resnet.summary()

# Stop once the validation accuracy has not improved by at least 0.001 for
# 5 consecutive epochs.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.001,
    patience=5,
)

resnet.compile(
    # `learning_rate` is the supported argument name; `lr` is deprecated.
    optimizer=optimizers.Adam(learning_rate=1e-3),
    # The labels produced by preprocess() are integer class ids, not one-hot
    # vectors, so the sparse variant of the cross-entropy loss is required.
    loss=losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
resnet.fit(db_train, validation_data=db_val, validation_freq=1, epochs=100,
           callbacks=[early_stopping])
resnet.evaluate(db_test)
# transfer.py
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers,optimizers,losses
from tensorflow.keras.callbacks import EarlyStopping
from pokemon import load_pokemon,normalize
# Seed TensorFlow's and NumPy's global random generators with a fixed value
# (for reproducibility).
tf.random.set_seed(22)
np.random.seed(22)
# Raise the log threshold of TensorFlow's native backend.
# NOTE(review): this is assigned after `import tensorflow`; confirm it still
# takes effect, or move it above the import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# The code below is written against the TensorFlow 2.x API.
assert tf.__version__.startswith('2.')
def preprocess(x, y):
    """Turn an image path and its integer label into one model input pair.

    The file is read and decoded to 3 channels, resized to 244x244, randomly
    cropped to 224x224, scaled to [0, 1] and standardized with ``normalize``.

    Args:
        x: Path of the image file.
        y: Integer class label of the image.

    Returns:
        ``(image, label)``: the preprocessed float32 image tensor of shape
        [224, 224, 3] and the label converted to a tensor.
    """
    raw_bytes = tf.io.read_file(x)
    # NOTE(review): the dataset may also contain non-JPEG files; confirm
    # decode_jpeg handles them in the TensorFlow version in use.
    image = tf.image.decode_jpeg(raw_bytes, channels=3)
    # Resize a little larger than the target so the random crop below has
    # room to move; resize straight to 224 if no cropping is wanted.
    image = tf.image.resize(image, [244, 244])

    # Data augmentation. Optional extras:
    #   image = tf.image.random_flip_up_down(image)     # random vertical flip
    #   image = tf.image.random_flip_left_right(image)  # random horizontal flip
    image = tf.image.random_crop(image, [224, 224, 3])

    # [0, 255] -> [0, 1]
    image = tf.cast(image, dtype=tf.float32) / 255.
    # Standardize with the per-channel mean and standard deviation.
    image = normalize(image)
    label = tf.convert_to_tensor(y)
    return image, label
batchsz = 256


def _build_split(mode, shuffle=False):
    """Load one split of the 'pokemon' dataset and wrap it in a tf.data pipeline.

    Returns ``(images, labels, table, dataset)`` where ``dataset`` yields
    preprocessed batches of ``batchsz`` examples. With ``shuffle=True`` the
    examples are shuffled (buffer of 1000) before preprocessing.
    """
    paths, targets, mapping = load_pokemon('pokemon', mode=mode)
    dataset = tf.data.Dataset.from_tensor_slices((paths, targets))
    if shuffle:
        dataset = dataset.shuffle(1000)
    dataset = dataset.map(preprocess).batch(batchsz)
    return paths, targets, mapping, dataset


# Training set (shuffled).
images, labels, table, db_train = _build_split('train', shuffle=True)
# Validation set.
images2, labels2, table, db_val = _build_split('val')
# Test set.
images3, labels3, table, db_test = _build_split('test')
# Fine-tune on top of a VGG19 backbone.
# Load VGG19 with its ImageNet weights; include_top=False drops the original
# output layers and pooling='max' selects max pooling for the backbone output.
vgg = keras.applications.VGG19(weights='imagenet', include_top=False, pooling='max')
vgg.trainable = False  # freeze the backbone parameters

net = keras.Sequential([
    vgg,
    # The only trainable layer: one logit per class found by load_pokemon
    # (instead of a fixed 5).
    layers.Dense(len(table)),
])
net.build(input_shape=(4, 224, 224, 3))
net.summary()

# Stop once the validation accuracy has not improved by at least 0.001 for
# 5 consecutive epochs.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.001,
    patience=5,
)

# The model defined in this script is `net`; the name `resnet` does not exist
# here and raised a NameError.
net.compile(
    # `learning_rate` is the supported argument name; `lr` is deprecated.
    optimizer=optimizers.Adam(learning_rate=1e-3),
    # The labels produced by preprocess() are integer class ids, not one-hot
    # vectors, so the sparse variant of the cross-entropy loss is required.
    loss=losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
net.fit(db_train, validation_data=db_val, validation_freq=1, epochs=100,
        callbacks=[early_stopping])
net.evaluate(db_test)