@author--HCF
首先准备手写汉字数字,并按照一定间隔整齐排列,这里是10行9列,共90个数字,如图所示
接下来使用PIL库对集中在一起的数字进行分割,从而得到 $90 \times 10 = 900$ 个样本
from PIL import Image # 操作图像
import os # 创建删除文件等
def get_image(rows=10, cols=9, tile_size=(64, 64),
              origin_dir="./Pictures/origin_picture",
              output_dir="./Pictures/generate_picture"):
    """Split each sheet of handwritten digits into rows*cols tiles.

    Every source image under *origin_dir* holds one Chinese numeral
    written repeatedly in a regular rows x cols grid.  Each cell is
    cropped, converted to RGB, resized to *tile_size*, and saved under
    output_dir/<digit>/<digit>_<col><row>.png.

    The defaults reproduce the original behaviour (10 rows, 9 columns,
    64x64 tiles, same directories).
    """
    # Directory name for each handwritten Chinese numeral, label 1..10.
    names = ['one', 'two', 'three', 'four', 'five',
             'six', 'seven', 'eight', 'nine', 'ten']
    for num in names:
        tempdir = output_dir + "/" + num
        if not os.path.exists(tempdir):
            os.makedirs(tempdir)
        else:
            # Clear stale tiles (assumes the directory holds only files).
            for file in os.listdir(tempdir):
                os.remove(tempdir + "/" + file)
        # Open the sheet in a context manager so the file handle is
        # released even if cropping fails (the original leaked it).
        with Image.open(origin_dir + "/" + num + ".png") as img:
            width = img.size[0] // cols   # tile width in pixels
            height = img.size[1] // rows  # tile height in pixels
            # Cut the grid cell by cell and save each tile.
            for j in range(cols):
                for i in range(rows):
                    box = (width * j, height * i,
                           width * (j + 1), height * (i + 1))
                    region = img.crop(box).convert('RGB').resize(tile_size)
                    region.save(tempdir + "/" + num +
                                "_{}{}.png".format(j, i))
get_image()
获取图片后,先查看一下相关路径是否得到了需要的样本图形
# Sanity check: list every generated sample file for the digit "one".
# NOTE: the loop target `files` deliberately leaks out of this loop —
# the following preview cell indexes into it.
for dirpath, dirnames, files in os.walk('./Pictures/generate_picture/one'):
    print(files)  # all non-directory entries under the current path
['one_00.png', 'one_01.png', 'one_02.png', 'one_03.png', 'one_04.png', 'one_05.png', 'one_06.png', 'one_07.png', 'one_08.png', 'one_09.png', 'one_10.png', 'one_11.png', 'one_12.png', 'one_13.png', 'one_14.png', 'one_15.png', 'one_16.png', 'one_17.png', 'one_18.png', 'one_19.png', 'one_20.png', 'one_21.png', 'one_22.png', 'one_23.png', 'one_24.png', 'one_25.png', 'one_26.png', 'one_27.png', 'one_28.png', 'one_29.png', 'one_30.png', 'one_31.png', 'one_32.png', 'one_33.png', 'one_34.png', 'one_35.png', 'one_36.png', 'one_37.png', 'one_38.png', 'one_39.png', 'one_40.png', 'one_41.png', 'one_42.png', 'one_43.png', 'one_44.png', 'one_45.png', 'one_46.png', 'one_47.png', 'one_48.png', 'one_49.png', 'one_50.png', 'one_51.png', 'one_52.png', 'one_53.png', 'one_54.png', 'one_55.png', 'one_56.png', 'one_57.png', 'one_58.png', 'one_59.png', 'one_60.png', 'one_61.png', 'one_62.png', 'one_63.png', 'one_64.png', 'one_65.png', 'one_66.png', 'one_67.png', 'one_68.png', 'one_69.png', 'one_70.png', 'one_71.png', 'one_72.png', 'one_73.png', 'one_74.png', 'one_75.png', 'one_76.png', 'one_77.png', 'one_78.png', 'one_79.png', 'one_80.png', 'one_81.png', 'one_82.png', 'one_83.png', 'one_84.png', 'one_85.png', 'one_86.png', 'one_87.png', 'one_88.png', 'one_89.png']
import matplotlib.pyplot as plt

# Preview all 90 tiles of the digit "one" in a 10x9 grid.
# Fix: the original `i = 0` before the loop was dead code — the loop
# target rebinds `i` immediately; enumerate replaces the index loop.
plt.figure(figsize=(15, 10))
for i, fname in enumerate(files):
    plt.subplot(10, 9, i + 1)
    image = plt.imread("./Pictures/generate_picture/one/" + fname)
    plt.imshow(image)
plt.subplots_adjust(hspace=1)
plt.show()
获取图片后,使用h5py库创建数据集h5文件
通过上述操作可以得到每一个小图片的绝对位置,方便后续操作处理
import os
import numpy as np
import matplotlib.pyplot as plt
import h5py
# Map each digit-directory name to its numeric label 1..10.
dirs = {
    'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5,
    'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10}
# Per-digit list of image paths / per-digit list of matching labels.
dic_image = {
}
dic_label = {
}
# Collect every generated tile path plus its label, one list pair per
# digit.  Fix: the loop variable was renamed from `dir`, which
# shadowed the builtin of the same name.
for name, index in dirs.items():
    dic_image[name] = []
    dic_label[name] = []
    tempdir = "./Pictures/generate_picture/" + name
    for file in os.listdir(tempdir):
        dic_image[name].append(tempdir + "/" + file)
        dic_label[name].append(index)
# print(dic_image, dic_label)
接下来将每个组的两个列表分别进行以下操作
# Flatten the per-digit lists into two parallel 1-D arrays.
list_image_stack = []
list_label_stack = []
for key in dirs:
    list_image_stack = np.hstack((list_image_stack, dic_image[key]))
    list_label_stack = np.hstack((list_label_stack, dic_label[key]))
# print(list_image_stack, list_label_stack)
# Pair each path with its label as rows of an (n, 2) string array,
# then shuffle the rows so samples of one digit are not contiguous.
list_temp = np.array([list_image_stack, list_label_stack]).T
# print(list_temp)
np.random.shuffle(list_temp)
# print(list_temp)
# Unzip back into a path list and an int label list.  Labels became
# strings like '1.0' inside the string array, hence float() first.
list_image = list(list_temp[:, 0])
list_label = [int(float(label)) for label in list_temp[:, 1]]
# print(list_image, list_label)
现在开始创建数据集
# 95% / 5% train/test split of the shuffled sample list.
total_size = len(list_label)
train_size = int(0.95 * total_size)
test_size = total_size - train_size
print(train_size, test_size)
# Image tensors: (samples, height, width, RGB channels), zero-filled.
train_image = np.zeros((train_size, 64, 64, 3)).astype('float32')
test_image = np.zeros((test_size, 64, 64, 3)).astype('float32')
# One-hot label matrices, one row of length 10 per sample.
train_label = np.zeros((train_size, 10, )).astype('int')
test_label = np.zeros((test_size, 10, )).astype('int')
# Identity-matrix rows double as one-hot vectors: row (label - 1)
# carries a single 1 at the label's position.
one_hot = np.eye(len(dirs), dtype='int')
for i in range(train_size):
    train_image[i] = np.array(plt.imread(list_image[i]))
    train_label[i] = one_hot[list_label[i] - 1]
for i in range(test_size):
    test_image[i] = np.array(plt.imread(list_image[train_size + i]))
    test_label[i] = one_hot[list_label[train_size + i] - 1]
855 45
可以查看一下转换是否成功
# Spot-check the first six samples: image plus its one-hot label.
plt.figure(figsize=(15, 3))
for idx in range(6):
    axis = plt.subplot(1, 6, idx + 1)
    plt.imshow(train_image[idx])
    axis.set_title(str(train_label[idx]))
接下来将数据集保存在data.h5文件中,使用h5py库进行创建、写入、读取等操作
# Persist the four arrays into one HDF5 file.  The with-statement
# already closes the file on exit; the original's explicit
# fp.close() inside the block was redundant and has been dropped.
with h5py.File('./data.h5', 'w') as fp:
    fp.create_dataset('X_train', data = train_image)
    fp.create_dataset('y_train', data = train_label)
    fp.create_dataset('X_test', data = test_image)
    fp.create_dataset('y_test', data = test_label)
查看一下当前路径是否新增了data.h5文件,以及其大小(因为之前使用的jupyter notebook)
%ls
驱动器 D 中的卷是 新加卷
卷的序列号是 32F5-D4FF
D:\Python\手写汉字数字识别 的目录
2021/05/15 15:11 <DIR> .
2021/05/15 15:11 <DIR> ..
2021/05/15 13:23 <DIR> .ipynb_checkpoints
2021/05/15 15:13 44,276,896 data.h5
2021/05/15 13:19 570 get_data.py
2021/05/15 13:18 1,740 get_image.py
2021/05/15 15:11 22,372 main.ipynb
2021/05/15 13:32 <DIR> Pictures
4 个文件 44,301,578 字节
4 个目录 263,258,112 可用字节
# Re-load the datasets.  Fix: a context manager now guarantees the
# HDF5 handle is closed (the original kept `data` open for the whole
# session).
with h5py.File('./data.h5', 'r') as data:
    x_train = np.array(data['X_train'][:])
    y_train = np.array(data['y_train'][:])
    x_test = np.array(data['X_test'][:])
    y_test = np.array(data['y_test'][:])
# print (x_train.shape,y_train.shape)
# print (x_test.shape,y_test.shape)
# Normalise dtypes and shapes.  These are no-ops for data.h5 written
# above, but keep the cell robust if the file was produced elsewhere.
x_train = x_train.astype('float32')
y_train = y_train.astype('int')
x_test = x_test.astype('float32')
y_test = y_test.astype('int')
x_train = x_train.reshape(x_train.shape[0], 64, 64, 3)
x_test = x_test.reshape(x_test.shape[0], 64, 64, 3)
print(x_train.shape, x_test.shape)
(855, 64, 64, 3) (45, 64, 64, 3)
下面查看一下数据是否读取成功,同时也可以验证数据集是否创建成功
import random

# Show 40 random training samples with their one-hot labels.
# Fix: the original re-ran random.sample(...) on every loop pass —
# quadratic work, and repeated images were possible across passes.
# Draw the 40 distinct indices once, up front.
indices = random.sample(list(range(len(x_train))), 40)
plt.figure(figsize=(15, 10))
for image_index, index in enumerate(indices):
    ax = plt.subplot(8, 5, image_index + 1)
    ax.set_title(str(y_train[index]), font='Consolas')
    plt.imshow(x_train[index].reshape(64, 64, 3))
plt.subplots_adjust(hspace=1.5)
plt.show()
数据到手,现在可以开始搭建网络了,采用LeNet网络进行训练
首先是一些基础参数的设置
"""Training hyper-parameters."""
num_epochs = 10   # passes over the training set
batch_size = 5    # mini-batch size
alpha = 0.001     # Adam learning rate
# Print arrays in full instead of eliding long ones.
np.set_printoptions(threshold=np.inf)
下面开始建立网络架构
注意valid和same方式填充的区别:
import tensorflow as tf
'''
LeNet-style architecture (not an exact LeNet-5: the input resolution
and the later layer sizes differ):
INPUT 64x64x3 --> C1 conv 5x5, 6 maps, stride 1, valid padding, ReLU
--> S2 average pool 2x2 (halves each side) --> C3 conv 5x5, 16 maps
--> S4 average pool 2x2 --> Flatten --> F5 dense 120 --> F6 dense 84
--> output dense 10 with softmax.
'''
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(filters=6, kernel_size=(
        5, 5), padding='valid', activation=tf.nn.relu, input_shape=(64, 64, 3)),
    tf.keras.layers.AveragePooling2D(
        pool_size=(2, 2), strides=(2, 2), padding='same'),
    # Fix: the stray input_shape on this second Conv2D was removed —
    # Keras only honours input_shape on the first layer, so it was
    # dead and misleading.
    tf.keras.layers.Conv2D(filters=16, kernel_size=(
        5, 5), padding='valid', activation=tf.nn.relu),
    tf.keras.layers.AveragePooling2D(
        pool_size=(2, 2), strides=(2, 2), padding='same'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=120, activation=tf.nn.relu),
    tf.keras.layers.Dense(units=84, activation=tf.nn.relu),
    tf.keras.layers.Dense(units=10, activation=tf.nn.softmax),
])
然后配置一下训练方法,并将网络的相关参数信息打印出来,开始训练
import datetime

# Print the layer-by-layer structure and parameter counts.
model.summary()

# Training configuration: Adam optimiser at the learning rate chosen
# above, categorical cross-entropy loss, accuracy as the metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(alpha),
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=['accuracy']
)

# Time the whole fit; per-epoch loss/accuracy end up in `history`.
start_time = datetime.datetime.now()
history = model.fit(
    x=x_train,
    y=y_train,                  # training set
    batch_size=batch_size,      # samples per gradient step
    epochs=num_epochs,          # number of passes
    validation_split=0.05,      # hold out 5% for validation
)
end_time = datetime.datetime.now()
time_cost = end_time-start_time
print('time_cost: ', time_cost)

# model.save('leNet_model.h5')
print(model.evaluate(x=x_test, y=y_test, batch_size=5))
# model.trainable_variables  # inspect learned weights if needed
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_2 (Conv2D) (None, 60, 60, 6) 456
_________________________________________________________________
average_pooling2d_2 (Average (None, 30, 30, 6) 0
_________________________________________________________________
conv2d_3 (Conv2D) (None, 26, 26, 16) 2416
_________________________________________________________________
average_pooling2d_3 (Average (None, 13, 13, 16) 0
_________________________________________________________________
flatten_1 (Flatten) (None, 2704) 0
_________________________________________________________________
dense_3 (Dense) (None, 120) 324600
_________________________________________________________________
dense_4 (Dense) (None, 84) 10164
_________________________________________________________________
dense_5 (Dense) (None, 10) 850
=================================================================
Total params: 338,486
Trainable params: 338,486
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
163/163 [==============================] - 4s 18ms/step - loss: 2.2975 - accuracy: 0.1267 - val_loss: 1.6140 - val_accuracy: 0.5349
Epoch 2/10
163/163 [==============================] - 2s 15ms/step - loss: 1.2181 - accuracy: 0.6169 - val_loss: 0.6109 - val_accuracy: 0.7674
Epoch 3/10
163/163 [==============================] - 2s 15ms/step - loss: 0.5246 - accuracy: 0.8059 - val_loss: 0.3084 - val_accuracy: 0.8837
Epoch 4/10
163/163 [==============================] - 2s 15ms/step - loss: 0.3058 - accuracy: 0.8960 - val_loss: 0.4523 - val_accuracy: 0.8837
Epoch 5/10
163/163 [==============================] - 2s 15ms/step - loss: 0.1967 - accuracy: 0.9395 - val_loss: 0.2326 - val_accuracy: 0.9070
Epoch 6/10
163/163 [==============================] - 2s 15ms/step - loss: 0.1553 - accuracy: 0.9492 - val_loss: 0.1664 - val_accuracy: 0.9302
Epoch 7/10
163/163 [==============================] - 2s 15ms/step - loss: 0.1243 - accuracy: 0.9523 - val_loss: 0.1282 - val_accuracy: 0.9535
Epoch 8/10
163/163 [==============================] - 3s 16ms/step - loss: 0.0492 - accuracy: 0.9842 - val_loss: 0.1135 - val_accuracy: 0.9535
Epoch 9/10
163/163 [==============================] - 2s 15ms/step - loss: 0.0701 - accuracy: 0.9726 - val_loss: 0.1318 - val_accuracy: 0.9535
Epoch 10/10
163/163 [==============================] - 2s 15ms/step - loss: 0.1474 - accuracy: 0.9525 - val_loss: 0.1397 - val_accuracy: 0.9535
time_cost: 0:00:25.917300
9/9 [==============================] - 0s 7ms/step - loss: 0.2294 - accuracy: 0.9556
[0.2294251173734665, 0.9555555582046509]
当学习率为0.001,训练10次时,训练出来的参数在测试集上的准确率为95.6%
下面将测试集的真实与预测结果绘制出来:
'''Plot every test image together with the model's predicted digit.'''
# Fix: predict the whole test batch in one call instead of invoking
# model.predict once per image inside the loop — one graph execution
# instead of len(y_test), with identical predictions.
preds = model.predict(x_test)
plt.figure(figsize=(15, 10))
rows = int(np.ceil(len(y_test) / 5))  # 5 images per row
for image_index in range(len(y_test)):
    ax = plt.subplot(rows, 5, image_index + 1)
    # argmax gives class index 0..9; the digits are labelled 1..10.
    ax.set_title('predict: {}'.format(preds[image_index].argmax() + 1),
                 font='Consolas')
    plt.imshow(x_test[image_index].reshape(64, 64, 3))
# plt.savefig("predict_num.jpg")
plt.subplots_adjust(hspace=1.5)
plt.show()
下面观察随着训练次数,训练集合测试集的准确率和误差分别的变化对比:
# Pull the per-epoch curves out of `history`: accuracy and loss for
# the training data and for the held-out split.  NOTE(review): the
# original comments called val_* the "test set"; it is actually the
# 5% validation_split passed to model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Two side-by-side panels: accuracy on the left, loss on the right.
plt.figure(figsize=(12, 3))
plt.subplot(121)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.title('Training and Validation Accuracy', font='Consolas')
[label.set_fontname('Consolas') for label in plt.legend().get_texts()]
plt.subplot(122)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Training and Validation Loss', font='Consolas')
[label.set_fontname('Consolas') for label in plt.legend().get_texts()]
plt.show()
下面来看一下其他手写中文数字的识别效果
from PIL import Image  # image loading
import os  # directory walking

# Predict digits for extra handwritten images outside the dataset.
# Fix: the subplot column count was hard-coded to 9, so a tenth test
# image would have raised; it is now sized from len(files).  The
# manual `i = i + 1` counter was replaced with enumerate.
plt.figure(figsize=(15, 3))
for root, dirs, files in os.walk('./Pictures/test_picture'):
    print(files)
    cols = max(len(files), 1)  # avoid subplot(1, 0, ...) on empty dirs
    for slot, image_name in enumerate(files, start=1):
        image = Image.open('./Pictures/test_picture/' + image_name)
        # Match the training pre-processing: RGB, 64x64, scaled to
        # [0, 1] (plt.imread already yields PNG pixels in [0, 1]).
        image = image.convert('RGB').resize((64, 64))
        image = np.array(image)/255
        pred = model.predict(image.reshape(1, 64, 64, 3))
        ax = plt.subplot(1, cols, slot)
        # class index 0..9 maps to digit label 1..10
        ax.set_title('predict: {}'.format(pred.argmax() + 1), font='Consolas')
        plt.imshow(image)
plt.subplots_adjust(wspace=1)
plt.show()
['1.png', '10.png', '2.png', '3.png', '4.png', '5.png', '7.png', '8.png', '9.png']