LeNet-5出自论文Gradient-Based Learning Applied to Document Recognition,是一种用于手写体字符识别的非常高效的卷积神经网络。
卷积神经网络能够很好地利用图像的结构信息。LeNet-5是一个较简单的卷积神经网络。下图显示了其结构:输入的二维图像,先经过两组“卷积层+池化层”,再经过全连接层,最后使用softmax分类作为输出层。下面我们主要介绍卷积层和池化层。
1、INPUT层-输入层
2、C1层-卷积层
3、S2层-池化层(下采样层)
4、C3层-卷积层
5、S4层-池化层(下采样层)
6、C5层-卷积层
7、F6层-全连接层
8、Output层-全连接层
#handwrite_Lenet_Tensorflow_train.py
#coding=utf-8
"""
参考:https://blog.csdn.net/suyunzzz/article/details/104195872
参考:https://cuijiahua.com/blog/2018/01/dl_3.html
"""
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import datetime
# Progress marker emitted before the training function is defined and run.
print("start")
def _to_lenet_input(images):
    """Turn raw 28x28 image batches into normalised 32x32x1 float32 arrays.

    Args:
        images: array of shape (N, 28, 28) as returned by the Keras MNIST
            loader.

    Returns:
        float32 array of shape (N, 32, 32, 1) with pixel values divided by 255.
    """
    # Zero-pad 2 pixels on each side of both image axes: 28x28 -> 32x32.
    padded = np.pad(images, ((0, 0), (2, 2), (2, 2)), 'constant', constant_values=0)
    # Cast first so the division keeps the data in float32.
    scaled = padded.astype('float32') / 255
    # Add the single channel axis that the first Conv2D layer is declared with.
    return scaled.reshape(scaled.shape[0], 32, 32, 1)


def train_model(num_epochs=1, batch_size=60, lr=0.001, model_path='leNet_model.h5'):
    """Train a LeNet-style CNN on MNIST, save it, evaluate it and plot one prediction.

    Args:
        num_epochs: number of passes over the training set.
        batch_size: number of images per gradient step.
        lr: learning rate handed to the Adam optimizer.
        model_path: file the trained model is saved to.

    Side effects:
        Prints shapes, the model summary, training time, evaluation result and
        the predicted digit; writes ``model_path`` and ``predict_num.jpg``;
        opens a matplotlib window.
    """
    mnist = tf.keras.datasets.mnist
    # Training and test splits of the MNIST digits.
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Both splits go through the SAME preprocessing. The previous code cast
    # x_train to float32 twice and never cast x_test, leaving the test set
    # in a different dtype from the training set.
    x_train = _to_lenet_input(x_train)
    x_test = _to_lenet_input(x_test)
    print(x_train.shape,x_test.shape)

    # LeNet layout: conv -> pool -> conv -> pool -> dense(120) -> dense(84) -> softmax(10).
    # Only the first layer declares input_shape; later layers infer theirs.
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(filters=6, kernel_size=(5, 5), padding='valid',
                               activation=tf.nn.relu, input_shape=(32, 32, 1)),
        tf.keras.layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        tf.keras.layers.Conv2D(filters=16, kernel_size=(5, 5), padding='valid',
                               activation=tf.nn.relu),
        tf.keras.layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(units=120, activation=tf.nn.relu),
        tf.keras.layers.Dense(units=84, activation=tf.nn.relu),
        tf.keras.layers.Dense(units=10, activation=tf.nn.softmax),
    ])
    model.summary()

    # Labels are integer class ids, hence the sparse cross-entropy loss.
    adam_optimizer = tf.keras.optimizers.Adam(lr)
    model.compile(
        optimizer=adam_optimizer,
        loss=tf.keras.losses.sparse_categorical_crossentropy,
        metrics=['accuracy'],
    )

    # Time the fit call only.
    start_time = datetime.datetime.now()
    model.fit(
        x=x_train,
        y=y_train,
        batch_size=batch_size,
        epochs=num_epochs,
    )
    end_time = datetime.datetime.now()
    time_cost = end_time - start_time
    print('time_cost: ',time_cost)

    model.save(model_path)
    print(model.evaluate(x_test,y_test))

    # Predict a single test image and show it.
    image_index = 3
    pred = model.predict(x_test[image_index].reshape(1, 32, 32, 1))
    print("predict result:",pred.argmax())

    plt.imshow(x_test[image_index].reshape(32, 32))
    plt.savefig("predict_num.jpg")
    plt.show()
# Run the whole training pipeline, then print the completion marker.
train_model()
print("end")
#handwrite_Lenet_Tensorflow_load.py
#coding=utf-8
import tensorflow as tf
# Handle to the Keras MNIST dataset module.
# NOTE(review): not referenced in the visible part of this script - confirm before removing.
mnist=tf.keras.datasets.mnist
import matplotlib.pyplot as plt
import matplotlib as m
import numpy as np
import cv2
import os
# Set TensorFlow's C++ log threshold to level '2'.
# NOTE(review): tensorflow is imported above this line; this variable is
# normally expected to be set before that import - confirm it still takes effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# np.set_printoptions(threshold=np.inf)
#加载模型
def digit_predict(image_path='0.jpg', model_path='leNet_model.h5'):
    """Load a saved model and predict the digit drawn in an image file.

    The image is converted to greyscale, inverted, thresholded, resized to
    32x32 and scaled by 1/255 before being passed to the model. Every
    intermediate stage is printed and shown with matplotlib.

    Args:
        image_path: path of the image to classify.
        model_path: path of the saved Keras model.

    Raises:
        OSError: if OpenCV cannot read ``image_path``.
    """
    model = tf.keras.models.load_model(model_path)

    # cv2.imread signals failure by returning None instead of raising, so
    # check explicitly rather than crashing later on ``img.shape``.
    img = cv2.imread(image_path)
    if img is None:
        raise OSError(f"could not read image file: {image_path!r}")
    print(img.shape)
    # OpenCV loads colour images as BGR while matplotlib expects RGB;
    # convert for display only so the preview colours are correct.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.show()

    # Greyscale.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    print(img.shape)
    plt.imshow(img, cmap='Greys')
    plt.show()

    # Invert intensities.
    img = cv2.bitwise_not(img)
    plt.imshow(img, cmap='Greys')
    plt.show()
    print('二值化前:',img.shape)
    print('二值化前:',img)

    # Push dark pixels to 0 and bright pixels to 255. Values in 101..139
    # are left untouched, so this is not a strict two-level binarisation.
    img[img <= 100] = 0
    img[img >= 140] = 255
    plt.imshow(img, cmap='Greys')
    plt.show()
    print('二值化后:',img.shape)
    print('二值化后:',img)

    # Resize to the 32x32 shape the prediction input is reshaped to below.
    img = cv2.resize(img, (32, 32))
    print('尺寸:',img.shape)
    print('尺寸',img)

    # Scale to the [0, 1] range (values are not restricted to exactly 0 and 1).
    img = img / 255
    print('归一化:',img.shape)
    print('归一化:',img)

    # Predict on a batch of one single-channel image.
    pred = model.predict(img.reshape(1, 32, 32, 1))
    print('prediction Number: ',pred.argmax())

    # Show the exact array that was given to the model.
    plt.imshow(img)
    plt.show()
digit_predict()