LeNet-5 for MNIST handwritten digit recognition


I won't go over the LeNet-5 model and the MNIST dataset again here; there are plenty of introductions online, so I'll go straight to the code.

I'm still a beginner and the code is far from polished, so please treat what follows as a reference only.

1. mnist_lenet5_forward.py (forward propagation)

#coding:utf-8
import tensorflow as tf
IMAGE_SIZE = 28
NUM_CHANNELS = 1
CONV1_SIZE = 5
CONV1_KERNEL_NUM = 32
CONV2_SIZE = 5
CONV2_KERNEL_NUM = 64
FC_SIZE = 512
OUTPUT_NODE = 10

def get_weight(shape, regularizer):
	w = tf.Variable(tf.truncated_normal(shape,stddev=0.1))
	if regularizer != None: tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w)) 
	return w

def get_bias(shape): 
	b = tf.Variable(tf.zeros(shape))  
	return b

def conv2d(x,w):  
	return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):  
	return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 

def forward(x, train, regularizer):
    conv1_w = get_weight([CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_KERNEL_NUM], regularizer) 
    conv1_b = get_bias([CONV1_KERNEL_NUM]) 
    conv1 = conv2d(x, conv1_w) 
    relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_b)) 
    pool1 = max_pool_2x2(relu1) 

    conv2_w = get_weight([CONV2_SIZE, CONV2_SIZE, CONV1_KERNEL_NUM, CONV2_KERNEL_NUM],regularizer) 
    conv2_b = get_bias([CONV2_KERNEL_NUM])
    conv2 = conv2d(pool1, conv2_w) 
    relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_b))
    pool2 = max_pool_2x2(relu2)

    pool_shape = pool2.get_shape().as_list() 
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3] 
    reshaped = tf.reshape(pool2, [pool_shape[0], nodes]) 

    fc1_w = get_weight([nodes, FC_SIZE], regularizer) 
    fc1_b = get_bias([FC_SIZE]) 
    fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_w) + fc1_b) 
    if train: fc1 = tf.nn.dropout(fc1, 0.5)

    fc2_w = get_weight([FC_SIZE, OUTPUT_NODE], regularizer)
    fc2_b = get_bias([OUTPUT_NODE])
    y = tf.matmul(fc1, fc2_w) + fc2_b
    return y 
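
To sanity-check the forward pass, a quick way is to feed a dummy batch and look at the output shape. This is a rough sketch under TF 1.x, assuming the file above is importable as mnist_lenet5_forward (the file name shape_check.py is just for illustration):

# shape_check.py -- rough shape check for forward() (TF 1.x, illustration only)
import numpy as np
import tensorflow as tf
import mnist_lenet5_forward as fwd

x = tf.placeholder(tf.float32, [1, fwd.IMAGE_SIZE, fwd.IMAGE_SIZE, fwd.NUM_CHANNELS])
y = fwd.forward(x, False, None)          # no dropout, no regularization
print(y.get_shape())                     # expected: (1, 10)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    logits = sess.run(y, feed_dict={x: np.zeros((1, 28, 28, 1), np.float32)})
    print(logits.shape)                  # (1, 10)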

2. mnist_lenet5_generateds.py (generate the TFRecord files)

# coding:utf-8
import tensorflow as tf
import numpy as np
from PIL import Image
import os
import glob

image_train_path = './fashion_mnist_png/fashion_mnist_train/'
# label_train_path = './fashion_mnist_png/mnist_train_jpg_60000.txt'
tfRecord_train = './fashion_data/fashion_mnist_train.tfrecords'

image_test_path = './fashion_mnist_png/fashion_mnist_test/'
# label_test_path = './fashion_mnist_png/mnist_test_jpg_10000.txt'
tfRecord_test = './fashion_data/fashion_mnist_test.tfrecords'

data_path = './fashion_data'
resize_height = 28
resize_width = 28


def write_tfRecord(tfRecordName, image_path):
    writer = tf.python_io.TFRecordWriter(tfRecordName)
    for i in range(10):
        img_path_names = glob.glob(image_path+"{}/*.png".format(i))
        for img_path_name in img_path_names:
            img = Image.open(img_path_name)
            img_raw = img.tobytes()
            labels = [0] * 10
            labels[i] = 1

            example = tf.train.Example(features=tf.train.Features(feature={
                'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
                'label': tf.train.Feature(int64_list=tf.train.Int64List(value=labels))
            }))
            writer.write(example.SerializeToString())
            print("wrote {} (label {})".format(img_path_name, i))

    writer.close()
    print("write tfrecord successful")



def generate_tfRecord():
    isExists = os.path.exists(data_path)
    if not isExists:
        os.makedirs(data_path)
        print('The directory was created successfully')
    else:
        print('directory already exists')
    write_tfRecord(tfRecord_train, image_train_path)
    write_tfRecord(tfRecord_test, image_test_path)


def read_tfRecord(tfRecord_path):
    filename_queue = tf.train.string_input_producer([tfRecord_path], shuffle=True)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'label': tf.FixedLenFeature([10], tf.int64),
                                           'img_raw': tf.FixedLenFeature([], tf.string)
                                       })
    img = tf.decode_raw(features['img_raw'], tf.uint8)
    img.set_shape([784])
    img = tf.cast(img, tf.float32) * (1. / 255)
    label = tf.cast(features['label'], tf.float32)
    return img, label


def get_tfrecord(num, isTrain=True):
    if isTrain:
        tfRecord_path = tfRecord_train
    else:
        tfRecord_path = tfRecord_test
    img, label = read_tfRecord(tfRecord_path)
    img_batch, label_batch = tf.train.shuffle_batch([img, label],
                                                    batch_size=num,
                                                    num_threads=2,
                                                    capacity=1000,
                                                    min_after_dequeue=700)
    return img_batch, label_batch


def main():
    generate_tfRecord()


if __name__ == '__main__':
    main()
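
To make sure the records were written correctly, it can be worth reading one example back before training. A minimal sketch (TF 1.x; the path is the training file generated above):

# read back a single record to verify its layout (TF 1.x, illustration only)
import tensorflow as tf

record_iter = tf.python_io.tf_record_iterator('./fashion_data/fashion_mnist_train.tfrecords')
example = tf.train.Example()
example.ParseFromString(next(record_iter))

label = list(example.features.feature['label'].int64_list.value)
img_raw = example.features.feature['img_raw'].bytes_list.value[0]
print('one-hot label:', label)       # e.g. [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
print('image bytes:', len(img_raw))  # 784 for a 28x28 single-channel image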

The generated TFRecord files look like this:
[screenshot: fashion_mnist_train.tfrecords and fashion_mnist_test.tfrecords under ./fashion_data/]

3. mnist_lenet5_backward.py (backpropagation / training)

#coding:utf-8
import tensorflow as tf
import mnist_lenet5_forward
import mnist_lenet5_generateds
import os
import numpy as np

BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.1
LEARNING_RATE_DECAY = 0.99 
REGULARIZER = 0.0001 
STEPS = 50000 
MOVING_AVERAGE_DECAY = 0.99 
MODEL_SAVE_PATH="./model/" 
MODEL_NAME="mnist_model"
mnist_lenet5_examples = 60000

def backward():
    x = tf.placeholder(tf.float32, [
	BATCH_SIZE,
	mnist_lenet5_forward.IMAGE_SIZE,
	mnist_lenet5_forward.IMAGE_SIZE,
	mnist_lenet5_forward.NUM_CHANNELS]) 
    y_ = tf.placeholder(tf.float32, [None, mnist_lenet5_forward.OUTPUT_NODE])
    y = mnist_lenet5_forward.forward(x,True, REGULARIZER) 
    global_step = tf.Variable(0, trainable=False) 

    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cem = tf.reduce_mean(ce) 
    loss = cem + tf.add_n(tf.get_collection('losses')) 

    learning_rate = tf.train.exponential_decay( 
        LEARNING_RATE_BASE,
        global_step,
        mnist_lenet5_examples / BATCH_SIZE,
		LEARNING_RATE_DECAY,
        staircase=True) 
    
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    ema_op = ema.apply(tf.trainable_variables())
    with tf.control_dependencies([train_step, ema_op]): 
        train_op = tf.no_op(name='train')

    saver = tf.train.Saver()
    img_batch, label_batch = mnist_lenet5_generateds.get_tfrecord(BATCH_SIZE, isTrain=True)

    with tf.Session() as sess: 
        init_op = tf.global_variables_initializer() 
        sess.run(init_op) 

        ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH) 
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)  # start the input-pipeline threads

        for i in range(STEPS):
            xs, ys = sess.run([img_batch, label_batch])
            reshaped_xs = np.reshape(xs, (
                BATCH_SIZE,
                mnist_lenet5_forward.IMAGE_SIZE,
                mnist_lenet5_forward.IMAGE_SIZE,
                mnist_lenet5_forward.NUM_CHANNELS))
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: reshaped_xs, y_: ys}) 
            if i % 100 == 0:
                print("After %d training step(s), loss on training batch is %g." % (step, loss_value))
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)

        coord.request_stop()
        coord.join(threads)

def main():
    backward()

if __name__ == '__main__':
    main()
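
A quick note on the learning-rate schedule: with decay_steps set to mnist_lenet5_examples / BATCH_SIZE and staircase=True, the rate drops by a factor of LEARNING_RATE_DECAY once per epoch (every 600 steps). The same arithmetic in plain Python, just to illustrate what exponential_decay computes here:

# staircase exponential decay, computed by hand (illustration only)
LEARNING_RATE_BASE = 0.1
LEARNING_RATE_DECAY = 0.99
decay_steps = 60000 // 100   # one epoch = 600 batches

def decayed_lr(global_step):
    return LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (global_step // decay_steps)

for step in (0, 600, 6000, 50000):
    print(step, decayed_lr(step))
# 0 -> 0.1, 600 -> 0.099, 6000 -> ~0.0904, 50000 -> ~0.0434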

The training output (the loss is printed every 100 steps):
[screenshot: training loss output]

The saved model files:
[screenshot: checkpoint files under ./model/]

4. mnist_lenet5_test.py (evaluate the saved model)

import time
import tensorflow as tf
import mnist_lenet5_forward
import mnist_lenet5_backward
import mnist_lenet5_generateds
import numpy as np

test_num_examples = 10000
TEST_INTERVAL_SECS=5

def test():
    with tf.Graph().as_default() as g:
        x=tf.placeholder(tf.float32,[
            test_num_examples,
            mnist_lenet5_forward.IMAGE_SIZE,
            mnist_lenet5_forward.IMAGE_SIZE,
            mnist_lenet5_forward.NUM_CHANNELS
        ])
        y_=tf.placeholder(tf.float32,[None,mnist_lenet5_forward.OUTPUT_NODE])
        y=mnist_lenet5_forward.forward(x,False,None)

        ema=tf.train.ExponentialMovingAverage(mnist_lenet5_backward.MOVING_AVERAGE_DECAY)
        ema_restore=ema.variables_to_restore()
        saver =tf.train.Saver(ema_restore)

        correct_prediction = tf.equal(tf.argmax(y,1),tf.argmax(y_,1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))

        img_batch, label_batch = mnist_lenet5_generateds.get_tfrecord(test_num_examples, isTrain=False)

        while True:
            with tf.Session() as sess:
                ckpt = tf.train.get_checkpoint_state(mnist_lenet5_backward.MODEL_SAVE_PATH)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess,ckpt.model_checkpoint_path)

                    global_step=ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]

                    coord = tf.train.Coordinator()
                    threads = tf.train.start_queue_runners(sess=sess,coord=coord)

                    xs, ys = sess.run([img_batch, label_batch])
                    reshaped_x = np.reshape(xs,(
                        test_num_examples,
                        mnist_lenet5_forward.IMAGE_SIZE,
                        mnist_lenet5_forward.IMAGE_SIZE,
                        mnist_lenet5_forward.NUM_CHANNELS))

                    accuracy_score = sess.run(accuracy,feed_dict={x:reshaped_x,y_:ys})

                    print('After %s training step(s), test accuracy = %g' % (global_step, accuracy_score))

                    coord.request_stop()
                    coord.join(threads)

                else:

                    print('No checkpoint file found')
                    return

            time.sleep(TEST_INTERVAL_SECS)

def main():
    test()

if __name__ == '__main__':
    main()
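
The accuracy node in the graph above is just an argmax comparison averaged over the batch. The equivalent computation in numpy, for intuition only (not part of the project):

# numpy equivalent of the accuracy calculation above (illustration only)
import numpy as np

y  = np.array([[0.1, 0.7, 0.2],    # network outputs, one row per sample
               [0.9, 0.05, 0.05],
               [0.2, 0.2, 0.6]])
y_ = np.array([[0, 1, 0],          # one-hot labels
               [1, 0, 0],
               [0, 0, 1]])

correct = np.equal(np.argmax(y, 1), np.argmax(y_, 1))  # [True, True, True]
print(np.mean(correct.astype(np.float32)))             # 1.0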

The test output:
After 50000 training steps the accuracy settles at around 97% (it already reaches about 90% by step 1000).
[screenshot: test accuracy output]

5. mnist_app.py (apply the model to your own images)

# mnist_app.py
# coding:utf-8
import tensorflow as tf
import numpy as np
from PIL import Image
import mnist_lenet5_forward
import mnist_lenet5_backward


def restore_model(testPicArr):
	# rebuild the computation graph
	with tf.Graph().as_default() as tg:
		x = tf.placeholder(tf.float32, [
			1,
			mnist_lenet5_forward.IMAGE_SIZE,
			mnist_lenet5_forward.IMAGE_SIZE,
			mnist_lenet5_forward.NUM_CHANNELS
		])
		y = mnist_lenet5_forward.forward(x, False, None)

		preValue = tf.argmax(y, 1)  # index of the largest output = predicted digit

		# create a Saver that restores the moving-average (shadow) variables
		variable_averages = tf.train.ExponentialMovingAverage(
			mnist_lenet5_backward.MOVING_AVERAGE_DECAY)
		variables_to_restore = variable_averages.variables_to_restore()
		saver = tf.train.Saver(variables_to_restore)

		# load the checkpoint inside a Session
		with tf.Session() as sess:
			ckpt = tf.train.get_checkpoint_state(mnist_lenet5_backward.MODEL_SAVE_PATH)
			# if a checkpoint exists, restore its parameters into the current session
			if ckpt and ckpt.model_checkpoint_path:
				saver.restore(sess, ckpt.model_checkpoint_path)

				# feed the preprocessed image into the network and run the prediction
				preValue = sess.run(preValue, feed_dict = {x: testPicArr})
				return preValue
			else:
				print("No checkpoint file found!")
				return -1


def pre_pic(picName):
	# open the image
	img = Image.open(picName)
	# resize to 28x28 with antialiasing
	reIm = img.resize((28, 28), Image.ANTIALIAS)
	# convert to grayscale and then to a numpy array
	im_arr = np.array(reIm.convert('L'))
	# binarization threshold
	threshold = 50
	# the model expects white digits on a black background, so invert the colors
	for i in range(28):
		for j in range(28):
			im_arr[i][j] = 255 - im_arr[i][j]
			# binarize to filter out noise and keep the main strokes
			if (im_arr[i][j] < threshold):
				im_arr[i][j] = 0
			else:
				im_arr[i][j] = 255
	# reshape to the input shape the network expects
	nm_arr = im_arr.reshape([1, 28, 28, 1])
	# the model expects floats, so cast first
	nm_arr = nm_arr.astype(np.float32)
	# scale pixel values from [0, 255] to [0, 1]
	img_ready = np.multiply(nm_arr, 1.0 / 255.0)

	# return the preprocessed image
	return img_ready

def application():
	# number of images to recognize (input() returns a string, so cast to int)
	testNum = int(input("Number of test images: "))
	for i in range(testNum):
		# path of the image to recognize
		testPic = input("Path of the test image: ")
		# preprocess the received image
		testPicArr = pre_pic(testPic)
		# feed the prepared image into the network
		preValue = restore_model(testPicArr)
		# print the prediction
		print("Predicted digit:", preValue)


def main():
	application()

if __name__ == '__main__':
	main()
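
The per-pixel loop in pre_pic works, but the same preprocessing can be written with vectorized numpy operations. A rough equivalent sketch (same inversion and the same threshold of 50; pre_pic_np is just a hypothetical helper name):

# vectorized version of pre_pic (illustration only)
import numpy as np
from PIL import Image

def pre_pic_np(picName, threshold=50):
    img = Image.open(picName).resize((28, 28), Image.ANTIALIAS).convert('L')
    im_arr = 255 - np.array(img, dtype=np.int32)    # invert: white digit on black background
    im_arr = np.where(im_arr < threshold, 0, 255)   # binarize to suppress background noise
    return (im_arr.reshape(1, 28, 28, 1) / 255.0).astype(np.float32)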

Prediction output:
(The image 9.png actually contains an 8; I just named the files carelessly.)
[screenshot: prediction output]

That concludes this walkthrough of the LeNet-5 model.
Since this post is already quite long, I'll leave the details of the optimizer, the loss function, and the activation functions used in the code for you to explore on your own.

Note: this post records what I learned while studying 《TensorFlow实战Google深度学习框架》 (2nd edition); please credit the source when reposting.
