TensorFlow 卷积神经网络之水果识别

是我写的没错

1. 数据处理

2. 设计神经网络

3. 训练

4. 测试


 

数据集来源于Kaggle,大约有500个苹果和500个香蕉。

1. 数据处理

将图片数据处理为 tf 能够识别的数据格式,并将数据设计批次。

    第一步get_files() 方法读取图片,然后根据图片名,添加 label,然后再将 image和label 放到 数组中,打乱顺序返回
    将第一步处理好的图片 和label 数组 转化为 tensorflow 能够识别的格式,然后将图片裁剪和补充进行标准化处理,分批次返回。

新建数据处理文件 ,文件名 input_data.py

import os
import math
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# ============================================================================
# -----------------生成图片路径和标签的List------------------------------------

train_dir = 'C:/Users/Administrator/Desktop/水果识别/input_data'

apples = []
label_apples = []
bananas = []
label_bananas = []


# step1:获取所有的图片路径名,存放到
# 对应的列表中,同时贴上标签,存放到label列表中。
def get_files(file_dir, ratio):
    for file in os.listdir(file_dir + '/苹果'):
        apples.append(file_dir + '/苹果' + '/' + file)
        label_apples.append(0)
    for file in os.listdir(file_dir + '/香蕉'):
        bananas.append(file_dir + '/香蕉' + '/' + file)
        label_bananas.append(1)

    # step2:对生成的图片路径和标签List做打乱处理
    image_list = np.hstack((apples, bananas))
    label_list = np.hstack((label_apples, label_bananas))

    # 利用shuffle打乱顺序
    temp = np.array([image_list, label_list])
    temp = temp.transpose()
    np.random.shuffle(temp)

    # 从打乱的temp中再取出list(img和lab)
    # image_list = list(temp[:, 0])
    # label_list = list(temp[:, 1])
    # label_list = [int(i) for i in label_list]
    # return image_list, label_list

    # 将所有的img和lab转换成list
    all_image_list = list(temp[:, 0])
    all_label_list = list(temp[:, 1])

    # 将所得List分为两部分,一部分用来训练tra,一部分用来测试val
    # ratio是测试集的比例
    n_sample = len(all_label_list)
    n_val = int(math.ceil(n_sample * ratio))  # 测试样本数
    n_train = n_sample - n_val  # 训练样本数

    tra_images = all_image_list[0:n_train]
    tra_labels = all_label_list[0:n_train]
    tra_labels = [int(float(i)) for i in tra_labels]
    val_images = all_image_list[n_train:-1]
    val_labels = all_label_list[n_train:-1]
    val_labels = [int(float(i)) for i in val_labels]

    return tra_images, tra_labels, val_images, val_labels


# ---------------------------------------------------------------------------
# --------------------生成Batch----------------------------------------------

# step1:将上面生成的List传入get_batch() ,转换类型,产生一个输入队列queue,因为img和lab
# 是分开的,所以使用tf.train.slice_input_producer(),然后用tf.read_file()从队列中读取图像
#   image_W, image_H, :设置好固定的图像高度和宽度
#   设置batch_size:每个batch要放多少张图片
#   capacity:一个队列最大多少
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    # 转换类型
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # make an input queue
    input_queue = tf.train.slice_input_producer([image, label])

    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])  # read img from a queue

    # step2:将图像解码,不同类型的图像不能混在一起,要么只用jpeg,要么只用png等。
    image = tf.image.decode_jpeg(image_contents, channels=3)

    # step3:数据预处理,对图像进行旋转、缩放、裁剪、归一化等操作,让计算出的模型更健壮。
    image = tf.image.resize_image_with_crop_or_pad(image, image_W, image_H)
    image = tf.image.per_image_standardization(image)

    # step4:生成batch
    # image_batch: 4D tensor [batch_size, width, height, 3],dtype=tf.float32
    # label_batch: 1D tensor [batch_size], dtype=tf.int32
    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size=batch_size,
                                              num_threads=32,
                                              capacity=capacity)
    # 重新排列label,行数为[batch_size]
    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch

2. 设计神经网络

利用卷积神经网路处理,网络结构为

# conv1   卷积层 1
# pooling1_lrn  池化层 1
# conv2  卷积层 2
# pooling2_lrn 池化层 2
# local3 全连接层 1
# local4 全连接层 2
# softmax 全连接层 3

新建神经网络文件 ,文件名 model.py

# =========================================================================
import tensorflow as tf


# =========================================================================
# 网络结构定义
# 输入参数:images,image batch、4D tensor、tf.float32、[batch_size, width, height, channels]
# 返回参数:logits, float、 [batch_size, n_classes]
def inference(images, batch_size, n_classes):
    # 一个简单的卷积神经网络,卷积+池化层x2,全连接层x2,最后一个softmax层做分类。
    # 卷积层1
    # 64个3x3的卷积核(3通道),padding=’SAME’,表示padding后卷积的图与原图尺寸一致,激活函数relu()
    with tf.variable_scope('conv1') as scope:
        weights = tf.Variable(tf.truncated_normal(shape=[3, 3, 3, 64], stddev=1.0, dtype=tf.float32),
                              name='weights', dtype=tf.float32)

        biases = tf.Variable(tf.constant(value=0.1, dtype=tf.float32, shape=[64]),
                             name='biases', dtype=tf.float32)

        conv = tf.nn.conv2d(images, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)

    # 池化层1
    # 3x3最大池化,步长strides为2,池化后执行lrn()操作,局部响应归一化,对训练有利。
    with tf.variable_scope('pooling1_lrn') as scope:
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pooling1')
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')

    # 卷积层2
    # 16个3x3的卷积核(16通道),padding=’SAME’,表示padding后卷积的图与原图尺寸一致,激活函数relu()
    with tf.variable_scope('conv2') as scope:
        weights = tf.Variable(tf.truncated_normal(shape=[3, 3, 64, 16], stddev=0.1, dtype=tf.float32),
                              name='weights', dtype=tf.float32)

        biases = tf.Variable(tf.constant(value=0.1, dtype=tf.float32, shape=[16]),
                             name='biases', dtype=tf.float32)

        conv = tf.nn.conv2d(norm1, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name='conv2')

    # 池化层2
    # 3x3最大池化,步长strides为2,池化后执行lrn()操作,
    # pool2 and norm2
    with tf.variable_scope('pooling2_lrn') as scope:
        norm2 = tf.nn.lrn(conv2, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1], padding='SAME', name='pooling2')

    # 全连接层3
    # 128个神经元,将之前pool层的输出reshape成一行,激活函数relu()
    with tf.variable_scope('local3') as scope:
        reshape = tf.reshape(pool2, shape=[batch_size, -1])
        dim = reshape.get_shape()[1].value
        weights = tf.Variable(tf.truncated_normal(shape=[dim, 128], stddev=0.005, dtype=tf.float32),
                              name='weights', dtype=tf.float32)

        biases = tf.Variable(tf.constant(value=0.1, dtype=tf.float32, shape=[128]),
                             name='biases', dtype=tf.float32)

        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)

    # 全连接层4
    # 128个神经元,激活函数relu()
    with tf.variable_scope('local4') as scope:
        weights = tf.Variable(tf.truncated_normal(shape=[128, 128], stddev=0.005, dtype=tf.float32),
                              name='weights', dtype=tf.float32)

        biases = tf.Variable(tf.constant(value=0.1, dtype=tf.float32, shape=[128]),
                             name='biases', dtype=tf.float32)

        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name='local4')

    # dropout层
    #    with tf.variable_scope('dropout') as scope:
    #        drop_out = tf.nn.dropout(local4, 0.8)

    # Softmax回归层
    # 将前面的FC层输出,做一个线性回归,计算出每一类的得分
    with tf.variable_scope('softmax_linear') as scope:
        weights = tf.Variable(tf.truncated_normal(shape=[128, n_classes], stddev=0.005, dtype=tf.float32),
                              name='softmax_linear', dtype=tf.float32)

        biases = tf.Variable(tf.constant(value=0.1, dtype=tf.float32, shape=[n_classes]),
                             name='biases', dtype=tf.float32)

        softmax_linear = tf.add(tf.matmul(local4, weights), biases, name='softmax_linear')

    return softmax_linear


# -----------------------------------------------------------------------------
# loss计算
# 传入参数:logits,网络计算输出值。labels,真实值,在这里是0或者1
# 返回参数:loss,损失值
def losses(logits, labels):
    with tf.variable_scope('loss') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels,
                                                                       name='xentropy_per_example')
        loss = tf.reduce_mean(cross_entropy, name='loss')
        tf.summary.scalar(scope.name + '/loss', loss)
    return loss


# --------------------------------------------------------------------------
# loss损失值优化
# 输入参数:loss。learning_rate,学习速率。
# 返回参数:train_op,训练op,这个参数要输入sess.run中让模型去训练。
def trainning(loss, learning_rate):
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op


# -----------------------------------------------------------------------
# 评价/准确率计算
# 输入参数:logits,网络计算值。labels,标签,也就是真实值,在这里是0或者1。
# 返回参数:accuracy,当前step的平均准确率,也就是在这些batch中多少张图片被正确分类了。
def evaluation(logits, labels):
    with tf.variable_scope('accuracy') as scope:
        correct = tf.nn.in_top_k(logits, labels, 1)
        correct = tf.cast(correct, tf.float16)
        accuracy = tf.reduce_mean(correct)
        tf.summary.scalar(scope.name + '/accuracy', accuracy)
    return accuracy

3. 训练数据,并将训练的模型存储

新建训练文件,文件名train.py

# 导入文件
import os
import numpy as np
import tensorflow as tf
import input_data
import model

# 变量声明
N_CLASSES = 2 # 此处分为两类(苹果 、香蕉)
IMG_W = 299  # resize图像,太大的话训练时间久
IMG_H = 299
BATCH_SIZE = 20
CAPACITY = 200
MAX_STEP = 10000  # 一般大于10K
learning_rate = 0.0001  # 一般小于0.0001

# 获取批次batch
train_dir = 'C:/Users/Administrator/Desktop/水果识别'  # 训练样本的读入路径
logs_train_dir = 'C:/Users/Administrator/Desktop/水果识别/save'  # logs存储路径

# train, train_label = input_data.get_files(train_dir)
train, train_label, val, val_label = input_data.get_files(train_dir, 0.3)
# 训练数据及标签
train_batch, train_label_batch = input_data.get_batch(train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
# 测试数据及标签
val_batch, val_label_batch = input_data.get_batch(val, val_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

# 训练操作定义
train_logits = model.inference(train_batch, BATCH_SIZE, N_CLASSES)
train_loss = model.losses(train_logits, train_label_batch)
train_op = model.trainning(train_loss, learning_rate)
train_acc = model.evaluation(train_logits, train_label_batch)

# 测试操作定义
test_logits = model.inference(val_batch, BATCH_SIZE, N_CLASSES)
test_loss = model.losses(test_logits, val_label_batch)
test_acc = model.evaluation(test_logits, val_label_batch)

# 这个是log汇总记录
summary_op = tf.summary.merge_all()

# 产生一个会话
sess = tf.Session()
# 产生一个writer来写log文件
train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
# val_writer = tf.summary.FileWriter(logs_test_dir, sess.graph)
# 产生一个saver来存储训练好的模型
saver = tf.train.Saver()
# 所有节点初始化
sess.run(tf.global_variables_initializer())
# 队列监控
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

# 进行batch的训练
try:
    # 执行MAX_STEP步的训练,一步一个batch
    for step in np.arange(MAX_STEP):
        if coord.should_stop():
            break
        _, tra_loss, tra_acc = sess.run([train_op, train_loss, train_acc])

        # 每隔50步打印一次当前的loss以及acc,同时记录log,写入writer
        if step % 10 == 0:
            print('Step %d, train loss = %.2f, train accuracy = %.2f%%' % (step, tra_loss, tra_acc * 100.0))
            summary_str = sess.run(summary_op)
            train_writer.add_summary(summary_str, step)
        # 每隔100步,保存一次训练好的模型
        if (step + 1) == MAX_STEP:
            checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)

except tf.errors.OutOfRangeError:
    print('Done training -- epoch limit reached')

finally:
    coord.request_stop()

4.测试

利用训练好的模型,进行水果识别

新建测试文件,文件名test.py

from PIL import Image
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import model

import winsound
import win32com.client
from input_data import get_files
speak_out = win32com.client.Dispatch('SAPI.SPVOICE')

# 调用声音接口发出声音
def speak(str):
    print(str)
    speak_out.Speak(str)
    winsound.PlaySound(str,winsound.SND_ASYNC)

# 获取一张图片
def get_one_image(train):
    # 输入参数:train,训练图片的路径
    # 返回参数:image,从训练图片中随机抽取一张图片
    n = len(train)
    ind = np.random.randint(0, n)
    img_dir = train[ind]  # 随机选择测试的图片
    img = Image.open(img_dir)
    plt.imshow(img)
    plt.show()
    image = np.array(img)
    return image


# 测试图片
def evaluate_one_image(image_array):
    with tf.Graph().as_default():
        BATCH_SIZE = 1
        N_CLASSES = 2

        image = tf.cast(image_array, tf.float32)
        image = tf.image.per_image_standardization(image)
        image = tf.reshape(image, [1, 299, 299, 3])

        logit = model.inference(image, BATCH_SIZE, N_CLASSES)

        logit = tf.nn.softmax(logit)

        x = tf.placeholder(tf.float32, shape=[299, 299, 3])

        # you need to change the directories to yours.
        logs_train_dir = '/Users/Administrator/Desktop/水果识别/save/'

        saver = tf.train.Saver()

        with tf.Session() as sess:

            print("Reading checkpoints...")
            ckpt = tf.train.get_checkpoint_state(logs_train_dir)
            if ckpt and ckpt.model_checkpoint_path:
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Loading success, global_step is %s' % global_step)
            else:
                print('No checkpoint file found')
            prediction = sess.run(logit, feed_dict={x: image_array})
            max_index = np.argmax(prediction)
            print(max_index)
            if max_index == 0:
                result = ('这是苹果的可能性为: %.6f' % prediction[:, 0])
                speak('这是苹果')
            elif max_index == 1:
                result = ('这是香蕉的可能性为: %.6f' % prediction[:, 1])
                speak('这是香蕉')
            print(result)
            return result


# ------------------------------------------------------------------------

if __name__ == '__main__':
    img = Image.open('C:/Users/Administrator/Desktop/水果识别/ta.jpg')
    plt.imshow(img)
    plt.show()
    imag = img.resize([299, 299])
    image = np.array(imag)
    evaluate_one_image(image)

新建文件video.py,用于调用摄像头拍下图像,识别水果

import cv2
from PIL import Image

cap = cv2.VideoCapture(0)  # 打开摄像头

while (1):
    # get a frame
    ret, frame = cap.read()
    # show a frame
    cv2.imshow("capture", frame)  # 生成摄像头窗口

    if cv2.waitKey(1) & 0xFF == ord('q'):  # 如果按下q 就截图保存并退出
        cv2.imwrite("C:/Users/Administrator/Desktop/水果识别/te.jpg", frame)  # 保存路径
        break

cap.release()
cv2.destroyAllWindows()

# 将图片处理成指定的格式 这里为299*299
image=Image.open("C:/Users/Administrator/Desktop/水果识别/te.jpg")
image_size=image.resize((299,299),Image.ANTIALIAS)
image_size.save("C:/Users/Administrator/Desktop/水果识别/ta.jpg")

 

你可能感兴趣的:(人工智能)