Implementing cat-vs-dog recognition with TensorFlow

import os
import numpy as np
import time
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt

def get_all_files(file_path):
    """
    获取图片路径及其标签
    :param file_path: a sting, 图片所在目录
    :param is_random: True or False, 是否乱序
    :return:
    """
    image_list = []
    label_list = []
    cat_count = 0
    dog_count = 0
    for item in os.listdir(file_path):
        item_path = os.path.join(file_path, item)
        item_label = item.split('.')[0]  # file names look like cat.0.jpg; the first token is the label

        if os.path.isfile(item_path):
            image_list.append(item_path)
        else:
            raise ValueError('Non-file item found in the directory.')

        if item_label == 'cat':  # cats are labeled 0
            label_list.append(0)
            cat_count += 1
        else:  # dogs are labeled 1
            label_list.append(1)
            dog_count += 1
    print('The dataset contains %d cats and %d dogs.' % (cat_count, dog_count))

    image_list = np.asarray(image_list)
    label_list = np.asarray(label_list)
    # shuffle images and labels in unison
    rnd_index = np.arange(len(image_list))
    np.random.shuffle(rnd_index)
    image_list = image_list[rnd_index]
    label_list = label_list[rnd_index]

    return image_list, label_list
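
# A quick sanity check for get_all_files (the path is illustrative -- substitute
# your own training folder):
#   image_list, label_list = get_all_files('E:/python ese/cat_dog/data/train/train')
#   assert len(image_list) == len(label_list)
#   print(image_list[0], label_list[0])  # a shuffled path and its 0/1 label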

def get_batch(image, label, image_size_w, image_size_h, batch_size, capacity):
    """
    Build a shuffled training batch from the file lists.
    :param image: 1-D array of image paths
    :param label: 1-D array of integer labels
    :param image_size_w: an int, target image width
    :param image_size_h: an int, target image height
    :param batch_size: an int, number of samples per batch
    :param capacity: an int, queue capacity
    :return: (image_train_batch, label_train_batch)
    """
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)
    input_queue = tf.train.slice_input_producer([image, label])

    # read and decode the image from its path
    image_train = tf.read_file(input_queue[0])
    image_train = tf.image.decode_jpeg(image_train, channels=3)  # the images are JPEGs
    image_train = tf.image.resize_images(image_train, [image_size_h, image_size_w])
    image_train = tf.cast(image_train, tf.float32) / 255.  # convert dtype and normalize to [0, 1]

    # the matching label
    label_train = input_queue[1]

    # assemble shuffled batches
    image_train_batch, label_train_batch = tf.train.shuffle_batch([image_train, label_train],
                                                                  batch_size=batch_size,
                                                                  capacity=capacity,
                                                                  min_after_dequeue=100,
                                                                  num_threads=2)

    return image_train_batch, label_train_batch
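
# Note: slice_input_producer and shuffle_batch do their work on background queue
# threads, so no data flows until tf.train.start_queue_runners() is called inside
# a session (train() below does this). The returned tensors have shapes
# [batch_size, image_size_h, image_size_w, 3] and [batch_size].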

def cnn_inference(images, batch_size, n_classes):

    # first convolutional layer conv1: sixteen 3x3 kernels
    with tf.variable_scope('conv1') as scope:
        # create shared variables for the weights and biases
        # conv1, shape = [kernel size, kernel size, channels, kernel numbers]
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 3, 16],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[16],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(images,
                            weights,
                            strides=[1, 1, 1, 1],
                            padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)  # add the bias
        conv1 = tf.nn.relu(pre_activation, name=scope.name)  # ReLU non-linearity, named under the conv1 scope

    # first max-pooling layer pool1 and local response normalization norm1
    with tf.variable_scope('pooling1_lrn') as scope:
        pool1 = tf.nn.max_pool(conv1,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME',
                               name='pooling1')
        norm1 = tf.nn.lrn(pool1,
                          depth_radius=4,
                          bias=1.0,
                          alpha=0.001 / 9.0,
                          beta=0.75,
                          name='norm1')

    # second convolutional layer conv2
    with tf.variable_scope('conv2') as scope:
        weights = tf.get_variable('weights',
                                  shape=[3, 3, 16, 16],  # only the third entry (16) must match the previous layer's channel depth
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.1,dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[16],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(norm1, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name='conv2')

    # second pooling layer pool2 and normalization norm2
    with tf.variable_scope('pooling2_lrn') as scope:
        norm2 = tf.nn.lrn(conv2,
                          depth_radius=4,
                          bias=1.0,
                          alpha=0.001 / 9.0,
                          beta=0.75,
                          name='norm2')
        pool2 = tf.nn.max_pool(norm2,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 1, 1, 1],
                               padding='SAME',
                               name='pooling2')
        # here normalization is applied before pooling

    # third layer: fully connected layer local3
    with tf.variable_scope('local3') as scope:
        # flatten: collapse the convolutional feature maps into a 2-D tensor (matrix)
        reshape = tf.reshape(pool2, shape=[batch_size, -1])  # one row per sample

        dim = reshape.get_shape()[1].value  # resolve what the -1 (wildcard) dimension actually is
        weights = tf.get_variable('weights',
                                  shape=[dim, 256],  # fully connected to 256 neurons
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[256],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)  # matrix multiply plus bias

    # fourth layer: fully connected layer local4
    with tf.variable_scope('local4') as scope:
        weights = tf.get_variable('weights',
                                  shape=[256, 512],  # followed by 512 neurons
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[512],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name='local4')

    # fifth layer: the output layer softmax_linear
    with tf.variable_scope('softmax_linear') as scope:
        weights = tf.get_variable('weights',
                                  shape=[512, n_classes],
                                  dtype=tf.float32,
                                  initializer=tf.truncated_normal_initializer(stddev=0.005,dtype=tf.float32))
        biases = tf.get_variable('biases',
                                 shape=[n_classes],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))
        softmax_linear = tf.add(tf.matmul(local4, weights), biases, name='softmax_linear')
        # The name is just 'softmax_linear'; the actual softmax is fused with the
        # cross-entropy inside losses() below, which is faster.
        # softmax_linear has as many rows as local4 and as many columns as weights
        # (= the length of biases = the number of classes).
        # Softmax maps the neurons' outputs into (0, 1), so they can be read as class probabilities.

    return softmax_linear
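
# Shape trace for a 208x208 input (the IMG_W/IMG_H used in train() below):
#   input          [batch, 208, 208, 3]
#   conv1  (SAME)  [batch, 208, 208, 16]
#   pool1  (s=2)   [batch, 104, 104, 16]
#   conv2  (SAME)  [batch, 104, 104, 16]
#   pool2  (s=1)   [batch, 104, 104, 16]   (stride 1, so the spatial size is unchanged)
#   flatten        [batch, 104*104*16] = [batch, 173056]
#   local3 -> 256, local4 -> 512, softmax_linear -> n_classes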

def losses(logits, labels):

    with tf.variable_scope('loss') as scope:
        # softmax and cross-entropy are fused here; the sparse variant takes integer labels directly, which speeds up the computation
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels, name='loss_per_eg')
        loss = tf.reduce_mean(cross_entropy, name='loss')  # average loss over the batch
        tf.summary.scalar(scope.name + '/loss', loss)
    return loss
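
# For intuition: with logits [[2.0, 0.5]] and label [0], the per-example loss is
# -log(softmax(logits)[0]) = -log(e^2 / (e^2 + e^0.5)) ≈ 0.20; the sparse variant
# takes integer class indices, so no one-hot encoding is required.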

def training(loss, learning_rate):

    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op
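
# minimize() both applies the gradients and increments global_step, so the step
# counter stays in sync with the number of batches processed.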

def evaluation(logits, labels):
    with tf.variable_scope('accuracy') as scope:
        correct = tf.nn.in_top_k(logits, labels, 1)
        correct = tf.cast(correct, tf.float16)
        accuracy = tf.reduce_mean(correct)
        tf.summary.scalar(scope.name + '/accuracy', accuracy)
    return accuracy
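
# in_top_k(logits, labels, 1) marks a sample correct when its true class has the
# highest logit, e.g. logits [[0.2, 0.8]] with label [1] -> [True] -> accuracy 1.0.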

def train():

    N_CLASSES = 2  # cat and dog
    IMG_W = 208  # resize the images; larger sizes slow down training
    IMG_H = 208
    BATCH_SIZE = 64
    CAPACITY = 2000
    MAX_STEP = 15000  # typically 5k-10k steps
    learning_rate = 0.00001  # usually below 0.0001

    train_dir = 'E:/python ese/cat_dog/data/train/train'
    logs_train_dir = 'E:/python ese/cat_dog/log/'  # where training logs and model checkpoints are saved

    train, train_label = get_all_files(train_dir)
    train_batch, train_label_batch = get_batch(train,
                                               train_label,
                                               IMG_W,
                                               IMG_H,
                                               BATCH_SIZE,
                                               CAPACITY)
    train_logits = cnn_inference(train_batch, BATCH_SIZE, N_CLASSES)
    train_loss = losses(train_logits, train_label_batch)
    train_op = training(train_loss, learning_rate)
    train_acc = evaluation(train_logits, train_label_batch)

    summary_op = tf.summary.merge_all()

    # collect per-step values so accuracy and loss curves can be plotted afterwards
    step_list = list(range(MAX_STEP))
    cnn_list1 = []
    cnn_list2 = []
    fig = plt.figure()  # figure holding both curves
    ax = fig.add_subplot(1, 2, 1)  # 1 row, 2 columns, first panel
    ax.yaxis.grid(True)
    ax.set_title('cnn_accuracy', fontsize=14, y=1.02)
    ax.set_xlabel('step')
    ax.set_ylabel('accuracy')
    bx = fig.add_subplot(1, 2, 2)  # second panel
    bx.yaxis.grid(True)
    bx.set_title('cnn_loss', fontsize=14, y=1.02)
    bx.set_xlabel('step')
    bx.set_ylabel('loss')

    # initialization -- mandatory whenever the graph contains variables
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # create a writer for the TensorBoard log files
        train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
        # create a saver to store the trained model
        saver = tf.train.Saver()

        # queue coordination: the batching pipeline above is queue-based
        # (a placeholder-based feed would be the queue-free alternative)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            # run MAX_STEP training steps, one batch per step
            for step in np.arange(MAX_STEP):
                if coord.should_stop():
                    break
                # run the ops below; don't assign the result back to the name train_op --
                # sess.run returns None for an op, which would break the next iteration, hence _op
                _op, tra_loss, tra_acc = sess.run([train_op, train_loss, train_acc])
                # every 50 steps, print the current loss and accuracy and write a summary to the log
                if step % 50 == 0:
                    print('Step %d, train loss = %.2f, train accuracy = %.2f%%' % (step, tra_loss, tra_acc * 100.0))
                    summary_str = sess.run(summary_op)
                    train_writer.add_summary(summary_str, step)
                # record every step for the plots
                cnn_list1.append(tra_acc)
                cnn_list2.append(tra_loss)
                # every 5000 steps (and at the final step), save a checkpoint
                if step % 5000 == 0 or (step + 1) == MAX_STEP:
                    checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)

            ax.plot(step_list, cnn_list1)
            bx.plot(step_list, cnn_list2)
            plt.show()

        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
            coord.join(threads)
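
# To inspect the recorded loss/accuracy summaries, point TensorBoard at the log
# directory, e.g.:
#   tensorboard --logdir "E:/python ese/cat_dog/log/"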

def get_one_image(train):
    '''Randomly pick one image from training data
    Return: ndarray
    '''
    n = len(train)
    ind = np.random.randint(0, n)
    img_dir = train[ind]

    image = Image.open(img_dir)
    plt.imshow(image)
    image = image.resize([208, 208])
    image = np.array(image)
    return image


def evaluate_one_image():
    train_dir = 'E:/python ese/cat_dog/data/test1/test1/'
    train, train_label = get_all_files(train_dir)
    image_array = get_one_image(train)

    with tf.Graph().as_default():
        BATCH_SIZE = 1
        N_CLASSES = 2

        # build the inference graph on a placeholder so the image is actually fed
        # in through feed_dict, and normalize the same way as in get_batch() so
        # that training and inference preprocessing match
        x = tf.placeholder(tf.float32, shape=[208, 208, 3])
        image = tf.reshape(x / 255., [1, 208, 208, 3])
        logit = cnn_inference(image, BATCH_SIZE, N_CLASSES)

        logit = tf.nn.softmax(logit)

        # you need to change the directories to yours.
        logs_train_dir = 'E:/python ese/cat_dog/log/'

        saver = tf.train.Saver()

        with tf.Session() as sess:

            print("Reading checkpoints...")
            ckpt = tf.train.get_checkpoint_state(logs_train_dir)
            if ckpt and ckpt.model_checkpoint_path:
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Loading success, global_step is %s' % global_step)
            else:
                print('No checkpoint file found')

            prediction = sess.run(logit, feed_dict={x: image_array})
            max_index = np.argmax(prediction)
            if max_index == 0:
                print('This is a cat with probability %.6f' % prediction[0, 0])
            else:
                print('This is a dog with probability %.6f' % prediction[0, 1])
    plt.imshow(image_array)
    plt.show()

if __name__ == '__main__':

    start = time.time()
    train()
    #evaluate_one_image()
    elapsed = time.time() - start
    print('Elapsed time: %f hours' % (elapsed / 3600))
