迁移学习VGG_猫狗识别

使用了VGG19的模型迁移到猫狗识别中,并且在最后添加了两层FC全连接层用于分类。

并且网络中添加了学习率衰减以及滑动平均模型

 

迁移学习VGG_猫狗识别_第1张图片

其中train_image(2w5张图片)存放训练样本 , test1(256张图片)存放测试样本

train_image:迁移学习VGG_猫狗识别_第2张图片

 test1:

迁移学习VGG_猫狗识别_第3张图片

 VGG模型:

需要自己下载

VGG迁移学习_猫狗识别.py

VGG_PATH需要修改成自己本地保存VGG模型的地址

#迁移学习猫狗识别
#首先下载vgg19网络的mat文件,然后对数据进行预处理
#直接送到了我们预先展开的vgg网络中,注意这里权重是constant,直接把别人训练好的权重拿来初始化,然后最后改一下全连接层
import tensorflow as tf
import numpy as np
import get_files
import get_batch
import VGG_net
import model
import os
# Command-line flags for the training script. Each flag is read through FLAGS.<name>
# inside main(); the third argument of every DEFINE_* call is the help text.
# Input pipeline: image size, batch size and queue capacity.
tf.app.flags.DEFINE_integer('image_size', 224, '图片尺寸')
tf.app.flags.DEFINE_integer('batch_size', 32, '每次训练图片的张数')
tf.app.flags.DEFINE_integer('capacity', 256, '队列中最多容纳元素的个数')
# Optimisation: base learning rate, its decay rate and the moving-average decay.
tf.app.flags.DEFINE_float('learning_rate_base', 0.0001, '基础的学习率用于指数衰减的学习率中')
tf.app.flags.DEFINE_float('learning_rate_decay', 0.99, '学习率的衰减率')
tf.app.flags.DEFINE_float('moving_average_decay', 0.99, '滑动平均的衰减率')
tf.app.flags.DEFINE_integer('training_steps', 6000, '训练的轮数')
tf.app.flags.DEFINE_integer('n_class', 2, '类别数目')
tf.app.flags.DEFINE_integer('all_number', 25000, '训练样本总数')
# Filesystem locations: training images, summary logs, checkpoints and the VGG19 .mat file.
tf.app.flags.DEFINE_string('train_dir', './train_image', '数据存放地址')
tf.app.flags.DEFINE_string('logs_train_dir', './logs_train_dir/', '训练集输出日志保存的路径')
tf.app.flags.DEFINE_string('save_dir', './save/', '模型保存的路径')
tf.app.flags.DEFINE_string('VGG_PATH', '../FCN.tensorflow-master/Model_zoo/imagenet-vgg-verydeep-19.mat', 'VGG网络参数')
FLAGS = tf.app.flags.FLAGS



def main(argv=None):
    """Build the VGG19 transfer-learning graph and train the cat/dog classifier.

    The pre-trained VGG19 layers are loaded as constants by ``VGG_net.net``;
    only the two fully connected layers appended after ``relu5_4`` are
    trainable. Summaries are written to ``FLAGS.logs_train_dir`` and
    checkpoints to ``FLAGS.save_dir``.

    Args:
        argv: Unused; accepted because ``tf.app.run()`` passes the remaining
            command-line arguments to ``main``.
    """
    print('获取图片和标签集中')
    train, train_label = get_files.get_files(FLAGS.train_dir)
    print('生成批次中')
    train_batch, train_label_batch = get_batch.get_batch(
        train, train_label, FLAGS.image_size, FLAGS.image_size,
        FLAGS.batch_size, FLAGS.capacity)
    print('train_batch',train_batch.shape)
    # Forward the batch through VGG19; `nets` maps each layer name to its output.
    nets = VGG_net.net(FLAGS.VGG_PATH, train_batch)

    # First added fully connected layer: flattened relu5_4 features -> 10 units.
    with tf.variable_scope("dense1"):
        image = tf.reshape(nets["relu5_4"], [FLAGS.batch_size, -1])
        # 14*14*512 is hard-coded, so this only matches an image_size of 224.
        weights = tf.Variable(tf.random_normal(shape=[14*14*512, 10], stddev=0.1))
        bias = tf.Variable(tf.zeros(shape=[10]) + 0.1)
        dense1 = tf.nn.tanh(tf.matmul(image, weights) + bias)

    # Second added fully connected layer: 10 units -> class logits.
    with tf.variable_scope("out"):
        weights = tf.Variable(tf.random_normal(shape=[10, FLAGS.n_class], stddev=0.1))
        bias = tf.Variable(tf.zeros(shape=[FLAGS.n_class]) + 0.1)
        out = tf.matmul(dense1, weights) + bias

    loss = model.loss(logits=out, labels=train_label_batch)

    op = model.train(learning_rate_base=FLAGS.learning_rate_base, loss=loss,
                     learning_rate_decay=FLAGS.learning_rate_decay,
                     all_number=FLAGS.all_number, batch_size=FLAGS.batch_size)

    # Single op that runs the optimiser step together with the moving-average update.
    train_end = model.moving(moving_average_decay=FLAGS.moving_average_decay, train_step=op)

    accuracy = model.accuracy(out=out, train_label_batch=train_label_batch)

    saver = tf.train.Saver()
    summary_op = tf.summary.merge_all()

    # Saver.save() does not reliably create the checkpoint directory, so make
    # sure it exists before training starts.
    os.makedirs(FLAGS.save_dir, exist_ok=True)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        train_writer = tf.summary.FileWriter(FLAGS.logs_train_dir, sess.graph)
        # The input pipeline is queue based, so its runner threads must be started.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for step in range(FLAGS.training_steps):
                summary, _, tra_loss, tra_acc = sess.run(
                    [summary_op, train_end, loss, accuracy])

                # Log every 50 steps (step 0 is skipped).
                if step % 50 == 0 and step != 0:
                    train_writer.add_summary(summary, step)
                    print("step", step, "loss", tra_loss, "acc", tra_acc * 100.0)

                # Checkpoint every 2000 steps and once more on the final step.
                if step % 2000 == 0 or (step + 1) == FLAGS.training_steps:
                    saver.save(sess, os.path.join(FLAGS.save_dir, 'model.ckpt'), global_step=step)

        except tf.errors.OutOfRangeError:
            print('训练出现出错')
        finally:
            # Stop and join the queue threads and flush the event file even
            # when an unexpected exception is propagating.
            coord.request_stop()
            coord.join(threads)
            train_writer.close()


# Script entry point: tf.app.run() parses the command-line flags and then calls main().
if __name__=="__main__":
    tf.app.run()

 

VGG_net.py

import tensorflow as tf
import numpy as np
import scipy.io as scio
def _conv_layer(input, weights, bias):
    """Convolve `input` with fixed kernels (stride 1, SAME padding) and add `bias`.

    `weights` is wrapped in tf.constant, so the kernels are not trainable.
    """
    frozen_kernels = tf.constant(weights)
    convolved = tf.nn.conv2d(
        input, frozen_kernels, strides=[1, 1, 1, 1], padding="SAME")
    return tf.nn.bias_add(convolved, bias)
def _pool_layer(input):
    """Apply 2x2 max pooling with stride 2 and SAME padding to `input`."""
    window = (1, 2, 2, 1)
    step = (1, 2, 2, 1,)
    pooled = tf.nn.max_pool(input, ksize=window, strides=step, padding="SAME")
    return pooled

def net(data_path,input_image):
    """Run `input_image` through the convolutional part of a pre-trained VGG19.

    Args:
        data_path: Path of the VGG19 ``.mat`` file, loaded with
            ``scipy.io.loadmat``.
        input_image: Image tensor fed to the first convolution.

    Returns:
        dict mapping every layer name in ``layers`` (conv1_1 ... relu5_4) to
        the output tensor of that layer.
    """
    # Layer sequence of the five VGG19 convolutional stages (no pool5, no FC layers).
    layers=('conv1_1','relu1_1','conv1_2','relu1_2','pool1',
            'conv2_1','relu2_1','conv2_2','relu2_2','pool2',
            'conv3_1','relu3_1','conv3_2','relu3_2','conv3_3','relu3_3','conv3_4','relu3_4','pool3',
            'conv4_1','relu4_1','conv4_2','relu4_2','conv4_3','relu4_3','conv4_4','relu4_4','pool4',
            'conv5_1', 'relu5_1','conv5_2','relu5_2','conv5_3','relu5_3','conv5_4','relu5_4'
            )
    data=scio.loadmat(data_path)
    # Per-layer parameter records; entry i corresponds to layers[i].
    weights=data['layers'][0]
    outputs={}
    current=input_image
    for i,name in enumerate(layers):
        kind=name[:4]
        if kind=='conv':
            kernels,bias=weights[i][0][0][0][0]
            # Swap the first two kernel axes. Presumably this converts the
            # stored (width, height, in, out) layout to the (height, width,
            # in, out) layout tf.nn.conv2d expects -- confirm against the
            # .mat file format.
            kernels=np.transpose(kernels,[1,0,2,3])
            bias=bias.reshape(-1)
            current=_conv_layer(current,kernels,bias)
        elif kind=='relu':
            current=tf.nn.relu(current)
        elif kind=="pool":
            current=_pool_layer(current)
        # Keep every intermediate activation so callers can tap any layer.
        outputs[name]=current
    assert len(outputs)==len(layers)
    return outputs

 

model.py

import tensorflow as tf
def loss(logits,labels):
    """Return the mean sparse softmax cross-entropy of `logits` against `labels`.

    Also registers a scalar summary named '<scope>/loss'.
    """
    with tf.variable_scope('loss') as scope:
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels)
        mean_loss = tf.reduce_mean(per_example)
        tf.summary.scalar(scope.name + '/loss', mean_loss)
    return mean_loss

def train(learning_rate_base,loss,learning_rate_decay,all_number,batch_size):
    """Create the Adam training op with an exponentially decaying learning rate.

    Args:
        learning_rate_base: Initial learning rate.
        loss: Scalar loss tensor to minimise.
        learning_rate_decay: Decay rate applied every ``all_number / batch_size``
            steps.
        all_number: Total number of training samples.
        batch_size: Number of samples per training step.

    Returns:
        The op that performs one optimisation step and increments the global step.
    """
    with tf.variable_scope('optimizer') as scope:
        # The same counter must drive both the decay schedule and minimize();
        # otherwise the schedule never advances and the rate stays at its base value.
        global_step = tf.Variable(0, trainable=False, name='global_step')
        learning_rate = tf.train.exponential_decay(
            learning_rate_base, global_step, all_number / batch_size,
            learning_rate_decay)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss, global_step=global_step, name=scope.name)

    return train_op

def moving(moving_average_decay,train_step,global_step=None):
    """Combine the training step with an exponential-moving-average update.

    Args:
        moving_average_decay: Decay rate of the moving average.
        train_step: Op that performs one optimisation step.
        global_step: Optional step counter passed to
            ``ExponentialMovingAverage`` as ``num_updates``. When given, the
            effective decay is ``min(decay, (1 + step) / (10 + step))``.
            When omitted, ``moving_average_decay`` is used as is.

    Returns:
        A no-op that depends on both ``train_step`` and the averaging op, so
        running it executes both.
    """
    with tf.variable_scope('moving_optimizer'):
        # A counter that is never incremented would pin the effective decay
        # at min(decay, 1/10); only pass a counter the optimiser really advances.
        variable_averages = tf.train.ExponentialMovingAverage(
            moving_average_decay, num_updates=global_step)
        # Maintain a shadow average for every trainable variable.
        variable_averages_op = variable_averages.apply(tf.trainable_variables())
        with tf.control_dependencies([train_step, variable_averages_op]):
            train_op = tf.no_op(name='moving_optimizer')

    return train_op

def accuracy(out,train_label_batch):
    """Return the fraction of rows in `out` whose top-1 class equals the label.

    Also registers a scalar summary named '<scope>/accuracy'.
    """
    with tf.variable_scope('accuracy') as scope:
        hits = tf.nn.in_top_k(out, train_label_batch, 1)
        batch_accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))
        tf.summary.scalar(scope.name + '/accuracy', batch_accuracy)
    return batch_accuracy

 

get_files.py

import os
import numpy as np
def get_files(file_dir):
    """Collect the cat/dog image paths in `file_dir` with their labels, shuffled.

    A file is labelled by the part of its name before the first '.':
    names containing 'cat' get label 0, names containing 'dog' get label 1,
    and every other file is ignored.

    Args:
        file_dir: Directory that holds the training images.

    Returns:
        (image_list, label_list): two lists of equal length in the same
        random order -- file paths (str) and integer labels. Both are empty
        when the directory contains no matching file.
    """
    image_paths = []
    labels = []
    for filename in os.listdir(file_dir):
        prefix = filename.split(sep='.')[0]
        if 'cat' in prefix:
            label = 0
        elif 'dog' in prefix:
            label = 1
        else:
            continue
        # os.path.join keeps the path valid on every platform.
        image_paths.append(os.path.join(file_dir, filename))
        labels.append(label)

    # Shuffle paths and labels with one shared permutation so pairs stay aligned.
    order = np.random.permutation(len(image_paths))
    image_list = [image_paths[i] for i in order]
    label_list = [labels[i] for i in order]
    return image_list, label_list

 

get_batch.py

import tensorflow as tf
# image_W ,image_H 指定图片大小,batch_size 每批读取的个数 ,capacity队列中 最多容纳元素的个数
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Build a queue-based input pipeline that yields image/label batches.

    Args:
        image: List of image file paths.
        label: List of integer labels, aligned with `image`.
        image_W, image_H: Target size handed to resize_image_with_crop_or_pad
            (image_W is passed first, i.e. in the target_height position).
        batch_size: Number of examples per batch.
        capacity: Maximum number of elements held in the batching queue.

    Returns:
        (image_batch, label_batch): float32 image batch and a label batch
        reshaped to [batch_size].
    """
    # Convert the Python lists to tensors of the dtypes the pipeline needs.
    path_tensor = tf.cast(image, tf.string)
    label_tensor = tf.cast(label, tf.int32)

    # Queue that emits one (path, label) pair at a time.
    path_item, label_item = tf.train.slice_input_producer([path_tensor, label_tensor])

    # Read and decode the JPEG as a 3-channel image.
    raw_bytes = tf.read_file(path_item)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    # Centre-crop or zero-pad to the requested size (no rescaling).
    sized = tf.image.resize_image_with_crop_or_pad(decoded, image_W, image_H)
    # Per-image standardisation of the pixel values.
    standardized = tf.image.per_image_standardization(sized)

    # tf.train.batch assembles batches without additional shuffling.
    image_batch, label_batch = tf.train.batch(
        [standardized, label_item],
        batch_size=batch_size,
        num_threads=64,
        capacity=capacity)

    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch

 

predict_one.py

测试

import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import model
import os
import VGG_net
import cv2
# 从指定目录中选取一张图片
def get_one_image(train):
    """Pick a random file from directory `train`, display it and return it as an array.

    Args:
        train: Directory containing the candidate images.

    Returns:
        numpy array of shape (224, 224, 3) holding the RGB pixels of the
        chosen image after resizing.

    Raises:
        ValueError: If the directory contains no entries.
    """
    files = os.listdir(train)
    n = len(files)
    if n == 0:
        raise ValueError('no files found in %r' % (train,))
    ind = np.random.randint(0, n)  # random index in [0, n)
    img_dir = os.path.join(train, files[ind])
    image = Image.open(img_dir)
    plt.imshow(image)
    plt.show()
    # Force three channels: grayscale, palette or RGBA files would otherwise
    # not fit the [1, 224, 224, 3] network input.
    image = image.convert('RGB')
    image = image.resize([224, 224])  # same spatial size as the training input
    return np.array(image)


def evaluate_one_image():
    """Classify one random image from './test1/' with the latest checkpoint in 'save/'.

    Rebuilds the same graph as the training script (VGG19 up to relu5_4 plus
    the two fully connected layers), restores the trained variables and
    prints the cat/dog probabilities. Returns early, without predicting,
    when no checkpoint is found.

    Raises:
        RuntimeError: If no default TensorFlow session is active.
    """
    session = tf.get_default_session()
    if session is None:
        raise RuntimeError(
            'evaluate_one_image() needs a default session, '
            'e.g. one created with tf.InteractiveSession()')

    # Directory holding the images to classify.
    train = './test1/'
    image_array = get_one_image(train)  # already resized to 224x224

    # Apply the same per-image standardisation as the training pipeline;
    # feeding raw pixel values would not match what the dense layers saw.
    # NOTE(review): training centre-crops/pads while this path rescales the
    # image -- confirm whether that difference is acceptable.
    image = tf.image.per_image_standardization(tf.cast(image_array, tf.float32))
    # Add the batch dimension expected by the network.
    image = tf.reshape(image, [1, 224, 224, 3])

    x = tf.placeholder(tf.float32, shape=[1,224, 224, 3])

    nets = VGG_net.net('../FCN.tensorflow-master/Model_zoo/imagenet-vgg-verydeep-19.mat', x)
    # The two added fully connected layers; they must be created in the same
    # order and scopes as during training so the checkpoint variables match.
    with tf.variable_scope("dense1"):
        i = tf.reshape(nets["relu5_4"], [1, -1])
        weights = tf.Variable(tf.random_normal(shape=[14 * 14 * 512, 10], stddev=0.1))
        bias = tf.Variable(tf.zeros(shape=[10]) + 0.1)
        dense1 = tf.nn.tanh(tf.matmul(i, weights) + bias)

    with tf.variable_scope("out"):
        weights = tf.Variable(tf.random_normal(shape=[10, 2], stddev=0.1))
        bias = tf.Variable(tf.zeros(shape=[2]) + 0.1)
        out = tf.matmul(dense1, weights) + bias

    # The network outputs raw logits; softmax turns them into probabilities.
    logit = tf.nn.softmax(out)

    # Directory the training script saved its checkpoints to.
    logs_train_dir = 'save/'
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(logs_train_dir)
    if not (ckpt and ckpt.model_checkpoint_path):
        # Without restored weights the variables are uninitialised, so stop here.
        print('模型加载失败,,,文件没有找到')
        return

    # The checkpoint file name ends with '-<step>'.
    global_step = ckpt.model_checkpoint_path.split('-')[-1]
    saver.restore(session, ckpt.model_checkpoint_path)
    print('模型加载成功, 训练的步数为 %s' % global_step)

    # Evaluate the preprocessed image and feed it through the placeholder.
    prediction = session.run(logit, feed_dict={x: session.run(image)})
    cat_prob, dog_prob = prediction[0]
    print('猫的概率 %.6f' % cat_prob)
    print('狗的概率 %.6f' % dog_prob)
    if cat_prob >= dog_prob:
        print('图片为猫')
    else:
        print('图片为狗')



# Script entry point for the single-image prediction.
if __name__=="__main__":

    # Run the prediction
    print("正在检测")
    sess = tf.InteractiveSession()  # InteractiveSession installs itself as the default session
    evaluate_one_image()

 

结果:

迁移学习VGG_猫狗识别_第4张图片

迁移学习VGG_猫狗识别_第5张图片

你可能感兴趣的:(计算机视觉)