使用了VGG19的模型迁移到猫狗识别中,并且在最后添加了两层FC全连接层用于分类。
并且在训练中加入了学习率指数衰减以及滑动平均模型。
其中 train_image(25000 张图片)存放训练样本,test1(256 张图片)存放测试样本。
test1:
VGG模型:
需要自己下载
VGG迁移学习_猫狗识别.py
VGG_PATH 需要修改成自己本地保存 VGG 模型(.mat 文件)的路径
#迁移学习猫狗识别
#首先下载vgg19网络的mat文件,然后对数据进行预处理
#直接送到了我们预先展开的vgg网络中,注意这里权重是constant,直接把别人训练好的权重拿来初始化,然后最后改一下全连接层
import tensorflow as tf
import numpy as np
import get_files
import get_batch
import VGG_net
import model
import os
# Command-line flags for the training script (help strings are runtime text
# and are kept verbatim).
_flags = tf.app.flags

# Input pipeline settings.
_flags.DEFINE_integer('image_size', 224, '图片尺寸')
_flags.DEFINE_integer('batch_size', 32, '每次训练图片的张数')
_flags.DEFINE_integer('capacity', 256, '队列中最多容纳元素的个数')

# Optimisation hyper-parameters.
_flags.DEFINE_float('learning_rate_base', 0.0001, '基础的学习率用于指数衰减的学习率中')
_flags.DEFINE_float('learning_rate_decay', 0.99, '学习率的衰减率')
_flags.DEFINE_float('moving_average_decay', 0.99, '滑动平均的衰减率')
_flags.DEFINE_integer('training_steps', 6000, '训练的轮数')

# Dataset description.
_flags.DEFINE_integer('n_class', 2, '类别数目')
_flags.DEFINE_integer('all_number', 25000, '训练样本总数')

# Input and output locations.
_flags.DEFINE_string('train_dir', './train_image', '数据存放地址')
_flags.DEFINE_string('logs_train_dir', './logs_train_dir/', '训练集输出日志保存的路径')
_flags.DEFINE_string('save_dir', './save/', '模型保存的路径')
_flags.DEFINE_string('VGG_PATH', '../FCN.tensorflow-master/Model_zoo/imagenet-vgg-verydeep-19.mat', 'VGG网络参数')

FLAGS = _flags.FLAGS
def main(argv=None):
    """Train a two-layer classifier head on top of VGG19 features.

    Builds the queue-based input pipeline, runs the batch through the VGG19
    graph from ``VGG_net`` (whose convolution kernels are embedded as
    constants), appends two fully connected layers, and trains them for
    ``FLAGS.training_steps`` steps. Summaries are written to
    ``FLAGS.logs_train_dir`` and checkpoints to ``FLAGS.save_dir``.

    Args:
        argv: Unused. Present because ``tf.app.run`` calls ``main`` with the
            remaining command-line arguments.
    """
    print('获取图片和标签集中')
    train, train_label = get_files.get_files(FLAGS.train_dir)
    print('生成批次中')
    train_batch, train_label_batch = get_batch.get_batch(
        train, train_label, FLAGS.image_size, FLAGS.image_size,
        FLAGS.batch_size, FLAGS.capacity)
    print('train_batch', train_batch.shape)

    # Forward pass through VGG19; ``nets`` maps layer name -> output tensor.
    nets = VGG_net.net(FLAGS.VGG_PATH, train_batch)

    # Classifier head: two fully connected layers on top of relu5_4.
    # The scope names must match the ones used at prediction time so that the
    # checkpointed variable names line up.
    with tf.variable_scope("dense1"):
        image = tf.reshape(nets["relu5_4"], [FLAGS.batch_size, -1])
        # 14*14*512 is hard-coded, so this only fits the default
        # image_size of 224 (TODO: derive it from the feature-map shape).
        weights = tf.Variable(tf.random_normal(shape=[14 * 14 * 512, 10], stddev=0.1))
        bias = tf.Variable(tf.zeros(shape=[10]) + 0.1)
        dense1 = tf.nn.tanh(tf.matmul(image, weights) + bias)
    with tf.variable_scope("out"):
        weights = tf.Variable(tf.random_normal(shape=[10, FLAGS.n_class], stddev=0.1))
        bias = tf.Variable(tf.zeros(shape=[FLAGS.n_class]) + 0.1)
        out = tf.matmul(dense1, weights) + bias

    loss = model.loss(logits=out, labels=train_label_batch)
    op = model.train(learning_rate_base=FLAGS.learning_rate_base, loss=loss,
                     learning_rate_decay=FLAGS.learning_rate_decay,
                     all_number=FLAGS.all_number, batch_size=FLAGS.batch_size)
    # Training op that also maintains moving averages of the trainable variables.
    train_end = model.moving(moving_average_decay=FLAGS.moving_average_decay, train_step=op)
    accuracy = model.accuracy(out=out, train_label_batch=train_label_batch)

    saver = tf.train.Saver()
    summary_op = tf.summary.merge_all()

    # Saver.save does not create a missing parent directory, and the first
    # checkpoint is written at step 0, so make sure the directory exists.
    os.makedirs(FLAGS.save_dir, exist_ok=True)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        train_writer = tf.summary.FileWriter(FLAGS.logs_train_dir, sess.graph)
        # Start the queue-runner threads that feed the input pipeline.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for step in range(FLAGS.training_steps):
                summary, _, tra_loss, tra_acc = sess.run(
                    [summary_op, train_end, loss, accuracy])
                # Log every 50 steps (skipping step 0).
                if step % 50 == 0 and step != 0:
                    train_writer.add_summary(summary, step)
                    print("step", step, "loss", tra_loss, "acc", tra_acc * 100.0)
                # Checkpoint every 2000 steps and on the final step.
                if step % 2000 == 0 or (step + 1) == FLAGS.training_steps:
                    saver.save(sess, os.path.join(FLAGS.save_dir, 'model.ckpt'), global_step=step)
        except tf.errors.OutOfRangeError:
            print('训练出现出错')
        finally:
            # Stop the input threads, then flush and close the summary file.
            coord.request_stop()
            coord.join(threads)
            train_writer.close()


if __name__ == "__main__":
    tf.app.run()
VGG_net.py
import tensorflow as tf
import numpy as np
import scipy.io as scio
def _conv_layer(input, weights, bias):
    """Convolve ``input`` with fixed kernels (stride 1, SAME padding) and add ``bias``.

    ``weights`` is wrapped in ``tf.constant``, so the kernels are baked into
    the graph rather than created as variables.
    """
    frozen_kernels = tf.constant(weights)
    convolved = tf.nn.conv2d(input, frozen_kernels, strides=[1, 1, 1, 1], padding="SAME")
    biased = tf.nn.bias_add(convolved, bias)
    return biased
def _pool_layer(input):
    """Apply 2x2 max pooling with stride 2 and SAME padding."""
    window = (1, 2, 2, 1)
    step = (1, 2, 2, 1)
    pooled = tf.nn.max_pool(input, ksize=window, strides=step, padding="SAME")
    return pooled
def net(data_path, input_image):
    """Build the convolutional part of VGG19 from a ``.mat`` weight file.

    Args:
        data_path: Path to the VGG19 ``.mat`` file, read with
            ``scipy.io.loadmat``.
        input_image: Input tensor fed to the first convolution. No mean
            subtraction is applied here; any normalisation is the caller's
            responsibility.

    Returns:
        A dict mapping each layer name in ``layers`` (``conv1_1`` through
        ``relu5_4``) to that layer's output tensor.
    """
    # Layer order must follow the order of entries in the .mat file, because
    # the loop index is used to look up each layer's parameters.
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4',
    )
    data = scio.loadmat(data_path)
    weights = data['layers'][0]

    outputs = {}
    current = input_image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # Swap the first two kernel axes (presumably the .mat file stores
            # width before height while TF expects height first -- confirm
            # against the weight file's layout).
            kernels = np.transpose(kernels, [1, 0, 2, 3])
            bias = bias.reshape(-1)
            current = _conv_layer(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current)
        elif kind == "pool":
            current = _pool_layer(current)
        outputs[name] = current

    # Sanity check: every listed layer produced exactly one entry.
    assert len(outputs) == len(layers)
    return outputs
model.py
import tensorflow as tf
def loss(logits, labels):
    """Return the mean sparse softmax cross-entropy and record it as a scalar summary.

    Args:
        logits: Unnormalised class scores.
        labels: Integer class indices, one per example.
    """
    with tf.variable_scope('loss') as scope:
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
        mean_loss = tf.reduce_mean(per_example)
        summary_tag = scope.name + '/loss'
        tf.summary.scalar(summary_tag, mean_loss)
    return mean_loss
def train(learning_rate_base, loss, learning_rate_decay, all_number, batch_size):
    """Create an Adam training op with an exponentially decaying learning rate.

    Args:
        learning_rate_base: Initial learning rate.
        loss: Scalar loss tensor to minimise.
        learning_rate_decay: Multiplicative decay factor.
        all_number: Total number of training samples.
        batch_size: Samples per step; ``all_number / batch_size`` is used as
            the number of steps per decay period.

    Returns:
        The op that applies one optimisation step and increments the step
        counter.
    """
    with tf.variable_scope('optimizer') as scope:
        # The decay schedule must read the same counter that minimize()
        # increments; with a separate, never-updated variable the learning
        # rate would stay at its base value for the whole run.
        global_step = tf.Variable(0, trainable=False, name='global_step')
        learning_rate = tf.train.exponential_decay(
            learning_rate_base, global_step, all_number / batch_size,
            learning_rate_decay)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss, global_step=global_step, name=scope.name)
    return train_op
def moving(moving_average_decay, train_step):
    """Combine a training step with moving-average tracking of trainable variables.

    Args:
        moving_average_decay: Upper bound on the moving-average decay rate.
        train_step: Op that performs one optimisation step.

    Returns:
        A no-op that, when run, triggers ``train_step``, bumps the update
        counter and then updates the moving averages.
    """
    with tf.variable_scope('moving_optimizer'):
        # ExponentialMovingAverage uses min(decay, (1 + n) / (10 + n)) when
        # given an update counter n. The counter therefore has to advance;
        # if it stays at 0 the effective decay is pinned at 0.1.
        num_updates = tf.Variable(0, trainable=False, name='num_updates')
        variable_averages = tf.train.ExponentialMovingAverage(moving_average_decay, num_updates)

        # Count the update only after the optimiser step has run ...
        with tf.control_dependencies([train_step]):
            count_update = tf.assign_add(num_updates, 1)
        # ... and average the variables after they have been updated.
        with tf.control_dependencies([count_update]):
            variable_averages_op = variable_averages.apply(tf.trainable_variables())

        with tf.control_dependencies([variable_averages_op]):
            train_op = tf.no_op(name='moving_optimizer')
    return train_op
def accuracy(out, train_label_batch):
    """Return the fraction of examples whose label is the top-1 prediction.

    The value is also recorded as a scalar summary.

    Args:
        out: Class scores, one row per example.
        train_label_batch: Integer class indices, one per example.
    """
    with tf.variable_scope('accuracy') as scope:
        is_correct = tf.nn.in_top_k(out, train_label_batch, 1)
        mean_correct = tf.reduce_mean(tf.cast(is_correct, tf.float32))
        summary_tag = scope.name + '/accuracy'
        tf.summary.scalar(summary_tag, mean_correct)
    return mean_correct
get_files.py
import os
import numpy as np
def get_files(file_dir):
    """Collect cat/dog image paths from ``file_dir`` with labels, in random order.

    A file is labelled by the part of its name before the first ``.``:
    label 0 if it contains ``cat``, otherwise label 1 if it contains ``dog``.
    Files matching neither are skipped.

    Args:
        file_dir: Directory holding the training images.

    Returns:
        ``(image_list, label_list)``: a list of file paths and a list of int
        labels of the same length, shuffled with the same permutation.
    """
    samples = []
    for file_name in os.listdir(file_dir):
        stem = file_name.split('.')[0]
        if 'cat' in stem:
            label = 0
        elif 'dog' in stem:
            label = 1
        else:
            continue
        # os.path.join keeps the paths valid on every platform; a hard-coded
        # "\\" separator only works on Windows.
        samples.append((os.path.join(file_dir, file_name), label))

    # Shuffle paths and labels together with one permutation.
    order = np.random.permutation(len(samples))
    image_list = [samples[i][0] for i in order]
    label_list = [samples[i][1] for i in order]
    return image_list, label_list
get_batch.py
import tensorflow as tf
def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Build a queue-based input pipeline that yields batches of images and labels.

    Args:
        image: List of JPEG file paths.
        label: List of integer labels, parallel to ``image``.
        image_W: Target image width in pixels.
        image_H: Target image height in pixels.
        batch_size: Number of examples per batch.
        capacity: Maximum number of elements held in the batching queue.

    Returns:
        ``(image_batch, label_batch)``: a float32 image batch and an int32
        label batch reshaped to ``[batch_size]``.
    """
    # Convert the Python lists into tensors the queue can slice.
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)

    # Queue that emits one (path, label) pair at a time.
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]

    # Read the file and decode it as a 3-channel image.
    image_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(image_contents, channels=3)

    # Centre-crop or zero-pad to the target size. The API takes height before
    # width, so pass them by keyword to avoid swapping them.
    image = tf.image.resize_image_with_crop_or_pad(
        image, target_height=image_H, target_width=image_W)

    # Per-image standardisation as implemented by TensorFlow.
    image = tf.image.per_image_standardization(image)

    # Assemble batches using 64 enqueueing threads.
    image_batch, label_batch = tf.train.batch(
        [image, label], batch_size=batch_size, num_threads=64, capacity=capacity)

    # Give the label batch an explicit 1-D shape.
    label_batch = tf.reshape(label_batch, [batch_size])
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch
predict_one.py
测试
import tensorflow as tf
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import model
import os
import VGG_net
import cv2
def get_one_image(train):
    """Pick a random file from ``train``, display it, and return it as a 224x224 RGB array.

    Args:
        train: Directory to pick the image from.

    Returns:
        The image as a numpy array of shape ``(224, 224, 3)``.

    Raises:
        ValueError: If the directory contains no entries.
    """
    files = os.listdir(train)
    if not files:
        raise ValueError('no files found in directory: %r' % (train,))

    ind = np.random.randint(0, len(files))
    img_dir = os.path.join(train, files[ind])
    image = Image.open(img_dir)

    # Show the picture as stored on disk.
    plt.imshow(image)
    plt.show()

    # Force three channels so grayscale or RGBA files still produce the
    # (224, 224, 3) array that callers reshape into a batch, then resize to
    # the 224x224 size used in this file.
    image = image.convert('RGB').resize((224, 224))
    return np.array(image)
def evaluate_one_image():
    """Classify one random image from ``./test1/`` as cat or dog and print the result.

    Rebuilds the VGG19 + two-dense-layer graph, restores the latest checkpoint
    from ``save/`` and prints the class probabilities. Must be called while a
    default TensorFlow session is active (e.g. after creating a
    ``tf.InteractiveSession``).

    Raises:
        RuntimeError: If there is no default session.
    """
    # Use the default session instead of relying on a module-level global.
    sess = tf.get_default_session()
    if sess is None:
        raise RuntimeError('evaluate_one_image() needs a default TensorFlow session')

    # Directory holding the test pictures.
    train = './test1/'
    image_array = get_one_image(train)
    BATCH_SIZE = 1  # a single picture is evaluated
    N_CLASSES = 2   # two outputs: cat and dog

    # Resize to the 224x224 network input size and convert to float32.
    im = cv2.resize(image_array, (224, 224), interpolation=cv2.INTER_CUBIC)
    image = tf.cast(im, tf.float32)
    # Apply the same per-image standardisation as the training pipeline in
    # get_batch.py; feeding raw pixel values would not match the training inputs.
    image = tf.image.per_image_standardization(image)
    # Add the batch dimension: [224, 224, 3] -> [1, 224, 224, 3].
    image = tf.reshape(image, [BATCH_SIZE, 224, 224, 3])

    x = tf.placeholder(tf.float32, shape=[BATCH_SIZE, 224, 224, 3])
    nets = VGG_net.net('../FCN.tensorflow-master/Model_zoo/imagenet-vgg-verydeep-19.mat', x)

    # Classifier head; scope names must match the training graph so that the
    # checkpointed variables can be restored by name.
    with tf.variable_scope("dense1"):
        i = tf.reshape(nets["relu5_4"], [BATCH_SIZE, -1])
        weights = tf.Variable(tf.random_normal(shape=[14 * 14 * 512, 10], stddev=0.1))
        bias = tf.Variable(tf.zeros(shape=[10]) + 0.1)
        dense1 = tf.nn.tanh(tf.matmul(i, weights) + bias)
    with tf.variable_scope("out"):
        weights = tf.Variable(tf.random_normal(shape=[10, N_CLASSES], stddev=0.1))
        bias = tf.Variable(tf.zeros(shape=[N_CLASSES]) + 0.1)
        out = tf.matmul(dense1, weights) + bias

    # The head outputs raw scores; convert them to probabilities.
    logit = tf.nn.softmax(out)

    # Directory the training script saves checkpoints to.
    logs_train_dir = 'save/'
    saver = tf.train.Saver()

    ckpt = tf.train.get_checkpoint_state(logs_train_dir)
    if not (ckpt and ckpt.model_checkpoint_path):
        # Without a checkpoint the variables are uninitialised, so stop here
        # instead of failing inside sess.run.
        print('模型加载失败,,,文件没有找到')
        return

    # Checkpoint paths end in "-<step>"; take the step for the log message.
    global_step = ckpt.model_checkpoint_path.split('-')[-1]
    saver.restore(sess, ckpt.model_checkpoint_path)
    print('模型加载成功, 训练的步数为 %s' % global_step)

    # Run the preprocessed picture through the network.
    prediction = sess.run(logit, feed_dict={x: sess.run(image)})
    cat_prob, dog_prob = prediction[0]
    print('猫的概率 %.6f' % cat_prob)
    print('狗的概率 %.6f' % dog_prob)
    if cat_prob >= dog_prob:
        print('图片为猫')
    else:
        print('图片为狗')


if __name__ == "__main__":
    print("正在检测")
    # InteractiveSession installs itself as the default session.
    sess = tf.InteractiveSession()
    evaluate_one_image()
结果: