用残差网络来实现猫狗数据集分类,猫狗分类是一个很经典的图像分类问题。自己用resnet网络来对猫狗分类进行了一个简单的实现,残差网络比较浅层,效果并不好,还可以加深层网络,更改超参数对模型进行改进。首先将数据集保存在对应路径下,用get_image获取图片和标签,再利用get_batch获得一个batch的图像数据和标签数据,代码都有着非常详细的注释。
import tensorflow as tf  # fixed: original line read "mport tensorflow as tf", a SyntaxError
import os
import matplotlib as mpl
import numpy as np
from PIL import Image
# Use a font that can render Chinese glyphs, and keep minus signs displayable.
mpl.rcParams['font.sans-serif']=[u'simHei']
mpl.rcParams['axes.unicode_minus']=False
cwd='D:/wemystic/datas/imagecalssify/tensorflow-vgg16-train-and-test-master/train'# root of the training images
classes=['cat','dog']# one sub-folder per class; the list index is the class label
def get_image():
    """Collect shuffled image paths and integer labels from the class folders."""
    paths = []
    labels = []
    # Walk each class directory; the class index doubles as the numeric label.
    for label, class_name in enumerate(classes):
        folder = cwd + '/' + class_name + '/'
        for file_name in os.listdir(folder):
            paths.append(folder + file_name)
            labels.append(label)
    # Pair paths with labels, shuffle the pairs together, then split them apart.
    pairs = np.array([paths, labels]).transpose()
    np.random.shuffle(pairs)
    shuffled_paths = list(pairs[:, 0])
    # The numpy array stored labels as strings; convert them back to int.
    shuffled_labels = [int(v) for v in pairs[:, 1]]
    return shuffled_paths, shuffled_labels
def get_batch(image_list, label_list, height, width, batch_size):
    """Build a TF queue-based input pipeline yielding (image, label) batches.

    Each image is decoded as 3-channel JPEG, cropped/padded to
    (height, width), and standardized per image; labels come out as a
    1-D int32 vector of length batch_size.
    """
    path_tensor = tf.cast(image_list, tf.string)
    label_tensor = tf.cast(label_list, tf.int32)
    input_queue = tf.train.slice_input_producer([path_tensor, label_tensor])
    # Read and decode one file, then normalize its size and pixel statistics.
    raw_bytes = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(raw_bytes, channels=3)
    image = tf.image.resize_image_with_crop_or_pad(image, height, width)
    image = tf.image.per_image_standardization(image)
    image_train_batch, label_train_batch = tf.train.batch(
        [image, input_queue[1]],
        batch_size=batch_size,
        num_threads=8,
        capacity=batch_size * 10)
    image_train_batch = tf.cast(image_train_batch, tf.float32)
    # Flatten the label batch to a plain 1-D vector.
    label_train_batch = tf.reshape(label_train_batch, shape=[batch_size])
    return image_train_batch, label_train_batch
接着构建resnet残差网络,构建损失,训练方式,得到准确率。
#构建残差网络
def model_net(input_x, n_class):
    """ResNet-style classifier: stem conv, four residual stages, avg-pool, dense.

    Stage layout is (filters, blocks) = (64,3), (128,4), (256,6), (512,3);
    every stage after the first downsamples on its first block.
    """
    net = tf.layers.conv2d(input_x, filters=32, kernel_size=3, strides=1, padding='same')
    net = tf.layers.max_pooling2d(net, pool_size=2, strides=2, padding='same')
    stages = [(64, 3), (128, 4), (256, 6), (512, 3)]
    for stage_idx, (filters, num_blocks) in enumerate(stages):
        for block_idx in range(num_blocks):
            # Only the first block of stages 2-4 resizes (stride-2 downsample).
            downsample = stage_idx > 0 and block_idx == 0
            net = resnet_bottleneck_block(net, filters, downsample,
                                          2 if downsample else 1)
    net = tf.layers.average_pooling2d(net, pool_size=2, strides=2, padding='same')
    # Flatten the spatial feature map before the final classification layer.
    shape = net.get_shape()
    flat_dim = shape[1] * shape[2] * shape[3]
    flattened = tf.reshape(net, shape=[-1, flat_dim])
    return tf.layers.dense(flattened, units=n_class, activation=None)
def resnet_bottleneck_block(x, std_filters, resize=False, block_strides=1):
    """1x1 -> 3x3 -> 1x1 bottleneck residual block with ReLU activations.

    When resize is True the shortcut is convolved with block_strides so its
    spatial size matches the main path; if its channel count still differs
    from std_filters * 4, a 1x1 projection brings it into agreement.
    """
    if resize:
        shortcut = tf.nn.relu(tf.layers.conv2d(
            x, filters=std_filters, kernel_size=3,
            strides=block_strides, padding='same'))
    else:
        shortcut = x
    # Main path: reduce, transform, then expand to 4x the base filter count.
    y = tf.nn.relu(tf.layers.conv2d(
        x, std_filters, kernel_size=1, strides=1, padding='same'))
    y = tf.nn.relu(tf.layers.conv2d(
        y, filters=std_filters, kernel_size=3,
        strides=block_strides, padding='same'))
    y = tf.nn.relu(tf.layers.conv2d(
        y, filters=std_filters * 4, kernel_size=1, strides=1, padding='same'))
    if shortcut.shape[-1].value != std_filters * 4:
        # Project the shortcut so it can be added to the expanded main path.
        shortcut = tf.nn.relu(tf.layers.conv2d(
            shortcut, filters=std_filters * 4, kernel_size=1,
            strides=1, padding='same'))
    return tf.nn.relu(tf.add(y, shortcut))
#构建损失
def create_loss(logits, labels):
    """Mean sparse softmax cross-entropy between logits and integer labels."""
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels)
    return tf.reduce_mean(per_example)
#构建训练方式
def create_train_opt(learning_rate, loss):
    """Return a plain gradient-descent training op that minimizes the loss."""
    sgd = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    return sgd.minimize(loss)
#获取准确率
def create_accuracy(logits, labels):
    """Fraction of examples whose top-1 prediction matches the label.

    FIX: cast to float32 instead of float16 before averaging — float16 has
    only ~3 decimal digits of precision, which visibly distorts the mean
    accuracy, and nothing here benefits from the narrower type.
    """
    correct_pred = tf.nn.in_top_k(logits, labels, 1)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    return accuracy
接下来,定义训练方式,开始训练。
#开始训练
def train():
    """Build the graph and run the training loop, logging every 5th step.

    BUG FIX: the original ran `sess.run(train_opt)` every iteration and then,
    on logging iterations, ran `sess.run([train_opt, loss, accuracy])` as
    well — applying TWO gradient updates every 5th step and reporting
    loss/accuracy from a different batch than the one just trained on.
    Here everything is fetched in a single sess.run per iteration.
    """
    # Hyperparameters.
    height = 224
    width = 224
    learning_rate = 1e-3
    batch_size = 2
    n_class = 2
    epochs = 1000
    # Build the graph.
    with tf.Graph().as_default():
        image_list, label_list = get_image()
        image_train_batch, label_train_batch = get_batch(
            image_list, label_list, height=height, width=width,
            batch_size=batch_size)
        logits = model_net(image_train_batch, n_class=n_class)
        loss = create_loss(logits=logits, labels=label_train_batch)
        train_opt = create_train_opt(learning_rate=learning_rate, loss=loss)
        accuracy = create_accuracy(logits=logits, labels=label_train_batch)
        # Open the session and drive the input queues.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            for step in range(epochs):
                # One sess.run = exactly one gradient update per iteration.
                _, train_loss, train_acc = sess.run([train_opt, loss, accuracy])
                if step % 5 == 0:
                    print('Epoch:{}-Step:{}-Train Loss:{:.5f}-Train Acc:{:.5f}'.format(
                        step, step, train_loss, train_acc))
            coord.request_stop()  # shut down the input queues
            coord.join(threads)
完整代码如下所示:
import tensorflow as tf
import os
import matplotlib as mpl
import numpy as np
from PIL import Image
# Matplotlib setup: a font that renders Chinese glyphs, and keep minus signs.
mpl.rcParams['font.sans-serif'] = [u'simHei']
mpl.rcParams['axes.unicode_minus'] = False
# Root of the training images; each class lives in its own sub-folder,
# and the index of `classes` is the numeric label.
cwd = 'D:/wemystic/datas/imagecalssify/tensorflow-vgg16-train-and-test-master/train'
classes = ['cat', 'dog']
def get_image():
    """Collect shuffled image paths and integer labels from the class folders."""
    paths = []
    labels = []
    # Walk each class directory; the class index doubles as the numeric label.
    for label, class_name in enumerate(classes):
        folder = cwd + '/' + class_name + '/'
        for file_name in os.listdir(folder):
            paths.append(folder + file_name)
            labels.append(label)
    # Pair paths with labels, shuffle the pairs together, then split them apart.
    pairs = np.array([paths, labels]).transpose()
    np.random.shuffle(pairs)
    shuffled_paths = list(pairs[:, 0])
    # The numpy array stored labels as strings; convert them back to int.
    shuffled_labels = [int(v) for v in pairs[:, 1]]
    return shuffled_paths, shuffled_labels
def get_batch(image_list, label_list, height, width, batch_size):
    """Build a TF queue-based input pipeline yielding (image, label) batches.

    Each image is decoded as 3-channel JPEG, cropped/padded to
    (height, width), and standardized per image; labels come out as a
    1-D int32 vector of length batch_size.
    """
    path_tensor = tf.cast(image_list, tf.string)
    label_tensor = tf.cast(label_list, tf.int32)
    input_queue = tf.train.slice_input_producer([path_tensor, label_tensor])
    # Read and decode one file, then normalize its size and pixel statistics.
    raw_bytes = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(raw_bytes, channels=3)
    image = tf.image.resize_image_with_crop_or_pad(image, height, width)
    image = tf.image.per_image_standardization(image)
    image_train_batch, label_train_batch = tf.train.batch(
        [image, input_queue[1]],
        batch_size=batch_size,
        num_threads=8,
        capacity=batch_size * 10)
    image_train_batch = tf.cast(image_train_batch, tf.float32)
    # Flatten the label batch to a plain 1-D vector.
    label_train_batch = tf.reshape(label_train_batch, shape=[batch_size])
    return image_train_batch, label_train_batch
#构建残差网络
def model_net(input_x, n_class):
    """ResNet-style classifier: stem conv, four residual stages, avg-pool, dense.

    Stage layout is (filters, blocks) = (64,3), (128,4), (256,6), (512,3);
    every stage after the first downsamples on its first block.
    """
    net = tf.layers.conv2d(input_x, filters=32, kernel_size=3, strides=1, padding='same')
    net = tf.layers.max_pooling2d(net, pool_size=2, strides=2, padding='same')
    stages = [(64, 3), (128, 4), (256, 6), (512, 3)]
    for stage_idx, (filters, num_blocks) in enumerate(stages):
        for block_idx in range(num_blocks):
            # Only the first block of stages 2-4 resizes (stride-2 downsample).
            downsample = stage_idx > 0 and block_idx == 0
            net = resnet_bottleneck_block(net, filters, downsample,
                                          2 if downsample else 1)
    net = tf.layers.average_pooling2d(net, pool_size=2, strides=2, padding='same')
    # Flatten the spatial feature map before the final classification layer.
    shape = net.get_shape()
    flat_dim = shape[1] * shape[2] * shape[3]
    flattened = tf.reshape(net, shape=[-1, flat_dim])
    return tf.layers.dense(flattened, units=n_class, activation=None)
def resnet_bottleneck_block(x, std_filters, resize=False, block_strides=1):
    """1x1 -> 3x3 -> 1x1 bottleneck residual block with ReLU activations.

    When resize is True the shortcut is convolved with block_strides so its
    spatial size matches the main path; if its channel count still differs
    from std_filters * 4, a 1x1 projection brings it into agreement.
    """
    if resize:
        shortcut = tf.nn.relu(tf.layers.conv2d(
            x, filters=std_filters, kernel_size=3,
            strides=block_strides, padding='same'))
    else:
        shortcut = x
    # Main path: reduce, transform, then expand to 4x the base filter count.
    y = tf.nn.relu(tf.layers.conv2d(
        x, std_filters, kernel_size=1, strides=1, padding='same'))
    y = tf.nn.relu(tf.layers.conv2d(
        y, filters=std_filters, kernel_size=3,
        strides=block_strides, padding='same'))
    y = tf.nn.relu(tf.layers.conv2d(
        y, filters=std_filters * 4, kernel_size=1, strides=1, padding='same'))
    if shortcut.shape[-1].value != std_filters * 4:
        # Project the shortcut so it can be added to the expanded main path.
        shortcut = tf.nn.relu(tf.layers.conv2d(
            shortcut, filters=std_filters * 4, kernel_size=1,
            strides=1, padding='same'))
    return tf.nn.relu(tf.add(y, shortcut))
#构建损失
def create_loss(logits, labels):
    """Mean sparse softmax cross-entropy between logits and integer labels."""
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels)
    return tf.reduce_mean(per_example)
#构建训练方式
def create_train_opt(learning_rate, loss):
    """Return a plain gradient-descent training op that minimizes the loss."""
    sgd = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    return sgd.minimize(loss)
#获取准确率
def create_accuracy(logits, labels):
    """Fraction of examples whose top-1 prediction matches the label.

    FIX: cast to float32 instead of float16 before averaging — float16 has
    only ~3 decimal digits of precision, which visibly distorts the mean
    accuracy, and nothing here benefits from the narrower type.
    """
    correct_pred = tf.nn.in_top_k(logits, labels, 1)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    return accuracy
#开始训练
def train():
    """Build the graph and run the training loop, logging every 5th step.

    BUG FIX: the original ran `sess.run(train_opt)` every iteration and then,
    on logging iterations, ran `sess.run([train_opt, loss, accuracy])` as
    well — applying TWO gradient updates every 5th step and reporting
    loss/accuracy from a different batch than the one just trained on.
    Here everything is fetched in a single sess.run per iteration.
    """
    # Hyperparameters.
    height = 224
    width = 224
    learning_rate = 1e-3
    batch_size = 2
    n_class = 2
    epochs = 1000
    # Build the graph.
    with tf.Graph().as_default():
        image_list, label_list = get_image()
        image_train_batch, label_train_batch = get_batch(
            image_list, label_list, height=height, width=width,
            batch_size=batch_size)
        logits = model_net(image_train_batch, n_class=n_class)
        loss = create_loss(logits=logits, labels=label_train_batch)
        train_opt = create_train_opt(learning_rate=learning_rate, loss=loss)
        accuracy = create_accuracy(logits=logits, labels=label_train_batch)
        # Open the session and drive the input queues.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            for step in range(epochs):
                # One sess.run = exactly one gradient update per iteration.
                _, train_loss, train_acc = sess.run([train_opt, loss, accuracy])
                if step % 5 == 0:
                    print('Epoch:{}-Step:{}-Train Loss:{:.5f}-Train Acc:{:.5f}'.format(
                        step, step, train_loss, train_acc))
            coord.request_stop()  # shut down the input queues
            coord.join(threads)
if __name__ == '__main__':
    train()
训练过程中的准确率和损失(部分日志输出)如下所示:
Step:40-Train Loss:0.65000-Train Acc:0.50000
Step:45-Train Loss:0.72891-Train Acc:0.00000
Step:50-Train Loss:0.71797-Train Acc:0.50000
Step:55-Train Loss:0.64206-Train Acc:0.50000
Step:60-Train Loss:0.91496-Train Acc:0.00000