1、搭建VGG神经网络模型(16 weight layers)
2、训练模型
3、验证模型
1、图中最后一层全连接层(fully connected)的1000指分类数(class_num),根据实际问题修改,本作业为5分类问题,故为5;
2、softmax层将预测概率利用指数函数映射为非负,再利用归一化方法将概率转化为0-1,且各个概率的预测总和为1,如果只想得到最大概率的索引,即分到哪一类了,则可忽略这一层;
3、VGGnet的标准输入为224x224的RGB图片,卷积核为3x3,步长为1,上图中可以看出,VGG共有5组卷积,每组的卷积层间图片尺寸不变,故padding为same或1;
4、池化层采用最大化池化,滤波器为2x2,步长为2,使得图片长宽均减半而不改变深度;
5、最后一次池化后,要将输出进行展平(paddle.nn.Flatten(); pytorch中为.view())再送给全连接层,shape的变化为 (长,宽,深度)--->(1,1,长x宽x深度)。
# 构建VGG网络
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
class VGGNet(nn.Layer):
    """VGG network with 16 weight layers (13 convolutions + 3 linear layers).

    The network is made of five convolution groups (2, 2, 3, 3 and 3
    convolutions respectively), each followed by a 2x max-pool, and then a
    three-layer fully connected classifier.  Every convolution uses a 3x3
    kernel with stride 1 and padding 1, so only the pooling layers change the
    spatial size.  The first linear layer expects 7*7*512 features, which
    corresponds to a 224x224 input after five halvings.

    Args:
        num_classes: Number of output scores produced by the last linear
            layer.  Defaults to 5, the value that used to be hard-coded.
    """

    def __init__(self, num_classes=5):
        super(VGGNet, self).__init__()
        # Sublayer indices inside each Sequential are the same as before
        # (conv, relu, conv, relu, ..., pool), so state_dict keys are unchanged.
        self.conv1 = self._conv_block(3, 64, 2)
        self.conv2 = self._conv_block(64, 128, 2)
        self.conv3 = self._conv_block(128, 256, 3)
        self.conv4 = self._conv_block(256, 512, 3)
        self.conv5 = self._conv_block(512, 512, 3)
        self.flatten = nn.Flatten()
        self.linear1 = nn.Sequential(
            nn.Linear(in_features=7 * 7 * 512, out_features=4096), nn.ReLU())
        self.linear2 = nn.Sequential(nn.Linear(4096, 4096), nn.ReLU())
        # No softmax here: forward() returns raw class scores.
        self.output = nn.Linear(4096, num_classes)

    @staticmethod
    def _conv_block(in_channels, out_channels, num_convs):
        """Build `num_convs` (3x3 conv + ReLU) pairs followed by a 2x max-pool."""
        layers = []
        for _ in range(num_convs):
            layers.append(nn.Conv2D(in_channels, out_channels, 3, 1, 1))
            layers.append(nn.ReLU())
            # Only the first convolution of a group changes the channel count.
            in_channels = out_channels
        layers.append(nn.MaxPool2D(2))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the five convolution groups and the classifier on `x`.

        Returns:
            The output of the final linear layer (one score per class).
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.flatten(x)
        x = self.linear1(x)
        x = self.linear2(x)
        output = self.output(x)
        return output
# Instantiate the Sequential-based network and print its layer structure
# as a quick sanity check of the architecture.
module = VGGNet()
print(module)
import paddle.nn as nn
import paddle.nn.functional as F
class ConvPool(nn.Layer):
    """Convolution + pooling network written out layer by layer.

    Thirteen 3x3 convolutions (stride 1, padding 1) are arranged in five
    stages; each stage ends with a 2x max-pool.  The flattened result
    (25088 features) is passed through three linear layers, the last of which
    produces 5 scores.  ReLU is applied after every convolution and after the
    first two linear layers.
    """

    # Number of convolutions in stage 1..5; used by forward() to look up
    # the conv<stage>_<index> / pool<stage> attributes in order.
    _STAGE_DEPTHS = (2, 2, 3, 3, 3)

    def __init__(self):
        super(ConvPool, self).__init__()

        # Stage 1: 3 -> 64 channels.
        self.conv1_1 = nn.Conv2D(3, 64, 3, 1, 1)
        self.conv1_2 = nn.Conv2D(64, 64, 3, 1, 1)
        self.pool1 = nn.MaxPool2D(kernel_size=2, stride=2)

        # Stage 2: 64 -> 128 channels.
        self.conv2_1 = nn.Conv2D(64, 128, 3, 1, 1)
        self.conv2_2 = nn.Conv2D(128, 128, 3, 1, 1)
        self.pool2 = nn.MaxPool2D(2)

        # Stage 3: 128 -> 256 channels.
        self.conv3_1 = nn.Conv2D(128, 256, 3, 1, 1)
        self.conv3_2 = nn.Conv2D(256, 256, 3, 1, 1)
        self.conv3_3 = nn.Conv2D(256, 256, 3, 1, 1)
        self.pool3 = nn.MaxPool2D(2)

        # Stage 4: 256 -> 512 channels.
        self.conv4_1 = nn.Conv2D(256, 512, 3, 1, 1)
        self.conv4_2 = nn.Conv2D(512, 512, 3, 1, 1)
        self.conv4_3 = nn.Conv2D(512, 512, 3, 1, 1)
        self.pool4 = nn.MaxPool2D(2)

        # Stage 5: 512 -> 512 channels.
        self.conv5_1 = nn.Conv2D(512, 512, 3, 1, 1)
        self.conv5_2 = nn.Conv2D(512, 512, 3, 1, 1)
        self.conv5_3 = nn.Conv2D(512, 512, 3, 1, 1)
        self.pool5 = nn.MaxPool2D(2)

        # Classifier head.
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(in_features=25088, out_features=4096)
        self.linear2 = nn.Linear(4096, 4096)
        self.linear3 = nn.Linear(4096, 5)

    def forward(self, x):
        """Apply every stage in order and return the final linear output."""
        for stage, depth in enumerate(self._STAGE_DEPTHS, start=1):
            for index in range(1, depth + 1):
                conv = getattr(self, "conv{}_{}".format(stage, index))
                x = F.relu(conv(x))
            pool = getattr(self, "pool{}".format(stage))
            x = pool(x)

        x = self.flatten(x)
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        return self.linear3(x)
# Instantiate the layer-by-layer network and print its structure.
cnn = ConvPool()
print(cnn)
### 定义画曲线函数
def draw_process(title, color, iters, data, label):
    """Plot one training curve and display it.

    Args:
        title: Text shown as the figure title.
        color: Line colour passed to ``plt.plot``.
        iters: X-axis values.
        data: Y-axis values, one per entry of ``iters``.
        label: Used both as the y-axis label and as the legend entry.
    """
    # Draw the curve first; the legend below picks up its label.
    plt.plot(iters, data, color=color, label=label)

    # Axis and figure captions.
    plt.title(title, fontsize=24)
    plt.xlabel('iter_num', fontsize=20)
    plt.ylabel(label, fontsize=20)

    plt.legend()
    plt.grid()
    plt.show()
## 训练代码
model = VGGNet()  # instantiate the network to train
# model = ConvPool()  # alternative: the layer-by-layer variant of the network
import numpy

Iters = []        # global iteration index of every recorded batch
total_loss = []   # loss of every batch, as Python floats
total_acc = []    # accuracy of every batch, as Python floats

optimizer = paddle.optimizer.Adam(
    learning_rate=train_parameters["learning_rate"],
    parameters=model.parameters())

# The epoch loop used to iterate over range(train_batch_size), i.e. the batch
# size was used as the number of epochs.  Prefer an explicit "num_epochs"
# entry; fall back to the previous value when the configuration has none so
# existing setups keep running.
# NOTE(review): confirm "num_epochs" is the key used by train_parameters.
num_epochs = train_parameters.get(
    "num_epochs", train_parameters["train_batch_size"])

model.train()
global_step = 0
for epoch in range(num_epochs):
    for step, data in enumerate(train_loader()):
        x_data = data[0]
        y_data = data[1]

        prediction = model(x_data)
        loss = paddle.nn.functional.cross_entropy(prediction, y_data)
        loss.backward()
        optimizer.step()
        optimizer.clear_grad()

        correctness = paddle.metric.accuracy(prediction, y_data)

        # Convert through numpy's .item() so this works whether Paddle
        # returns a 0-D or a 1-element tensor, and so the plotted lists hold
        # plain floats.
        loss_value = loss.numpy().item()
        acc_value = correctness.numpy().item()

        # `step` restarts at 0 every epoch; use a running counter so curves
        # from different epochs do not overlap on the x-axis.
        Iters.append(global_step)
        total_loss.append(loss_value)
        total_acc.append(acc_value)
        global_step += 1

        print('Step:', step, '|| Loss: %.4f' % loss_value, '|| Accuracy: %.2f' % acc_value)

# Save the model parameters once training has finished.
paddle.save(model.state_dict(), "work/checkpoints/save_dir_final.pdparams")
draw_process("Trainning Loss", "red", Iters, total_loss, "Trainning Loss")
draw_process("Trainning Acc", "blue", Iters, total_acc, "Trainning Acc")
print('done!')
1、画图所使用的数据需要为numpy类型
2、VGGnet的输出为一个BatchSize x 分类数的tensor矩阵,每一行对应一个样本在各个类别上的预测得分([属于第一类的得分, 属于第二类的得分,...]),由于网络末尾没有softmax层,这些得分尚未归一化为概率;
3、批训练中的批次(本程序以step对应批次)默认为numpy且为int型;
4、残差计算输出loss为tensor类型,需用.numpy()进行转化;
5、paddle.metric.accuracy()能够计算某批次数个预测的正确率,输出为一个1x1的tensor,转化为numpy类型后是一个1x1的numpy矩阵,故在最后还要取[0]或者利用.squeeze()后缀降维才能得到一个数。
6、由于使用的是免费的算力,训练一轮就要花费一个半小时左右,所以我这里只训练了一轮,故准确率不是特别好(正确率在60%浮动),但是可以看到loss在下降、准确度在上升的趋势,后续如果再重新训练(训练轮数上升到十几轮)会再更新。
##图像预处理
def unzip_infer_data(src_path, target_path):
    """Extract the inference data archive unless it was extracted before.

    The archive is extracted only when ``target_path + 'test'`` is not an
    existing directory, so calling this repeatedly does not redo the work.

    Args:
        src_path: Path of the zip archive to extract.
        target_path: Directory the archive is extracted into.  It is joined
            with ``'test'`` by plain string concatenation, so it is expected
            to end with a path separator.
    """
    if os.path.isdir(target_path + 'test'):
        return
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(src_path, 'r') as archive:
        archive.extractall(path=target_path)
def load_image(image_path):
    """Read an image file and prepare it as network input.

    The image is converted to RGB when it is in another mode, resized to
    224x224 with bilinear interpolation, turned into a float32 array,
    rearranged from HWC to CHW and divided by 255.

    Args:
        image_path: Path of the image file to open.

    Returns:
        A float32 numpy array in channel-first (CHW) layout.
    """
    picture = Image.open(image_path)
    if picture.mode != 'RGB':
        picture = picture.convert('RGB')

    resized = picture.resize((224, 224), Image.BILINEAR)
    pixels = np.array(resized).astype('float32')

    # HWC -> CHW, then scale the 0..255 pixel values by 1/255.
    channel_first = pixels.transpose((2, 0, 1))
    return channel_first / 255
infer_src_path = "/home/aistudio/work/test.zip"
infer_dst_path = "/home/aistudio/work/test/"
unzip_infer_data(infer_src_path, infer_dst_path)
label_dict = train_parameters['label_dict']

# Load the saved parameters into a freshly built network.
model_state_dict = paddle.load('work/checkpoints/save_dir_final.pdparams')
model_pred = VGGNet()  # instantiate the network
model_pred.set_state_dict(model_state_dict)
# Inference mode.  VGGNet has no dropout / batch-norm layers, so this does not
# change its output, but it keeps the script correct if such layers are added.
model_pred.eval()

import os
import paddle.vision.transforms as T

# Maps the predicted class index (as a string) to a readable class name.
# NOTE(review): `label_dict` above may carry the same mapping - confirm
# before replacing this hard-coded table.
class_num = {'0' : 'lawn', '1' : 'river', '2' : 'desert', '3' : 'church', '4' : 'ice'}

os.makedirs('/home/aistudio/work/test_result', exist_ok=True)

# Sorted so the result file has a deterministic order.
datanames = sorted(os.listdir(infer_dst_path))

# `with` guarantees the result file is closed even if a prediction fails;
# no_grad() skips gradient bookkeeping that inference does not need.
with open('/home/aistudio/work/test_result/test_result.txt', 'w') as test_result, paddle.no_grad():
    for i in datanames:
        img_name = infer_dst_path + i
        img_test = load_image(img_name)

        # load_image() already returns a CHW float32 array, so it is converted
        # directly.  Passing it through T.ToTensor() (which assumes HWC input)
        # and then transposing axes 1 and 2 produced the right shape but with
        # height and width swapped, i.e. a transposed picture.
        img_test_tensor = paddle.to_tensor(img_test)
        # Add the batch dimension: (C, H, W) -> (1, C, H, W).
        img_test_tensor_ = paddle.unsqueeze(img_test_tensor, axis=0)

        img_pred = model_pred(img_test_tensor_)

        # argmax over the class axis yields a 1-element tensor (one entry per
        # batch item), which can be indexed regardless of Paddle version.
        img_num = int(paddle.argmax(img_pred, axis=1).numpy()[0])
        img_class = class_num[str(img_num)]

        print('||Name:', i, '||Class:', img_class, '||')
        test_result.write('||Name:' + i + '||Class:' + img_class + '||' + '\n')
1、os.listdir(/路径)可以读取指定路径下的文件名;
2、由于这里的测试集不经过批处理环节,无法自动转化为tensor格式以及增加BatchSize维度,所以这里手动转化格式(transform),在0号位增加一个维度;
3、出现问题:经过转换后的图片仍然放不进神经网络,发现是图片信息矩阵有误,因此使用.transpose()更改信息顺序,就可以放进网络进行预测了;
4、由于训练轮数比较低,实验结果不尽人意,暂时先不放在博客中。