这几天使用PyTorch学习搭建CNN,但是运行时使用的数据集都是torchvision.datasets提供的,所以想自己创建数据集训练看看效果。
之前在Udacity上学习CNN课程时,曾使用TensorFlow搭建过预测狗狗品种的模型,所以这次使用相同的图片,看看用PyTorch搭建的CNN训练效果如何。我是代码地址
首先,我们写一个方法load_data(data_path),输入图片所在的目录,返回训练要使用的data_x和data_y。由于图片的大小不一致,我们将所有输入图片统一调整为(1,128,128)的格式:1表示单通道灰度图,128x128是像素尺寸。data_y则是相对应的狗的类型;为了测试方便,我这里只选择10种狗的图片来学习,所以狗的类型为000,001,...,010。
# Load the data set
def load_data(data_path):
    """Load a folder-per-class image data set as grayscale tensors.

    ``data_path`` must contain one sub-directory per class. The first three
    characters of each sub-directory name are parsed as the integer label
    (e.g. ``001.Affenpinscher`` -> 1). Every file inside a class directory
    is read as a grayscale image and resized to 128x128.

    NOTE(review): the labels are the raw numeric prefixes. A loss such as
    ``nn.CrossEntropyLoss`` expects targets in ``[0, num_classes)`` --
    confirm the directory numbering matches the network's output size.

    Args:
        data_path: Root directory of the data set. A trailing path
            separator is accepted but not required.

    Returns:
        A ``(data_x, data_y)`` tuple of tensors. ``data_x`` is float32 with
        shape ``(count, 1, 128, 128)`` and values scaled by 1/255;
        ``data_y`` is int64 with shape ``(count,)``. Samples are returned
        in shuffled order.

    Raises:
        OSError: If a file inside a class directory cannot be read as an
            image.
        ValueError: If the first three characters of a class directory
            name are not an integer.
    """
    # Collect [class_dir_name, image_path] pairs. The list is local so that
    # repeated calls do not accumulate samples from earlier calls.
    all_path = []
    for fsingal in os.listdir(data_path):
        filepath = os.path.join(data_path, fsingal)
        if not os.path.isdir(filepath):
            # Stray files next to the class folders are not classes.
            continue
        for fname in os.listdir(filepath):
            all_path.append([fsingal, os.path.join(filepath, fname)])

    # Pre-allocate the outputs: (batch, channels, height, width) and labels.
    # int64 is requested explicitly because NumPy's default integer width
    # is platform dependent and an empty list would otherwise become float64.
    count = len(all_path)
    data_x = np.empty((count, 1, 128, 128), dtype="float32")
    data_y = np.empty((count,), dtype="int64")

    # Shuffle so that samples of one class are not contiguous.
    random.shuffle(all_path)

    for i, (label_dir, img_path) in enumerate(all_path):
        # Flag 0 makes OpenCV decode the file as a single-channel image.
        img = cv2.imread(img_path, 0)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError("cannot read image: %s" % img_path)
        img = cv2.resize(img, (128, 128))
        data_x[i, 0, :, :] = np.asarray(img, dtype="float32")
        # Directory names look like "001.Affenpinscher"; keep the prefix.
        data_y[i] = int(label_dir[:3])

    # Scale 8-bit pixel values into [0, 1].
    data_x = data_x / 255
    return torch.from_numpy(data_x), torch.from_numpy(data_y)
然后我们要建立CNN模型,模型依次由卷积层1、卷积层2和全连接层组成,比较简单,和TensorFlow一样
配置optimizer和loss_func
class L5_NET(nn.Module):
    """Two conv/ReLU/max-pool stages followed by a single linear layer.

    The layer sizes are chosen for a single-channel 128x128 input: each
    2x2 pooling halves the spatial size, leaving 32 feature maps of 32x32
    in front of the linear layer, which emits 10 scores per sample.
    """

    def __init__(self):
        super(L5_NET, self).__init__()

        # Stage 1: 1 -> 16 channels. A 5x5 kernel with padding 2 keeps the
        # spatial size; pooling then yields (16, 64, 64).
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5,
                      stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        # Stage 2: 16 -> 32 channels with the same kernel, stride and
        # padding; pooling then yields (32, 32, 32).
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5,
                      stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        # Classifier head: one raw score per class (10 classes).
        self.fc = nn.Linear(in_features=32 * 32 * 32, out_features=10)

    def forward(self, x):
        """Return the 10 raw class scores for every sample in batch ``x``."""
        features = self.conv2(self.conv1(x))
        # Flatten everything except the batch dimension.
        batch_size = features.size(0)
        flattened = features.view(batch_size, -1)
        return self.fc(flattened)


# Module-level training objects used by the rest of the script.
model = L5_NET()
optimizer = optim.SGD(
    model.parameters(),
    lr=train_lr,
    momentum=train_momentum,
)
loss_func = nn.CrossEntropyLoss()
然后,训练模型,这里按部就班,准确率大于91%就保存模型
# Training function
def train(epoch):
    """Run one training epoch, evaluating on the test set every 10 batches.

    Works on the module-level ``train_load``, ``model``, ``optimizer``,
    ``loss_func``, ``test_x`` and ``test_y``. At every 10th batch the test
    set is scored and a progress line is printed. As soon as the accuracy
    exceeds 0.91 the model is saved to ``dogmodel.pkl`` and the epoch ends
    early.

    Args:
        epoch: Epoch number; only used in the progress output.

    Returns:
        True if the accuracy threshold was reached and the model was saved
        during this epoch, otherwise False. Callers that ignore the result
        behave exactly as before.
    """
    for batch_idx, (data, target) in enumerate(train_load):
        data, target = Variable(data), Variable(target)

        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass and parameter update.
        output = model(data)
        loss = loss_func(output, target)
        loss.backward()
        optimizer.step()

        # Periodic progress report on the test set.
        if batch_idx % 10 == 0:
            # Evaluation needs no gradients; disabling autograd avoids
            # building a graph for the test-set forward pass.
            with torch.no_grad():
                test_output = model(test_x)
            # Index of the highest score along dim 1 is the predicted class.
            pred_y = torch.max(test_output, 1)[1].data.numpy()
            accuracy = float((pred_y == test_y.data.numpy()).astype(int).sum()) / float(test_y.size(0))
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy(), '| test accuracy: %.2f' % accuracy)
            print(accuracy)
            if accuracy > 0.91:
                torch.save(model, 'dogmodel.pkl')
                # Report the early stop so the caller can end training too.
                return True
    return False
最后,我们可以看一下预测的结果,调用模型,看结果是否准确
if TRAIN:
    for epoch in range(1, train_epoch + 1):
        # A truthy result from train() is treated as "stop training now";
        # a train() that returns None simply runs through every epoch.
        if train(epoch):
            break
else:
    # NOTE: 'dogmodel.pkl' holds a fully pickled model. Unpickling can run
    # arbitrary code, so only load checkpoints from a trusted source. Since
    # PyTorch 2.6 torch.load defaults to weights_only=True and refuses such
    # a file unless weights_only=False is passed explicitly.
    model = torch.load('dogmodel.pkl')
    # Switch to inference mode before predicting.
    model.eval()

    # Flag 0 makes OpenCV decode the file as a single-channel image.
    img = cv2.imread('008dog.jpg', 0)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError("cannot read image: 008dog.jpg")

    # Same preprocessing as the training data: 128x128, float32, scaled by
    # 1/255, laid out as (batch=1, channel=1, height, width).
    img = cv2.resize(img, (128, 128))
    arr = np.asarray(img, dtype="float32")
    data_x = np.empty((1, 1, 128, 128), dtype="float32")
    data_x[0, :, :, :] = arr
    data_x = data_x / 255
    data_x = torch.from_numpy(data_x)

    # Inference needs no gradients; the index of the highest score along
    # dim 1 is the predicted class.
    with torch.no_grad():
        pred_y = torch.max(model(data_x), 1)[1].data.numpy()
    print(pred_y)