1、使用python搭建自己的网络,本文实现VGG16
# -*- coding:utf-8 -*-
import caffe
from caffe import layers as L, params as P, to_proto
from caffe.proto import caffe_pb2
frozen_weight_param = dict(lr_mult=1)  # learning-rate multiplier for conv weights
frozen_bias_param = dict(lr_mult=2)  # learning-rate multiplier for conv biases
# Shared per-blob param list for every Convolution layer (NOTE: "froozen" typo
# is kept -- the name is referenced by block() below).
froozen_param = [frozen_weight_param, frozen_bias_param]
block_num = 0  # global conv-block counter (shadowed by a local in laysers())
layer_num = 0  # global stage counter, incremented once per laysers() call
net = caffe.NetSpec()  # module-level net that all builder functions append to
def block(layer_name, inputs, filters_in, filters_out, kernel_size = 3, stride = 1):
    """Append a Convolution + in-place ReLU pair to the global ``net``.

    Args:
        layer_name: suffix used to name the generated 'conv_'/'relu_' layers.
        inputs: key in ``net`` of the layer to take input from.
        filters_in: number of input channels (unused; kept for API symmetry).
        filters_out: number of output feature maps.
        kernel_size: square kernel size (default 3).
        stride: convolution stride (default 1).

    Returns:
        The ``net`` key of the ReLU output, for chaining into the next block.
    """
    net['conv_' + layer_name] = L.Convolution(
        net[inputs],
        param=froozen_param,  # per-blob lr multipliers (weight, bias)
        num_output=filters_out,
        pad=1,  # fixed pad of 1 is 'same' padding only for 3x3 kernels
        kernel_size=kernel_size,
        stride=stride,
        weight_filler=dict(type='xavier'),
        bias_filler=dict(type='constant'))
    # In-place ReLU saves memory; callers chain from the ReLU key.
    net['relu_' + layer_name] = L.ReLU(net['conv_' + layer_name], in_place=True)
    return 'relu_' + layer_name
def laysers(inputs, filters_in, filters_out, h, w):
    """Append one VGG stage (stacked conv/ReLU blocks + max pooling) to ``net``.

    Stages 1-3 get two conv blocks; stages 4+ get four.

    Args:
        inputs: key in ``net`` of the stage's input layer.
        filters_in: input channel count (unused; forwarded to block()).
        filters_out: output feature maps for every conv in this stage.
        h, w: pooling kernel height and width.

    Returns:
        The ``net`` key of the stage's pooling output.
    """
    global layer_num
    layer_num += 1
    block_num = 1  # local counter; intentionally shadows the module-level one
    layer_name = str(layer_num) + '_' + str(block_num)
    block1_res = block(layer_name, inputs, filters_in, filters_out, 3, 1)
    block_num += 1
    layer_name = str(layer_num) + '_' + str(block_num)
    block2_res = block(layer_name, block1_res, filters_in, filters_out, 3, 1)
    if layer_num > 3:
        # Deeper stages stack two extra conv blocks. NOTE(review): canonical
        # VGG16 uses 3 convs in stages 4-5; this net uses 4 -- kept as designed.
        block_num += 1
        layer_name = str(layer_num) + '_' + str(block_num)
        block3_res = block(layer_name, block2_res, filters_in, filters_out, 3, 1)
        block_num += 1
        layer_name = str(layer_num) + '_' + str(block_num)
        block4_res = block(layer_name, block3_res, filters_in, filters_out, 3, 1)
        blocks_res = block4_res
    else:
        blocks_res = block2_res
    net['pool_' + str(layer_num)] = L.Pooling(
        net[blocks_res],
        pool=0,  # 0 == caffe.params.Pooling.MAX
        kernel_h=h,
        kernel_w=w,
        stride=2)
    return 'pool_' + str(layer_num)
def VGG16_net(batch_size, input_height, input_width, channels):
    """Assemble the VGG16-style net into the global ``net`` and return its proto.

    Args:
        batch_size, input_height, input_width, channels: currently unused;
            the Python data layer defines the actual input shape.

    Returns:
        The caffe NetParameter message for the assembled network.
    """
    # Channel progression per stage: 3 -> 8 -> 16 -> 32 -> 64 -> 64.
    filter_num = [3, 8, 16, 32, 64, 64]
    # Python data layer. 'layer' must match the data-layer class name exactly
    # (case-sensitive); fixed from 'Datalayer' to match the DataLayer class.
    net['data'], net['label'] = L.Python(module = 'mydatalayer', layer = 'DataLayer', ntop = 2)
    layer_1 = laysers('data', filter_num[0], filter_num[1], 2, 2)
    layer_2 = laysers(layer_1, filter_num[1], filter_num[2], 2, 2)
    layer_3 = laysers(layer_2, filter_num[2], filter_num[3], 2, 2)
    layer_4 = laysers(layer_3, filter_num[3], filter_num[4], 2, 2)
    layer_5 = laysers(layer_4, filter_num[4], filter_num[5], 2, 2)
    # Classifier head: 500 -> 128 -> 17 classes.
    net['fc1'] = L.InnerProduct(net[layer_5], num_output=500, weight_filler=dict(type='xavier'))
    net['relu_6'] = L.ReLU(net['fc1'], in_place=True)
    net['fc2'] = L.InnerProduct(net['relu_6'], num_output=128, weight_filler=dict(type='xavier'))
    net['relu_7'] = L.ReLU(net['fc2'], in_place=True)
    net['fc3'] = L.InnerProduct(net['relu_7'], num_output=17, weight_filler=dict(type='xavier'))
    net.loss = caffe.layers.SoftmaxWithLoss(net['fc3'], net['label'])
    net.accuracy = caffe.layers.Accuracy(net['fc3'], net['label'])
    return net.to_proto()
if __name__ == '__main__':
    # Generate the prototxt; the numeric args are placeholders (see VGG16_net).
    with open('VGG16_net.prototxt', 'w') as f:
        f.write(str(VGG16_net(1, 224, 224, 3)))
由于不知道如何直接生成python层网络结构,需要将生成的prototxt文件中的数据层进行修改,其中module为自己定义的文件名,layer为类名。
layer {
name: "data"
type: "Python"
top: "data"
top: "label"
include {
phase: TRAIN
}
python_param {
  module: "dataLayer"
  layer: "DataLayer"
}
}
2、定义自己的数据层文件
import caffe
import numpy as np
import cv2
import numpy.random as random
import os
import random
class DataLayer(caffe.Layer):
    """Caffe Python data layer feeding 224x224 RGB batches from a list file."""

    def setup(self, bottom, top):
        """Configure tops: top[0] = data (N,3,224,224), top[1] = label (N,1)."""
        self.top_names = ['data', 'label']
        self.batch_size = 20
        self.file_name = "./train_data_list.txt"
        self.batch_loader = Batch_loader(self.file_name)
        top[0].reshape(self.batch_size, 3, 224, 224)
        top[1].reshape(self.batch_size, 1)

    def forward(self, bottom, top):
        """Fill the batch one image at a time from the loader."""
        for itm in range(self.batch_size):
            img, label = self.batch_loader.load_next_image()
            top[0].data[itm, ...] = img
            top[1].data[itm, ...] = label

    def reshape(self, bottom, top):
        # Tops are fixed-size (set in setup); nothing to do per iteration.
        pass

    def backward(self, top, propagate_down, bottom):
        # Data layers have no gradient. BUG FIX: caffe invokes backward with
        # (top, propagate_down, bottom); the original two-argument signature
        # would raise a TypeError if backward were ever called.
        pass
class Batch_loader(object):
    """Cycles through an image-list file ('path label' per line) and yields
    preprocessed (image, label) pairs for the data layer."""

    def __init__(self, file_name):
        self.crop_height = 224
        self.crop_width = 224
        self.isshuffle = True
        self.file = file_name
        # BUG FIX: use 'with' so the file handle is closed (original leaked it).
        with open(self.file, 'r') as f:
            self.imagelist = f.read().splitlines()
        self.cur = 0
        print('Batch_loader initialize with {} images'.format(len(self.imagelist)))

    def load_next_image(self):
        """Return the next (image, label); image is CHW float in [0, 1]."""
        if self.cur == len(self.imagelist):
            self.cur = 0  # wrap around at the end of an epoch
        if self.isshuffle:
            # Random sampling with replacement rather than a true shuffle.
            self.cur = random.randint(0, len(self.imagelist) - 1)
        image_sample = self.imagelist[self.cur]
        image_name = image_sample.split(' ')[0]
        # NOTE(review): cv2.imread returns None for unreadable paths, and
        # resize would then raise -- assumes the list only holds valid images.
        image = np.array(cv2.imread(image_name))
        image = cv2.resize(image, (self.crop_width, self.crop_height))
        image = image / 255.0  # scale to [0, 1]
        image = image.transpose((2, 0, 1))  # HWC -> CHW for caffe
        label = int(image_sample.split(' ')[1])
        self.cur += 1
        return image, label
3、训练配置文件
由于数据样本较少,base_lr和weight_decay需要设置较小些
# The train/test net 文件路径
train_net: "VGG16_net.prototxt"
# The base learning rate and the weight decay of the network.
base_lr: 0.00005
weight_decay: 0.00005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0008
power: 0.75
# 多少次迭代输出一次信息
display: 1000000
# The maximum number of iterations
max_iter: 270000
# 存储中间结果
snapshot: 5000
snapshot_prefix: "snapshot/unet0"
# solver mode: CPU or GPU
solver_mode: CPU
type: "Adam"
4、开始训练
import caffe
import numpy as np
import time
# Train with Adam, tracking mean loss/accuracy and snapshotting the best epoch.
solver = caffe.AdamSolver('solver.prototxt')

epochs = 200
train_iter_per_epoch = 1360 // 40  # dataset size // batch size

t1 = time.time()
g_avg_loss = []  # running-mean loss at each report point
g_avg_acc = []   # running-mean accuracy at each report point
best_acc = 0
for i in range(epochs):
    avg_loss = np.zeros(train_iter_per_epoch)
    avg_acc = np.zeros(train_iter_per_epoch)
    for j in range(train_iter_per_epoch):
        solver.step(1)
        avg_loss[j] = solver.net.blobs['loss'].data
        avg_acc[j] = solver.net.blobs['accuracy'].data
        # Report every 10 iterations and at the end of the epoch.
        if (j + 1) % 10 == 0 or (j + 1) == train_iter_per_epoch:
            t2 = time.time()
            # Means over the iterations completed so far this epoch.
            mean_acc = avg_acc.sum() / (j + 1)
            mean_loss = avg_loss.sum() / (j + 1)
            g_avg_loss.append(mean_loss)
            g_avg_acc.append(mean_acc)
            print('epoch: %d, iters: %d, loss: %.4f, acc: %.4f, finished: %.2f %% cost time: %.2fs\r'%(i+1, i*train_iter_per_epoch+j, mean_loss, mean_acc, 100.0*(j+1)/train_iter_per_epoch, t2-t1))
            t1 = time.time()
    # Snapshot whenever the epoch's mean accuracy improves on the best so far.
    if avg_acc.mean() > best_acc:
        best_acc = avg_acc.mean()
        solver.net.save('results/iter_%d_best_acc=%.4f.caffemodel'%(i+1, best_acc))
    print()

with open('results/loss.txt', 'w') as f:
    for val in g_avg_loss:
        f.write("%.4f\n"%val)
with open('results/acc.txt', 'w') as f:
    for val in g_avg_acc:
        f.write("%.4f\n"%val)

import matplotlib.pyplot as plt
# BUG FIX: imshow expects a 2-D image; plot the 1-D loss curve instead.
plt.plot(g_avg_loss)
plt.show()
4、opencv_c++读取生成的model进行预测
需要对prototxt进行修改:
(1)将输入层修改为如下
input: "data"
input_dim:1
input_dim:3
input_dim:224
input_dim:224
(2)删除loss层和accuracy层,添加softmax层
layer {
name: "predict"
type: "Softmax"
bottom: "fc3"
top: "predict"
}
#include <iostream>
#include <string>
#include <opencv2/opencv.hpp>
using namespace std;
// Load the trained VGG16 caffemodel with OpenCV's DNN module and classify
// test images named 0.jpg, 1.jpg, ... from the test_data directory.
int main()
{
    string modelTxt = "VGG16_net_test.prototxt";
    string modelBin = "test.caffemodel";
    cv::dnn::Net net;
    try {
        net = cv::dnn::readNetFromCaffe(modelTxt, modelBin);
    }
    catch (cv::Exception &ee) {
        cout << "readNetFromCaffe error!" << endl;
        return -1;
    }
    cv::Mat inputBlob;
    int cnt = 0;
    while (true) {
        string data_path = "..\\..\\test_data\\";
        string img_name = to_string(cnt);
        img_name = img_name + ".jpg";
        img_name = data_path + img_name;
        cv::Mat img = cv::imread(img_name, 1);
        if (img.empty()) {
            // BUG FIX: the original looped forever; cv::resize throws on an
            // empty Mat once the numbered images run out.
            cout << "no more test images, stopping at " << cnt << endl;
            break;
        }
        cv::resize(img, img, cv::Size(224, 224));
        cv::imshow("img", img);
        cv::waitKey(1);
        // NOTE(review): training scaled pixels by 1/255 with no mean
        // subtraction; the 127.5 mean here does not match that -- confirm.
        cv::dnn::blobFromImage(img, inputBlob, 1 / 255.0, cv::Size(224, 224), cv::Scalar(127.5, 127.5, 127.5), false);
        net.setInput(inputBlob, "data");
        cv::Mat detection = net.forward("predict");
        cv::Mat probMat = detection.reshape(1, 1);  // 1x17 class probabilities
        cv::Point classNumber;
        double classProb;
        // argmax over the softmax output gives the predicted class index.
        cv::minMaxLoc(probMat, NULL, &classProb, NULL, &classNumber);
        int classInx = classNumber.x;
        cout << "this predict is : " << classInx << endl;
        cnt++;
    }
    return 0;
}