caffe添加python层读取图片数据进行分类任务

 

1、使用python搭建自己的网络,本文实现VGG16

# -*- coding:utf-8 -*- 

import caffe
from caffe import layers as L, params as P, to_proto
from caffe.proto import caffe_pb2

# Per-blob learning-rate multipliers passed as `param=` to every convolution
# created by block(): first entry for the weights, second for the biases.
# NOTE(review): despite the "frozen" names both multipliers are non-zero, so
# these layers do not look frozen -- confirm the intent.
frozen_weight_param = dict(lr_mult=1)  # weights
frozen_bias_param = dict(lr_mult=2)  # biases
froozen_param = [frozen_weight_param, frozen_bias_param]

# Module-level counters. `layer_num` is incremented by laysers() for every
# stage it builds; laysers() assigns its own local `block_num`, so this global
# `block_num` is never read by the code below.
block_num = 0
layer_num = 0

# Shared network specification that block(), laysers() and VGG16_net() fill in.
net = caffe.NetSpec()


def block(layer_name, inputs, filters_in, filters_out, kernel_size = 3, stride = 1):
	"""Append one convolution followed by an in-place ReLU to the shared ``net``.

	Args:
		layer_name: suffix for the new entries ``conv_<layer_name>`` and
			``relu_<layer_name>``.
		inputs: name of the existing ``net`` entry that feeds the convolution.
		filters_in: accepted for the caller's convenience; not used here.
		filters_out: value passed as the convolution's ``num_output``.
		kernel_size: convolution kernel size.
		stride: convolution stride.

	Returns:
		The name of the ReLU entry (``'relu_' + layer_name``), so that the
		caller can chain the next layer onto it.
	"""
	conv_key = 'conv_' + layer_name
	relu_key = 'relu_' + layer_name

	conv_kwargs = dict(
		param=froozen_param,
		num_output=filters_out,
		pad=1,  # fixed at 1 whatever kernel_size is passed
		kernel_size=kernel_size,
		stride=stride,
		weight_filler=dict(type='xavier'),
		bias_filler=dict(type='constant'),
	)
	net[conv_key] = L.Convolution(net[inputs], **conv_kwargs)
	net[relu_key] = L.ReLU(net[conv_key], in_place=True)

	return relu_key


def laysers(inputs, filters_in, filters_out, h, w):
	"""Build one stage of the network: a chain of conv/ReLU blocks plus a pooling layer.

	Every call increments the module-level ``layer_num``, which is used as the
	stage index in the generated layer names (``conv_<stage>_<k>``,
	``relu_<stage>_<k>``, ``pool_<stage>``).

	The first three stages get two conv/ReLU blocks; every later stage gets four.
	NOTE(review): the canonical VGG16 uses three convolutions in its deeper
	stages -- confirm that four is intended here.

	Args:
		inputs: name of the ``net`` entry feeding the stage.
		filters_in: forwarded unchanged to every block() call.
		filters_out: number of output channels of every convolution in the stage.
		h: pooling kernel height.
		w: pooling kernel width.

	Returns:
		The name of the pooling entry (``'pool_<stage>'``).
	"""
	global layer_num
	layer_num += 1
	stage = str(layer_num)

	conv_count = 4 if layer_num > 3 else 2

	# Chain the blocks: each one consumes the output name of the previous one.
	current = inputs
	for conv_idx in range(1, conv_count + 1):
		current = block(stage + '_' + str(conv_idx), current, filters_in, filters_out, 3, 1)

	pool_key = 'pool_' + stage
	net[pool_key] = L.Pooling(
		net[current],
		pool=0,  # 0 is the MAX pooling method (caffe.params.Pooling.MAX)
		kernel_h=h,
		kernel_w=w,
		stride=2,
	)
	return pool_key
    


def VGG16_net(batch_size, input_height, input_width, channels):
	"""Assemble the whole network in the shared ``net`` and return it as a NetParameter.

	Structure: Python data layer -> five laysers() stages -> three
	fully-connected layers (500, 128, 17 outputs) -> SoftmaxWithLoss and
	Accuracy on the ``label`` top.

	The Python data layer is emitted with ``module='dataLayer'`` and
	``layer='DataLayer'`` so that the generated prototxt already names the data
	layer class used in this article. ``module`` must be the file name (without
	``.py``) of the data layer and ``layer`` the class name inside it.

	Args:
		batch_size: currently unused; the batch size is set inside the data layer.
		input_height: currently unused.
		input_width: currently unused.
		channels: currently unused.

	Returns:
		The protobuf message produced by ``net.to_proto()``.
	"""
	filter_num = [3, 8, 16, 32, 64, 64]
	net['data'], net['label'] = L.Python(module='dataLayer', layer='DataLayer', ntop=2)

	# Five stages; each consumes the previous stage's pooling output.
	stage_out = 'data'
	for filters_in, filters_out in zip(filter_num[:-1], filter_num[1:]):
		stage_out = laysers(stage_out, filters_in, filters_out, 2, 2)

	net['fc1'] = L.InnerProduct(net[stage_out], num_output=500, weight_filler=dict(type='xavier'))
	net['relu_6'] = L.ReLU(net['fc1'], in_place=True)

	net['fc2'] = L.InnerProduct(net['relu_6'], num_output=128, weight_filler=dict(type='xavier'))
	net['relu_7'] = L.ReLU(net['fc2'], in_place=True)

	# 17 outputs: one score per class.
	net['fc3'] = L.InnerProduct(net['relu_7'], num_output=17, weight_filler=dict(type='xavier'))

	net.loss = L.SoftmaxWithLoss(net['fc3'], net['label'])
	net.accuracy = L.Accuracy(net['fc3'], net['label'])

	return net.to_proto()


if __name__ == '__main__':
	# Generate the network definition and store it in text format.
	with open('VGG16_net.prototxt', 'w') as prototxt_file:
		net_proto = VGG16_net(1, 224, 224, 3)
		prototxt_file.write(str(net_proto))

由于不知道如何直接生成python层网络结构,需要将生成的prototxt文件中的数据层进行修改,其中module为自己定义的文件名,layer为类名。

layer {
  name: "data"
  type: "Python"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  python_param{
    module: 'dataLayer'
    layer: 'DataLayer'
    }
}

2、定义自己的数据层文件

import caffe
import numpy as np
import cv2
import numpy.random as random
import os
import random

class DataLayer(caffe.Layer):
	"""Caffe Python layer with two tops: a batch of images and their labels."""

	def setup(self, bottom, top):
		"""Create the image loader and size both top blobs."""
		self.top_names = ['data', 'label']
		self.batch_size = 20
		self.file_name = "./train_data_list.txt"
		self.batch_loader = Batch_loader(self.file_name)

		n = self.batch_size
		top[0].reshape(n, 3, 224, 224)  # images
		top[1].reshape(n, 1)  # one label per image

	def reshape(self, bottom, top):
		"""Nothing to do: the top shapes are fixed once in setup()."""
		pass

	def forward(self, bottom, top):
		"""Fill every slot of the batch with the next sample from the loader."""
		for slot in range(self.batch_size):
			top[0].data[slot, ...], top[1].data[slot, ...] = self.batch_loader.load_next_image()

	def backward(self, top, bottom):
		"""Nothing to do: this layer does not compute any gradient."""
		pass

class Batch_loader(object):
	"""Serve (image, label) samples listed in a text file, one sample per call.

	Each non-blank line of the list file is expected to hold an image path and
	an integer label separated by whitespace, e.g. ``images/0001.jpg 3``.
	"""

	def __init__(self, file_name):
		"""Read the sample list.

		Args:
			file_name: path of the text file listing the samples.
		"""
		self.crop_height = 224
		self.crop_width = 224
		self.isshuffle = True
		self.file = file_name
		# Close the file deterministically and ignore blank lines, which would
		# otherwise break the "path label" parsing in load_next_image().
		with open(self.file, 'r') as list_file:
			self.imagelist = [line for line in list_file.read().splitlines() if line.strip()]
		self.cur = 0

		print('Batch_loader initialize with {} images'.format(len(self.imagelist)))

	def load_next_image(self):
		"""Load one sample.

		Returns:
			A tuple ``(image, label)``: the image resized to
			``crop_width`` x ``crop_height``, divided by 255.0 and transposed
			from (H, W, C) to (C, H, W); and the label as an ``int``.

		Raises:
			ValueError: if the sample list is empty or a line has no label.
			IOError: if the image file cannot be read.
		"""
		if not self.imagelist:
			raise ValueError('no samples listed in {}'.format(self.file))

		if self.isshuffle:
			# `random` is the standard-library module here (the file's
			# `import random` comes after `import numpy.random as random`),
			# so both bounds of randint are inclusive.
			self.cur = random.randint(0, len(self.imagelist) - 1)
		elif self.cur >= len(self.imagelist):
			# Sequential mode: wrap around at the end of the list.
			self.cur = 0

		fields = self.imagelist[self.cur].split()
		if len(fields) < 2:
			raise ValueError('malformed sample line: {!r}'.format(self.imagelist[self.cur]))
		image_name = fields[0]
		label = int(fields[1])

		image = cv2.imread(image_name)
		if image is None:
			# cv2.imread reports failure by returning None instead of raising.
			raise IOError('cannot read image: {}'.format(image_name))
		image = cv2.resize(image, (self.crop_width, self.crop_height))
		image = image / 255.0
		image = image.transpose((2, 0, 1))

		self.cur += 1

		return image, label

3、训练配置文件

由于数据样本较少,base_lr和weight_decay需要设置较小些

# The train/test net 文件路径
train_net: "VGG16_net.prototxt"


# The base learning rate and the weight decay of the network.
base_lr: 0.00005
weight_decay: 0.00005

# The learning rate policy
lr_policy: "inv"
gamma: 0.0008
power: 0.75


# 多少次迭代输出一次信息
display: 1000000
# The maximum number of iterations
max_iter: 270000
# 存储中间结果
snapshot: 5000
snapshot_prefix: "snapshot/unet0"

# solver mode: CPU or GPU
solver_mode: CPU
type: "Adam"

4、开始训练

import os
import time

import caffe
import numpy as np

# Training driver: steps the Adam solver manually, tracks running loss/accuracy,
# saves the weights whenever an epoch's mean accuracy improves, and finally
# writes and plots the recorded curves.
solver = caffe.AdamSolver('solver.prototxt')

# Weights and logs are written under results/; create it up front so that the
# first save does not fail on a missing directory.
if not os.path.isdir('results'):
    os.makedirs('results')

epochs = 200
# NOTE(review): the data layer in this article uses batch_size = 20, while this
# assumes 40 samples per iteration -- confirm which one is intended.
train_iter_per_epoch = 1360//40
t1 = time.time()
g_avg_loss = []
g_avg_acc = []
best_acc = 0
for i in range(epochs):
    avg_loss = np.zeros(train_iter_per_epoch)
    avg_acc = np.zeros(train_iter_per_epoch)
    for j in range(train_iter_per_epoch):
        solver.step(1)
        avg_loss[j] = solver.net.blobs['loss'].data
        avg_acc[j] = solver.net.blobs['accuracy'].data
        # Report every 10 iterations and at the end of the epoch.
        if (j+1) % 10 == 0 or (j+1) == train_iter_per_epoch:
            t2 = time.time()
            # Running means over the iterations completed so far in this epoch.
            mean_acc = avg_acc.sum()/(j+1)
            mean_loss = avg_loss.sum()/(j+1)
            g_avg_loss.append(mean_loss)
            g_avg_acc.append(mean_acc)
            print('epoch: %d, iters: %d, loss: %.4f, acc: %.4f, finished: %.2f %% cost time: %.2fs\r'%(i+1, i*train_iter_per_epoch+j, mean_loss, mean_acc, 100.0*(j+1)/train_iter_per_epoch, t2-t1))
            t1 = time.time()
    # Keep the weights of the best epoch seen so far.
    if avg_acc.mean() > best_acc:
        best_acc = avg_acc.mean()
        solver.net.save('results/iter_%d_best_acc=%.4f.caffemodel'%(i+1, best_acc))
    print()


with open('results/loss.txt', 'w') as f:
    for val in g_avg_loss:
        f.write("%.4f\n"%val)
with open('results/acc.txt', 'w') as f:
    for val in g_avg_acc:
        f.write("%.4f\n"%val)

import matplotlib.pyplot as plt
# g_avg_loss is a 1-D sequence of scalars: draw it as a curve (imshow expects
# 2-D image data and rejects a flat list).
plt.plot(g_avg_loss)
plt.show()

5、opencv_c++读取生成的model进行预测

需要对prototxt进行修改:

(1)将输入层修改为如下

input: "data"
input_dim:1
input_dim:3
input_dim:224
input_dim:224

 (2)删除loss层和accuracy层,添加softmax层

layer {
  name: "predict"
  type: "Softmax"
  bottom: "fc3"
  top: "predict"
}
#include <climits>
#include <iostream>
#include <string>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
using namespace std;



int main() 
{

	string modelTxt = "VGG16_net_test.prototxt";
	string modelBin = "test.caffemodel";

	cv::dnn::Net net;
	try {

		net = cv::dnn::readNetFromCaffe(modelTxt, modelBin);

	}
	catch (cv::Exception &ee) {
		cout << "readNetFromCaffe error!" << endl;
		return -1;
	}


	cv::Mat inputBlob;
	int cnt = 0;
	while (true) {
		
		string data_path = "..\\..\\test_data\\";
		string img_name = to_string(cnt);
		img_name = img_name + ".jpg";
		img_name = data_path + img_name;
		cv::Mat img = cv::imread(img_name, 1);
		cv::resize(img, img, cv::Size(224, 224));

		cv::imshow("img", img);
		cv::waitKey(1);

		cv::dnn::blobFromImage(img, inputBlob, 1 / 255.0, cv::Size(224, 224), cv::Scalar(127.5, 127.5, 127.5), false);
		net.setInput(inputBlob, "data");

		int predic_max = INT_MIN;
		int predic_index = 18;
		cv::Mat detection = net.forward("predict");

		cv::Mat probMat = detection.reshape(1, 1);

		cv::Point classNumber;
		double classProb;
		
		cv::minMaxLoc(probMat, NULL, &classProb, NULL, &classNumber);
		int classInx = classNumber.x;

		cout << "this predict is : " << classInx << endl;

		cnt++;


	}


	return 0;
}

 

你可能感兴趣的:(Deep,learning)