caffe python接口搭建&训练深度学习网络

一、 数据准备,生成lmdb文件

方案一:直接通过cmd执行以下命令:

convert_imageset.exe --resize_height=240 --resize_width=240 --shuffle --backend="lmdb" G:\ G:\caffe_python\label\label_train.txt G:\caffe_python\label\train_lmdb

convert_imageset.exe 由caffe编译时得到,路径一般为:\Build\x64\Release。resize_height & resize_width对原始图片resize到统一大小。G:\为训练图片的路径。label_train.txt为训练图片的标注数据,格式:filename  label。最后生成的lmdb文件为G:\caffe_python\label\train_lmdb。同理可生成验证集lmdb文件。

train_lmdb中数据保存格式

方案二:通过Python脚本生成lmdb文件

# -*- coding: utf-8 -*-

import sys
reload(sys) 
sys.setdefaultencoding('utf-8')
sys.path.insert(0, 'C:/Anaconda3/envs/py27/Lib/site-packages/pycaffe')

import caffe
import lmdb
import random
import cv2
import numpy as np
from caffe.proto import caffe_pb2
from sklearn.model_selection import train_test_split
from Bconfig import config

def get_dataset(label_dir):
    """Load the annotation list and return it in random order.

    Every non-empty line of the label file is expected to look like
    ``filename label``.  The two fields may be separated by any run of
    spaces or tabs; blank lines are ignored.

    Args:
        label_dir: Path of the annotation text file (despite the name this
            is opened as a file, not a directory).

    Returns:
        A list of ``{'filename': str, 'label': int}`` dicts, shuffled.

    Raises:
        IndexError: If a non-empty line has no label column.
        ValueError: If the label column is not an integer.
    """
    with open(label_dir, 'r') as f:
        annotations = f.readlines()
    random.shuffle(annotations)

    dataset = []
    for annotation in annotations:
        # split() without arguments collapses repeated spaces/tabs, so
        # "name  0" and "name\t0" parse the same way as "name 0".
        fields = annotation.split()
        if not fields:
            continue
        dataset.append({'filename': fields[0], 'label': int(fields[1])})
    return dataset

def _write_lmdb(dataset, lmdb_path, map_size, patch_size, batch_size):
    """Serialize annotated images into a Caffe LMDB database.

    Each image is decoded as 3-channel BGR, resized to
    ``patch_size`` x ``patch_size`` and stored as a serialized ``Datum``
    under a zero-padded sequential key.

    Args:
        dataset: Iterable of dicts with 'filename' and 'label' keys.
        lmdb_path: Directory in which the LMDB is created.
        map_size: Maximum size in bytes the database may grow to.
        patch_size: Side length of the stored square image.
        batch_size: Number of records written per transaction.

    Returns:
        The number of records actually written.
    """
    lmdb_env = lmdb.open(lmdb_path, map_size=map_size)
    written = 0
    try:
        lmdb_txn = lmdb_env.begin(write=True)
        for image_example in dataset:
            filename = image_example['filename']
            label = image_example['label']

            # Decode from raw bytes (presumably chosen over cv2.imread to cope
            # with non-ASCII paths).  IMREAD_COLOR forces 3 channels so every
            # record in the database has the same shape.
            image = cv2.imdecode(np.fromfile(filename, dtype=np.uint8), cv2.IMREAD_COLOR)
            if image is None:
                print('skip unreadable image: ' + filename)
                continue
            resize_img = cv2.resize(image, (patch_size, patch_size))

            # array_to_datum reads arr.shape as (channels, height, width),
            # while OpenCV returns (height, width, channels).
            datum = caffe.io.array_to_datum(resize_img.transpose((2, 0, 1)), label)
            # Fixed-width increasing key keeps records ordered and unique.
            key_str = '{:0>8d}'.format(written)
            lmdb_txn.put(key_str.encode(), datum.SerializeToString())
            print(key_str + ':' + filename)
            written += 1

            # Flush a full batch and start a new transaction.
            if written % batch_size == 0:
                lmdb_txn.commit()
                lmdb_txn = lmdb_env.begin(write=True)
                print(written)

        # Commit whatever is left in the last (possibly partial) batch.
        lmdb_txn.commit()
        if written % batch_size != 0:
            print('last batch')
            print(written)
    finally:
        lmdb_env.close()
    return written


if __name__ == '__main__':
    batch_size = config.BATCH_SIZE
    patch_size = config.PatchSize

    # Hold out 10% of the shuffled annotations as the validation set.
    dataset = get_dataset(config.train_list_path)
    train_dataset, test_dataset = train_test_split(dataset, test_size=0.1, random_state=1)

    train_count = _write_lmdb(train_dataset, config.trainlmdb_file, int(1e12),
                              patch_size, batch_size)
    val_count = _write_lmdb(test_dataset, config.vallmdb_file, int(1e10),
                            patch_size, batch_size)

    # Record how many samples ended up in each database.
    with open(config.lmdb_record, 'w') as fw:
        fw.write('trainDataset size: %d, testDataset size: %d' % (train_count, val_count))

二、计算训练数据均值

compute_image_mean --backend="lmdb" train_lmdb mean.binaryproto

compute_image_mean.exe 由caffe编译时得到,只需计算训练集的均值。

compute_image_mean 生成的 mean.binaryproto 可以通过下面的Python脚本转换为 .npy 格式的均值文件,便于在Python中直接加载使用:

import sys
sys.path.insert(0, 'C:/Anaconda3/envs/py27/Lib/site-packages/pycaffe')
import caffe
import numpy as np
from Bconfig import config
    
# Parse the binaryproto mean file into a BlobProto and re-save it as .npy.
blob = caffe.proto.caffe_pb2.BlobProto()
# Use a context manager so the file handle is closed after reading.
with open(config.mean_binary, 'rb') as mean_fp:
    bin_mean = mean_fp.read()
blob.ParseFromString(bin_mean)
arr = np.array(caffe.io.blobproto_to_array(blob))
# Keep only the first entry along the leading axis of the converted blob.
npy_mean = arr[0]
np.save(config.mean_npy, npy_mean)

以上生成的是逐像素均值,均值文件大小与图像维度保持一致:图片维度为M*N*C(高*宽*通道),则均值共包含M*N*C个元素。注意caffe的blob按C*M*N(通道*高*宽)顺序存储,因此上面脚本保存的npy_mean的shape为(C, M, N)。

caffe还可以使用逐通道减均值,每个通道的均值为一个数,直接作为参数在prototxt文件中指定即可,如下所示:

layer {
  name: "InputData"
  type: "Data"
  top: "data"
  top: "label"
  transform_param {
    mirror: false
    crop_size: 240
    mean_value: 78.3
    mean_value: 76.7
    mean_value: 73.2
  }
  data_param {
    ...
  }
}

关于通道均值的计算,在compute_image_mean.cpp中,保存的是像素均值,同时最后会输出channel mean:

// For every channel, accumulate dim entries of sum_blob and log their average.
for (int c = 0; c < channels; ++c) {
    for (int i = 0; i < dim; ++i) {
      // Channel c occupies the index range [dim * c, dim * (c + 1)).
      mean_values[c] += sum_blob.data(dim * c + i);
    }
    LOG(INFO) << "mean_value channel [" << c << "]: " << mean_values[c] / dim;
  }

三、搭建深度学习网络结构

1.卷积层:

import caffe
from caffe import layers as L
from caffe import params as P   # needed for P.Data.LMDB / P.Pooling.*

n = caffe.NetSpec()
# Data layer: `lmdb` is the LMDB directory path, `batch_size` and `mean_file`
# are supplied by the caller; ntop=2 yields the image blob and the label blob.
n.data, n.label = L.Data(source=lmdb, name='InputData', backend=P.Data.LMDB,
                         batch_size=batch_size, ntop=2,
                         transform_param=dict(crop_size=240, mean_file=mean_file, mirror=False))
# 3x3 convolution, stride 1, pad 1, 32 output channels.
n.conv1 = L.Convolution(n.data, kernel_size=3, stride=1, pad=1, num_output=32,
                        weight_filler=dict(type='xavier'),
                        bias_term=True,
                        bias_filler=dict(type='constant'),
                        name='conv')

2.激励层ReLU:

# The bottom is the convolution top n.conv1; in_place=True reuses that blob as the top.
n.relu = L.ReLU(n.conv1, in_place=True, name='conv_relu')

in_place字段为True,表示其top和bottom是一样的情况。

3.池化层:

# Max pooling over the ReLU output (n.relu).
n.pool_max = L.Pooling(n.relu, pool=P.Pooling.MAX, kernel_size=4, stride=3, pad=0,
                       name='pool_max')
# Average pooling; n.conv_out stands for whichever earlier top is to be pooled.
n.pool_ave = L.Pooling(n.conv_out, pool=P.Pooling.AVE, kernel_size=12, stride=1, pad=0,
                       name='pool_ave')
# Global average pooling; n.dense3 is likewise a placeholder for an earlier top.
n.global_ave = L.Pooling(n.dense3, pool=P.Pooling.AVE, global_pooling=True,
                         name='pool_global_ave')

caffe中卷积层输出的feature map尺寸计算方式为:

out_size = floor [ (in_size + 2*pad - kernel_size) / stride ] + 1

池化层输出的feature map尺寸计算方式为:

out_size = ceil [ (in_size + 2*pad - kernel_size) / stride ]+1

其计算方式与tensorflow略有不同,caffe中卷积和池化层的输出尺寸计算方式也不一样。详细可查阅caffe源码“caffe/layers/conv_layer.cpp” 和“caffe/layers/pooling_layer.cpp”

4.BatchNorm层:

# BatchNorm on the max-pooling output; this layer is only included in the TRAIN phase.
n.bachnorm = L.BatchNorm(n.pool_max, include=dict(phase=caffe.TRAIN), in_place=True,
                         batch_norm_param=dict(moving_average_fraction=0.9), name='bn')
# Scale layer with a bias term, applied in place on the BatchNorm top.
n.scale_bn = L.Scale(n.bachnorm, scale_param=dict(bias_term=True), in_place=True, name='bn_scale')

5.全连接层(fully_connected):

# Fully connected layer on the Scale output: class_num outputs,
# Xavier-initialized weights and biases initialized to the constant 0.
n.innerP = L.InnerProduct(n.scale_bn, num_output=class_num, 
                          weight_filler=dict(type='xavier'),
                          bias_filler=dict(type='constant',value=0),
                          name='inner_product')

6.Softmax层:

# Loss layer taking the fully connected output and the labels as bottoms.
n.loss = L.SoftmaxWithLoss(n.innerP, n.label)

7.Accuracy层:

# Accuracy layer on the same two bottoms, configured with top_k=1.
n.acc = L.Accuracy(n.innerP, n.label, accuracy_param=dict(top_k=1)) 

将网络结构写入到prototxt文件,即caffe格式的网络结构搭建文件。将生成好的.prototxt文件拷入到http://ethereon.github.io/netscope/#/editor,即可查看网络结构图。

数据输入网络时,L.Data输入参数即为之前生成的lmdb格式数据,若要对图片采取一些额外的在线数据增强,或针对已生成的lmdb数据改变增强方式,可在data_layer.cpp中修改源码(位于caffe目录下.\src\caffe\layers),针对每一个batch读入的数据进行在线数据增强等处理。具体修改方式可见https://blog.csdn.net/qq295456059/article/details/53494612。

四、Solver文件

利用Solver文件配置相关训练及测试参数。

from caffe.proto import caffe_pb2
from Bconfig import config

# Scalar solver fields, applied to the SolverParameter message in one pass.
solver_settings = [
    ('train_net', config.train_proto),          # prototxt produced in the previous step
    ('test_interval', 1405),                    # iterations between two test passes
    ('max_iter', 210750),                       # total number of training iterations
    ('base_lr', 0.001),                         # base learning rate
    ('momentum', 0.9),                          # momentum coefficient
    ('weight_decay', 5e-4),                     # weight decay coefficient
    ('lr_policy', 'step'),                      # learning-rate decay policy
    ('stepsize', 70250),
    ('gamma', 0.1),                             # learning-rate decay factor
    ('display', 1405),
    ('snapshot', 1405),
    ('snapshot_prefix', './model/BeltClassify'),  # prefix of saved model files
    ('type', "SGD"),                            # optimization algorithm
    ('solver_mode', caffe_pb2.SolverParameter.GPU),
]

sp = caffe_pb2.SolverParameter()
for field_name, field_value in solver_settings:
    setattr(sp, field_name, field_value)

# Repeated fields: one test net and its number of test iterations.
sp.test_net.append(config.val_proto)
sp.test_iter.append(157)

# Write the text-format message to the configured solver file.
solver_file = config.solver_file
with open(solver_file, 'w') as f:
    f.write(str(sp))

五、训练模型

import caffe

caffe.set_device(0)
caffe.set_mode_gpu()
solver = caffe.SGDSolver('G:/Belt_CaffeP/prototxt/solver.prototxt')
test_iter = 157
test_interval = 1405
epoch_num = 150

for i in range(epoch_num):
    # Training: step one iteration at a time so loss/accuracy can be read after each update.
    for j in range(test_interval):
        solver.step(1)
        loss_train = float(solver.net.blobs['loss'].data)
        acc_train = float(solver.net.blobs['acc'].data)
        print('epoch %d %d/%d: loss_train: %.4f, accuracy_train: %.4f' %(i, j, test_interval, loss_train, acc_train))

    # Evaluation: run test_iter forward passes on the first test net.
    loss_sum = 0.0
    acc_sum = 0.0
    for test_i in range(test_iter):
        solver.test_nets[0].forward()
        loss_test = float(solver.test_nets[0].blobs['loss'].data)
        acc_test = float(solver.test_nets[0].blobs['acc'].data)
        loss_sum += loss_test
        acc_sum += acc_test
        print('epoch %d %d/%d: loss: %.4f, accuracy: %.4f' %(i, test_i, test_iter, loss_test, acc_test))
    # Per-batch numbers are noisy; also report the average over all test batches.
    print('epoch %d test mean: loss: %.4f, accuracy: %.4f' %(i, loss_sum / test_iter, acc_sum / test_iter))

以上可得到每一轮迭代时训练集和测试集的loss及accuracy。

六、测试图片

方法一:直接调用opencv 读入的图像

net = caffe.Net(prototxt, model, caffe.TEST)
# Always decode to 3-channel BGR so the channel count matches the network input.
img_bgr = cv2.imdecode(np.fromfile('XXX.jpg', dtype=np.uint8), cv2.IMREAD_COLOR)
# Resize to the spatial size of the 'data' blob (N x C x H x W).
in_h, in_w = net.blobs['data'].data.shape[2:]
img_bgr = cv2.resize(img_bgr, (in_w, in_h))
# Convert to float before subtracting so uint8 arithmetic cannot wrap around.
# NOTE: `mean` must broadcast against an H x W x C array, e.g. a per-channel
# BGR mean of shape (3,); a C x H x W mean needs mean.transpose((1, 2, 0)).
image = img_bgr.astype(np.float32) - mean
input_img = image.transpose((2,0,1))   # H*W*C --> C*H*W
net.blobs['data'].data[...] = input_img
output = net.forward()
prob = output['prob'][0]         # 'prob' is the output of the last layer in the prototxt
label_pre = prob.argmax()

方法二:caffe加载图像

net = caffe.Net(prototxt, model, caffe.TEST)
image = caffe.io.load_image('XXX.jpg')

transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1))     # H*W*C --> C*H*W
transformer.set_raw_scale('data', 255)
# presumably the mean must follow the channel order after set_channel_swap (BGR here) - confirm
transformer.set_mean('data', np.array([mean1, mean2, mean3]))
transformer.set_channel_swap('data', (2,1,0))

# Pass the image loaded above (the variable is `image`, not `img`).
net.blobs['data'].data[...] = transformer.preprocess('data', image)
output = net.forward()
prob = output['prob'][0]
label_pre = np.argsort(-prob)[0]

caffe.io.load_image加载的图片为RGB格式,取值范围0~1(float),而本文训练数据由opencv读入,为BGR格式,取值范围[0, 255],因此需要转换维度与通道顺序,并将取值缩放到0~255。transformer各项变换的执行顺序与set_*函数的调用顺序无关,transformer.preprocess函数内部固定按以下顺序执行:(1)transpose (2)channel_swap (3)raw_scale (4)减mean。

而opencv 读取的图像即为BGR格式,范围为0~255,无需做变换。

你可能感兴趣的:(深度学习)