Reference links:
Caffe official site
A beginner's tutorial for Caffe
Caffe is short for Convolutional Architecture for Fast Feature Embedding. Its main strengths:
(1) Easy to get started: network structures are defined in .prototxt configuration files (similar in spirit to JSON), so no code is needed to design a network. Every module of the network is a Layer, and a network is described as a sequence of connected Layers. The caffe binary reads the .prototxt file and trains the network exactly as defined, so in principle a user can train a model without writing any code, just by defining the network structure (see the short pycaffe sketch after the list of drawbacks below).
(2) Strong training performance on GPUs, plus a large collection of pretrained classic models (AlexNet, VGG, Inception, etc.).
(3) Modular components that extend easily to new models and learning tasks. Models can be wrapped behind simple, easy-to-use interfaces, e.g. Python or MATLAB APIs.
(4) A C++ core with good portability: it supports Windows, macOS and Linux, and can be compiled for and deployed to Android and iOS. The Python interface, pycaffe, simplifies experimentation when exploring new tasks or designing networks.
Drawbacks:
(1) Designing a network inside a .prototxt file can feel restrictive; it is not as convenient or flexible as designing networks directly in Python the way TensorFlow allows, and Caffe's configuration files offer no programmatic way to adjust hyperparameters.
(2) There is no scikit-learn-style estimator interface for convenient cross-validation or grid search over hyperparameters.
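As a taste of the layer-by-layer style described in point (1), here is a minimal sketch using pycaffe's NetSpec helper (the pycaffe interface is built later in this post); printing the result yields exactly the .prototxt text. The LMDB path is a placeholder:

# Sketch: build a tiny network layer by layer and emit its .prototxt text.
import caffe
from caffe import layers as L, params as P

n = caffe.NetSpec()
n.data, n.label = L.Data(source='path/to/train_lmdb', backend=P.Data.LMDB,
                         batch_size=64, ntop=2)
n.ip1 = L.InnerProduct(n.data, num_output=500, weight_filler=dict(type='xavier'))
n.relu1 = L.ReLU(n.ip1, in_place=True)
n.ip2 = L.InnerProduct(n.relu1, num_output=10, weight_filler=dict(type='xavier'))
n.loss = L.SoftmaxWithLoss(n.ip2, n.label)

print(n.to_proto())   # the text that would go into a train_val.prototxt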
Installation reference: https://blog.csdn.net/yhaolpz/article/details/71375762
First install the build dependencies:
sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler
sudo apt-get install --no-install-recommends libboost-all-dev
sudo apt-get install libopenblas-dev liblapack-dev libatlas-base-dev
sudo apt-get install libgflags-dev libgoogle-glog-dev liblmdb-dev
sudo apt-get install git cmake build-essential
Download OpenCV from the official release page: http://opencv.org/releases.html
Pick a recent OpenCV 3.x release and download the source package.
After downloading, unpack it, then cd into the extracted folder to build and install:
mkdir build                      # create the build directory
cd build
cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=/usr/local ..
make -j8                         # compile
sudo make install                # install
pkg-config --modversion opencv   # check the installed OpenCV version
Download the Caffe sources:
git clone https://github.com/BVLC/caffe.git
Enter the downloaded folder (named caffe-master here) and copy Makefile.config.example to Makefile.config; this file is the configuration used when building Caffe:
sudo cp Makefile.config.example Makefile.config
Edit the contents of Makefile.config. I work inside an Anaconda3 environment, and my configuration file looks like this:
## Refer to http://caffe.berkeleyvision.org/installation.html
# Contributions simplifying and improving our build system are welcome!
# cuDNN acceleration switch (uncomment to build with cuDNN).
USE_CUDNN := 1
# CPU-only switch (uncomment to build without GPU support).
# CPU_ONLY := 1
# uncomment to disable IO dependencies and corresponding data layers
# USE_OPENCV := 0
# USE_LEVELDB := 0
# USE_LMDB := 0
# uncomment to allow MDB_NOLOCK when reading LMDB files (only if necessary)
# You should not set this flag if you will be reading LMDBs with any
# possibility of simultaneous read and write
# ALLOW_LMDB_NOLOCK := 1
# Uncomment if you're using OpenCV 3
OPENCV_VERSION := 3
# To customize your choice of compiler, uncomment and set the following.
# N.B. the default for Linux is g++ and the default for OSX is clang++
# CUSTOM_CXX := g++
# CUDA directory contains bin/ and lib/ directories that we need.
CUDA_DIR := /usr/local/cuda-9.1
# On Ubuntu 14.04, if cuda tools are installed via
# "sudo apt-get install nvidia-cuda-toolkit" then use this instead:
# CUDA_DIR := /usr
# CUDA architecture setting: going with all of them.
# For CUDA < 6.0, comment the *_50 through *_61 lines for compatibility.
# For CUDA < 8.0, comment the *_60 and *_61 lines for compatibility.
# For CUDA >= 9.0, comment the *_20 and *_21 lines for compatibility.
CUDA_ARCH := -gencode arch=compute_30,code=sm_30 \
		-gencode arch=compute_35,code=sm_35 \
		-gencode arch=compute_50,code=sm_50 \
		-gencode arch=compute_52,code=sm_52 \
		-gencode arch=compute_60,code=sm_60 \
		-gencode arch=compute_61,code=sm_61 \
		-gencode arch=compute_61,code=compute_61
# BLAS choice:
# atlas for ATLAS (default)
# mkl for MKL
# open for OpenBlas
BLAS := open
# Custom (MKL/ATLAS/OpenBLAS) include and lib directories.
# Leave commented to accept the defaults for your choice of BLAS
# (which should work)!
# BLAS_INCLUDE := /path/to/your/blas
# BLAS_LIB := /path/to/your/blas
# Homebrew puts openblas in a directory that is not on the standard search path
# BLAS_INCLUDE := $(shell brew --prefix openblas)/include
# BLAS_LIB := $(shell brew --prefix openblas)/lib
# This is required only if you will compile the matlab interface.
# MATLAB directory should contain the mex binary in /bin.
MATLAB_DIR := /usr/local/MATLAB/R2016a
# MATLAB_DIR := /Applications/MATLAB_R2012b.app
# NOTE: this is required only if you will compile the python interface.
# We need to be able to find Python.h and numpy/arrayobject.h.
# Anaconda Python distribution is quite popular. Include path:
# Verify anaconda location, sometimes it's in root.
ANACONDA_HOME := $(HOME)/anaconda3
PYTHON_INCLUDE := $(ANACONDA_HOME)/include \
$(ANACONDA_HOME)/include/python3.6m \
$(ANACONDA_HOME)/lib/python3.6/site-packages/numpy/core/include
# Uncomment to use Python 3 (default is Python 2)
PYTHON_LIBRARIES := boost_python3 python3.6m
# We need to be able to find libpythonX.X.so or .dylib.
#PYTHON_LIB := /usr/lib
PYTHON_LIB := $(ANACONDA_HOME)/lib
# Homebrew installs numpy in a non standard path (keg only)
# PYTHON_INCLUDE += $(dir $(shell python -c 'import numpy.core; print(numpy.core.__file__)'))/include
# PYTHON_LIB += $(shell brew --prefix numpy)/lib
# Uncomment to support layers written in Python (will link against Python libs)
WITH_PYTHON_LAYER := 1
# Whatever else you find you need goes here.
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu/hdf5/serial
# If Homebrew is installed at a non standard location (for example your home directory) and you use it for general dependencies
# INCLUDE_DIRS += $(shell brew --prefix)/include
# LIBRARY_DIRS += $(shell brew --prefix)/lib
# NCCL acceleration switch (uncomment to build with NCCL)
# https://github.com/NVIDIA/nccl (last tested version: v1.2.3-1+cuda8.0)
# USE_NCCL := 1
# Uncomment to use `pkg-config` to specify OpenCV library paths.
# (Usually not necessary -- OpenCV libraries are normally installed in one of the above $LIBRARY_DIRS.)
USE_PKG_CONFIG := 1
# N.B. both build and distribute dirs are cleared on `make clean`
BUILD_DIR := build
DISTRIBUTE_DIR := distribute
# Uncomment for debugging. Does not work on OSX due to https://github.com/BVLC/caffe/issues/171
# DEBUG := 1
# The ID of the GPU that 'make runtest' will use to run unit tests.
TEST_GPUID := 0
# enable pretty build (comment to see full commands)
Q ?= @
# Avoid the link error: libpng16.so.16: undefined reference to `inflateValidate@ZLIB_1.2.9'
LINKFLAGS := -Wl,-rpath,$(HOME)/anaconda3/lib
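Since wrong include/lib paths in this file are the most common cause of build failures, it is worth checking that the directories named above actually exist on your machine, e.g.:

# Check that the paths referenced in Makefile.config exist (adjust to your setup).
import os

paths = [
    '/home/gy/anaconda3/include/python3.6m',
    '/home/gy/anaconda3/lib/python3.6/site-packages/numpy/core/include',
    '/usr/include/hdf5/serial',
    '/usr/lib/x86_64-linux-gnu/hdf5/serial',
]
for p in paths:
    print(p, '->', 'ok' if os.path.isdir(p) else 'MISSING')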
Next, edit the Makefile in the caffe-master folder. Replace:
NVCCFLAGS += -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
with:
NVCCFLAGS += -D_FORCE_INLINES -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
and replace:
LIBRARIES += glog gflags protobuf boost_system boost_filesystem m hdf5_hl hdf5
with:
LIBRARIES += glog gflags protobuf boost_system boost_filesystem m hdf5_serial_hl hdf5_serial
Build and install Caffe:
make all -j8
sudo make runtest -j8
Possible errors during compilation:
(1) The build may fail with /usr/bin/ld: cannot find -lboost_python3, meaning the linker cannot find a boost_python3.so library on the system. The earlier sudo apt-get install --no-install-recommends libboost-all-dev step generates libboost_python-py27.so and libboost_python-py35.so (matching the system's Python 2.7 and 3.5), both under /usr/lib/x86_64-linux-gnu/. The fix is to create a symlink named libboost_python3.so in that same directory pointing at libboost_python-py35.so.
The command is as follows:
sudo ln -s libboost_python-py35.so libboost_python3.so
Note that although caffe is built here against Python 3.6 while the boost library linked is the Python 3.5 one, in my testing this caused no problems. This article, https://www.jianshu.com/p/5afdb561ce94, instead compiles boost against Anaconda3's Python 3.6 to generate libboost_python-py36.so and links against that.
Building pycaffe:
With caffe installed, you can already train models and run predictions, but only through the caffe command on the command line; pycaffe is built so that caffe can be driven more conveniently from Python.
sudo make pycaffe -j8
Add the environment variable (e.g. in ~/.bashrc):
export PYTHONPATH="/home/gy/caffe-master/python:$PYTHONPATH"
Possible errors at this stage:
(1) When importing caffe in Python (import caffe), you may see the error:
ModuleNotFoundError: No module named 'google'
In that case, install protobuf inside Anaconda:
conda install protobuf
Running this installs libprotobuf 3.5.2 and protobuf 3.5.2; afterwards the system's libprotoc version is 3.5.1, which you can check with:
protoc --version
One point worth spelling out: before this step, apt-get had installed libprotoc 2.6.1, and that is the version caffe itself was compiled against. Building caffe against libprotoc 3.5.1 can produce errors about undefined protobuf symbols, yet building pycaffe requires libprotoc 3.5.1. So the working order is: build caffe first, then install libprotoc 3.5.1 and build pycaffe.
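With protobuf installed and PYTHONPATH set, a quick sanity check from the Python prompt (the version string below is what a recent master reports; treat it as illustrative):

# Minimal check that pycaffe imports and runs.
import caffe

caffe.set_mode_cpu()        # or caffe.set_mode_gpu(); caffe.set_device(0)
print(caffe.__version__)    # e.g. '1.0.0' on a recent master; the attribute may be absent on old checkouts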
Reinstalling caffe
Clean out the previously built caffe files:
make clean
Uninstall the libprotobuf 3.5 packages:
conda uninstall libprotobuf
This removes both libprotobuf 3.5.2 and protobuf 3.5.2.
Rebuild caffe and run the tests:
make all -j8
sudo make runtest -j8
Run the MNIST example to check that caffe is installed correctly
Note: all caffe programs must be run from the repository root (caffe-master); running them from anywhere else will fail.
sudo sh data/mnist/get_mnist.sh        # download the MNIST dataset
sudo sh examples/mnist/create_mnist.sh # convert it to LMDB format
After a successful conversion, two folders appear under examples/mnist/: mnist_train_lmdb and mnist_test_lmdb. The data.mdb and lock.mdb files inside them are the data the example runs on.
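To convince yourself the conversion worked, you can peek at the first record from Python. This sketch assumes the lmdb package is installed (pip install lmdb) and that pycaffe is already built (for caffe_pb2):

# Inspect the first record of the freshly created training LMDB.
import lmdb
from caffe.proto import caffe_pb2

env = lmdb.open('examples/mnist/mnist_train_lmdb', readonly=True)
with env.begin() as txn:
    key, value = next(txn.cursor().iternext())
    datum = caffe_pb2.Datum()
    datum.ParseFromString(value)
    # MNIST images are 1x28x28; label is the digit class
    print(key, datum.channels, datum.height, datum.width, datum.label)
env.close()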
Train LeNet:
sudo sh examples/mnist/train_lenet.sh
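When training finishes, the weights are saved under the solver's snapshot_prefix; with the stock MNIST solver that yields examples/mnist/lenet_iter_10000.caffemodel. A sketch of loading the deploy net and running a forward pass on dummy input (file names assume the stock example; adjust to your setup):

# Load the trained LeNet and run inference on a dummy batch.
import numpy as np
import caffe

caffe.set_mode_cpu()
net = caffe.Net('examples/mnist/lenet.prototxt',              # deploy definition
                'examples/mnist/lenet_iter_10000.caffemodel', # trained weights
                caffe.TEST)

data = net.blobs['data']
data.data[...] = np.random.rand(*data.data.shape)  # stand-in for real 28x28 images in [0,1)
out = net.forward()
print(out['prob'].argmax(axis=1)[:5])               # predicted classes for the first 5 items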
The next sections introduce the following three files:
- solver.prototxt
- train_val.prototxt
- train.sh
solver.prototxt stores the hyperparameters used for model training; the most commonly set entries are:
net := path to the network definition to train, i.e. train_val.prototxt
test_interval := testing interval, i.e. run a test pass every this many iterations
test_initialization := whether to run an initial test pass before any training
test_iter := number of iterations in each test pass
base_lr := base learning rate
lr_policy := learning-rate schedule, see: [lr_policy](https://blog.csdn.net/cham_3/article/details/52640668)
gamma := parameter used by some learning-rate schedules
power := same as above
stepsize := step length for the "step" schedule (fixed step)
stepvalue := step points for the "multistep" schedule (variable steps)
max_iter := maximum number of training iterations
momentum := momentum, used by optimizers (SGD, Adam, ...)
momentum2 := second-moment decay used by the Adam optimizer
weight_decay := weight-decay rate
clip_gradients := gradient-clipping threshold
display := print progress every this many iterations
snapshot := save a model snapshot every this many iterations
snapshot_prefix := prefix (possibly a path) for saved model files
type := solver type, i.e. SGD, Adam, AdaGrad, RMSProp, Nesterov, AdaDelta, etc.
solver_mode := training mode, GPU or CPU
debug_info := whether to print debugging information
device_id := GPU device id (in GPU mode), default 0
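These entries live in Caffe's SolverParameter protobuf message, so a solver file can also be read or tweaked programmatically. A small sketch using the MNIST example's solver file (any solver file works):

# Parse a solver file into a SolverParameter message and inspect/edit it.
from caffe.proto import caffe_pb2
from google.protobuf import text_format

solver = caffe_pb2.SolverParameter()
with open('examples/mnist/lenet_solver.prototxt') as f:
    text_format.Merge(f.read(), solver)
print(solver.net, solver.base_lr, solver.lr_policy, solver.max_iter)

solver.base_lr = 0.001      # adjust a hyperparameter in code
with open('my_solver.prototxt', 'w') as f:
    f.write(text_format.MessageToString(solver))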
train_val.prototxt defines the network structure, assembled layer by layer; the LeNet definition below illustrates the format.
For weight-initialization strategies see: https://blog.csdn.net/cham_3/article/details/52765087; activation functions are introduced with the ReLU layer below.
name: "LeNet"
layer {
  name: "mnist"                           # layer name
  type: "Data"                            # layer type: data layer
  top: "data"                             # output of this layer: the data
  top: "label"                            # output of this layer: the labels
  include { phase: TRAIN }                # TRAIN := used during training, TEST := used during testing
  transform_param { scale: 0.00390625 }   # scale the data: multiply pixels by 1/256, mapping [0,255] into [0,1)
  data_param {                            # data-layer settings
    source: "examples/mnist/mnist_train_lmdb"  # path to the data
    batch_size: 64                        # batch size
    backend: LMDB                         # database format: LMDB/LevelDB
  }
}
layer {
  name: "mnist"
  type: "Data"
  top: "data"
  top: "label"
  include { phase: TEST }
  transform_param { scale: 0.00390625 }
  data_param {
    source: "examples/mnist/mnist_test_lmdb"
    batch_size: 100
    backend: LMDB
  }
}
layer {
  name: "conv1"
  type: "Convolution"                     # convolution layer
  bottom: "data"                          # take the previous layer's output as input
  top: "conv1"
  param { name: "conv1_w" lr_mult: 1 decay_mult: 1 }  # weight w: name, learning-rate and decay multipliers (relative to base_lr and weight_decay)
  param { name: "conv1_b" lr_mult: 2 decay_mult: 0 }  # bias b: name, learning-rate and decay multipliers
  convolution_param {
    num_output: 20                        # number of output feature maps
    kernel_size: 5                        # kernel size
    pad: 0                                # padding
    stride: 1                             # convolution stride
    weight_filler { type: "xavier" }              # initialization strategy for w
    bias_filler { type: "constant" value: 0.1 }   # initialization strategy for b
  }
}
layer {                                   # BatchNorm layer: batch-normalizes the feature maps
  name: "bn1"
  type: "BatchNorm"
  bottom: "conv1"
  top: "conv1"
  batch_norm_param { use_global_stats: false }  # false during training, true at test time
}
layer {                                   # pooling (downsampling) layer
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX                             # max pooling; AVE gives average pooling
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}
layer {
  name: "bn2"
  type: "BatchNorm"
  bottom: "conv2"
  top: "conv2"
  batch_norm_param { use_global_stats: false }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {                                   # fully connected layer
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  inner_product_param {
    num_output: 500
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}
layer {                                   # activation layer: provides non-linearity
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  inner_product_param {
    num_output: 10
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}
layer {                                   # loss layer
  name: "prob"
  type: "SoftmaxWithLoss"
  bottom: "ip2"
  bottom: "label"
  top: "prob"
}
train.sh is the shell script that launches training; it invokes the caffe binary with the solver file.
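The same training run can also be driven from pycaffe, which is useful for custom training loops or for inspecting blobs mid-training. A sketch assuming the stock MNIST solver file:

# Drive training from Python instead of the shell script.
import caffe

caffe.set_mode_gpu()
caffe.set_device(0)

solver = caffe.SGDSolver('examples/mnist/lenet_solver.prototxt')
solver.solve()          # runs the full schedule defined in the solver file

# Alternatively, step manually and inspect state as you go:
# solver.step(100)
# print(solver.net.blobs['prob'].data)   # 'prob' is the loss top in the net above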