Even a Cat Can Understand It: Day 4
In the previous post we used code to scan across the image and classify each region in turn. In this post we will get the same result by modifying the neural network itself.
Method 2: a convolutional network
Principle
In the image-classification network we used earlier, the final stages are fully connected layers (type "InnerProduct"), which lock the network to a fixed input size (256×256 images in our dataset). A convolutional layer, by contrast, applies its weights as a sliding filter, so if we replace every fully connected layer with a convolutional layer the network can accept an input image of any size.
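To make this concrete, here is a minimal NumPy sketch (illustrative only, not Caffe code; the sizes and variable names are made up): a fully connected layer is a matrix multiply whose input size is baked into the weight matrix, while the same weights reshaped into a filter can slide over an input of any size.

import numpy as np
from scipy.signal import correlate2d

# a fully connected layer: the weight matrix fixes the input at exactly 6x6 values
W_fc = np.random.randn(10, 6 * 6)                 # 10 outputs, 36 inputs
x_small = np.random.randn(6, 6)
y_fc = W_fc.dot(x_small.ravel())                  # works
# W_fc.dot(np.random.randn(8, 8).ravel())         # would fail: 36 weights cannot consume 64 inputs

# the same weights, reshaped into a 6x6 filter, slide over an input of any size
w_filter = W_fc[0].reshape(6, 6)                  # the first output neuron used as a filter
print(correlate2d(x_small, w_filter, mode='valid').shape)  # (1, 1), its value equals y_fc[0]
x_large = np.random.randn(10, 10)
print(correlate2d(x_large, w_filter, mode='valid').shape)  # (5, 5): a grid of outputs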
The original fully connected layer:
layer {
name: "fcN"
type: "InnerProduct"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
The convolutional layer that replaces it (the comment lines mark what changed):
layer {
name: "convN"
## change the type from the original fully connected layer (InnerProduct) to a convolutional layer (Convolution)
type: "Convolution"
bottom: "pool5"
top: "conv6"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
## set the convolution parameters
convolution_param {
num_output: 4096
pad: 0
kernel_size: 6
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
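Why kernel_size: 6? For the standard 227×227 crop, pool5 produces a 256×6×6 feature map, so a 6×6 convolution with 4096 outputs computes exactly the same weighted sums as the old fc6 (4096×9216 weights either way), but it keeps working when the feature map is larger. Below is a rough sketch of the size arithmetic, with the layer parameters taken from the full network that follows; note that Caffe's pooling layers round up when the division is not exact, which happens not to matter for these example sizes.

def out_size(size, kernel, stride=1, pad=0):
    # spatial output size of a convolution or pooling layer
    return (size + 2 * pad - kernel) // stride + 1

def conv6_grid(input_size):
    s = out_size(input_size, kernel=11, stride=4)   # conv1
    s = out_size(s, kernel=3, stride=2)             # pool1
    s = out_size(s, kernel=5, pad=2)                # conv2
    s = out_size(s, kernel=3, stride=2)             # pool2
    s = out_size(s, kernel=3, pad=1)                # conv3
    s = out_size(s, kernel=3, pad=1)                # conv4
    s = out_size(s, kernel=3, pad=1)                # conv5
    s = out_size(s, kernel=3, stride=2)             # pool5
    return out_size(s, kernel=6)                    # conv6, the former fc6

print(conv6_grid(227))   # 1: a single prediction, just like the original fc6
print(conv6_grid(451))   # 8: an 8x8 grid of predictions over a larger image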
Rules for modifying the network
Every layer has inputs (bottom) and outputs (top); the output of one layer is the input of the next, so after any change the layers must still be chained end to end by matching blob names (a small script that checks this chaining is given after the full definition). Here is the final, fully convolutional network:
# AlexNet
name: "AlexNet"
layer {
name: "train-data"
type: "Data"
top: "data"
top: "label"
transform_param {
mirror: true
crop_size: 227
}
data_param {
batch_size: 128
}
include { stage: "train" }
}
layer {
name: "val-data"
type: "Data"
top: "data"
top: "label"
transform_param {
crop_size: 227
}
data_param {
batch_size: 32
}
include { stage: "val" }
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 96
kernel_size: 11
stride: 4
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "norm1"
type: "LRN"
bottom: "conv1"
top: "norm1"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "norm1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 2
kernel_size: 5
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "norm2"
type: "LRN"
bottom: "conv2"
top: "norm2"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
layer {
name: "pool2"
type: "Pooling"
bottom: "norm2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "conv4"
type: "Convolution"
bottom: "conv3"
top: "conv4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "conv5"
type: "Convolution"
bottom: "conv4"
top: "conv5"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "conv5"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
layer {
name: "conv6"
type: "Convolution"
bottom: "pool5"
top: "conv6"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 4096
pad: 0
kernel_size: 6
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "conv6"
top: "conv6"
}
layer {
name: "drop6"
type: "Dropout"
bottom: "conv6"
top: "conv6"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "conv7"
type: "Convolution"
bottom: "conv6"
top: "conv7"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 4096
kernel_size: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "conv7"
top: "conv7"
}
layer {
name: "drop7"
type: "Dropout"
bottom: "conv7"
top: "conv7"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "conv8"
type: "Convolution"
bottom: "conv7"
top: "conv8"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 2
kernel_size: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "conv8"
bottom: "label"
top: "accuracy"
include { stage: "val" }
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "conv8"
bottom: "label"
top: "loss"
exclude { stage: "deploy" }
}
layer {
name: "softmax"
type: "Softmax"
bottom: "conv8"
top: "softmax"
include { stage: "deploy" }
}
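The chaining rule described above can also be checked automatically. Here is a minimal sketch, assuming pycaffe and its protobuf bindings are installed; it ignores the include/exclude stage rules, so treat anything it prints as a hint rather than a verdict.

from google.protobuf import text_format
from caffe.proto import caffe_pb2

def check_connectivity(prototxt_path):
    net = caffe_pb2.NetParameter()
    with open(prototxt_path) as f:
        text_format.Merge(f.read(), net)
    produced = set(net.input)          # explicitly declared network inputs, if any
    for layer in net.layer:
        for blob in layer.bottom:
            if blob not in produced:
                print('layer "%s" expects blob "%s", but no earlier layer produces it' % (layer.name, blob))
        produced.update(layer.top)

check_connectivity('train_val.prototxt')   # the file name is only an example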
References, for further reading:
Image classification with convolutional neural networks: http://papers.nips.cc/paper/4...
A quick introduction to convolutional neural networks: https://www.youtube.com/watch...
Finally, use the trained fully convolutional network from Python:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import caffe
import copy
from scipy.misc import imresize
import time
JOB_DIR = '##FIXME##'  ## set this to the training job directory
MODEL_FILE = JOB_DIR + '/deploy.prototxt'  # the fully convolutional network definition
PRETRAINED = JOB_DIR + '/snapshot_iter_<ITERATION>.caffemodel'  # replace <ITERATION> with the iteration number of the trained snapshot
# use the GPU
caffe.set_mode_gpu()
# load the input image into a numpy array
IMAGE_FILE = '##FIXME##'  ## path to the image to classify
input_image = caffe.io.load_image(IMAGE_FILE)
plt.imshow(input_image)
## plt.show()  # uncomment to display the image
# initialise the Caffe network from the deploy definition and the trained weights
net = caffe.Net(MODEL_FILE,PRETRAINED,caffe.TEST)
# reshape the data blob to this image's size, then propagate the new shapes through the network
net.blobs['data'].reshape(1, 3, input_image.shape[0], input_image.shape[1])
net.reshape()
# set up a transformer that converts an HxWxC RGB image in [0,1] into Caffe's CxHxW BGR layout in [0,255]
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1))
transformer.set_channel_swap('data', (2,1,0))
transformer.set_raw_scale('data', 255.0)
# prepare a colour map for overlaying the classification result
my_cmap = copy.copy(plt.cm.get_cmap('jet')) # get a copy of the jet color map
my_cmap.set_bad(alpha=0) # set how the colormap handles 'bad' values
# run the preprocessed image through the network (forward pass)
out = net.forward(data=np.asarray([transformer.preprocess('data', input_image)]))
# visualise the classification result
im = transformer.deprocess('data', net.blobs['data'].data[0])   # the network input as a displayable image
classifications = out['softmax'][0]                              # shape (2, H', W'): class scores per position
# take the arg-max class at each position and upsample the map to the input image size
classifications = imresize(classifications.argmax(axis=0), input_image.shape[:2], interp='bilinear').astype('float')
classifications[classifications==0] = np.nan                     # class-0 positions become transparent (set_bad above)
plt.imshow(im)
plt.imshow(classifications,alpha=.5,cmap=my_cmap)
plt.show()
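# optional sanity check: the data blob should match the input image, and the softmax blob
# should be a spatial grid with one 2-class prediction per position, not a single label
print(net.blobs['data'].data.shape)   # e.g. (1, 3, H, W)
print(out['softmax'].shape)           # e.g. (1, 2, H', W')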
# finally, release memory
try:
    del transformer
    del net
    del classifications
except Exception as e:
    print(e)
If you have any questions, feel free to follow the series or contact the author at [email protected]!