英文官方文档:http://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/00-classification.ipynb
import numpy as np
import matplotlib.pyplot as plt
# display plots in this notebook
%matplotlib inline
# set display defaults
plt.rcParams['figure.figsize'] = (10, 10) # large images
plt.rcParams['image.interpolation'] = 'nearest' # don't interpolate: show square pixels
plt.rcParams['image.cmap'] = 'gray' # use grayscale output rather than a (potentially misleading) color heatmap
import sys
import os
caffe_root = './' #指定caffe的根目录
sys.path.insert(0, caffe_root + 'python') #将caffe python接口文件路径添加到python path中
import caffe
# 判断model文件是否存在
if os.path.isfile(caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'):
print 'CaffeNet found.'
else:
print 'Downloading pre-trained CaffeNet model...'
使用 Python 的 caffe.io.load_image 接口读取图片,返回的是取值范围在 [0, 1] 的 np.float32 数组
def load_image(filename, color=True):
    """
    Read an image file and return it as a float32 array with a channel axis.

    Parameters
    ----------
    filename : string
        Path handed to skimage.io.imread.
    color : boolean
        True (default) requests a color image; False asks imread for a
        grayscale (intensity) image.

    Returns
    -------
    image : np.float32 array of shape (H x W x 3) when color is True,
        or (H x W x 1) when a single-channel image is loaded with
        color=False.  Values come from skimage.img_as_float.
    """
    # NOTE(review): `skimage` is not imported in the visible part of this
    # file; it has to be in scope wherever this function lives.
    raw = skimage.io.imread(filename, as_grey=not color)
    pixels = skimage.img_as_float(raw).astype(np.float32)

    if pixels.ndim == 2:
        # Single-channel image: add an explicit channel axis ...
        pixels = pixels[:, :, np.newaxis]
        if color:
            # ... and replicate it three times when color was requested.
            pixels = np.tile(pixels, (1, 1, 3))
        return pixels

    if pixels.shape[2] == 4:
        # Four channels: keep only the first three (drops the alpha channel).
        pixels = pixels[:, :, :3]
    return pixels
Python 的 Transformer 接口会对 load_image 读取的图片做预处理。注意 raw_scale 是在减去均值之前(resize、维度变换、通道变换之后)应用的,而 input_scale 是在减去均值之后应用的
def preprocess(self, in_, data):
    """
    Format input for Caffe:
    - convert to single
    - resize to input dimensions (preserving number of channels)
    - transpose dimensions to K x H x W
    - reorder channels (for instance color to BGR)
    - scale raw input (e.g. from [0, 1] to [0, 255] for ImageNet models)
    - subtract mean
    - scale feature

    The caller's array is never modified: if the working array still shares
    memory with `data` when the in-place arithmetic starts, it is copied
    first.

    Parameters
    ----------
    in_ : name of input blob to preprocess for
    data : (H' x W' x K) ndarray

    Returns
    -------
    caffe_in : (K x H x W) ndarray for input to a Net
    """
    self.__check_input(in_)
    # copy=False avoids a copy when `data` is already float32; in that case
    # caffe_in is the very same array object as `data`.
    caffe_in = data.astype(np.float32, copy=False)
    transpose = self.transpose.get(in_)
    channel_swap = self.channel_swap.get(in_)
    raw_scale = self.raw_scale.get(in_)
    mean = self.mean.get(in_)
    input_scale = self.input_scale.get(in_)
    in_dims = self.inputs[in_][2:]
    # 1. Resize to the spatial size of the input blob.
    if caffe_in.shape[:2] != in_dims:
        caffe_in = resize_image(caffe_in, in_dims)
    # 2. Reorder the axes (e.g. H x W x C -> C x H x W).
    if transpose is not None:
        caffe_in = caffe_in.transpose(transpose)
    # 3. Reorder the channels (e.g. RGB -> BGR).
    if channel_swap is not None:
        caffe_in = caffe_in[channel_swap, :, :]
    # Steps 4-6 operate in place.  astype(copy=False) and transpose() can both
    # return views of `data`, so copy before the first in-place step if the
    # buffers may still overlap; otherwise the caller's image would be
    # silently overwritten.
    has_inplace_step = any(
        step is not None for step in (raw_scale, mean, input_scale))
    if has_inplace_step and np.may_share_memory(caffe_in, data):
        caffe_in = caffe_in.copy()
    # 4. Scale the raw values (e.g. raw_scale = 255 maps [0, 1] to [0, 255]).
    if raw_scale is not None:
        caffe_in *= raw_scale
    # 5. Subtract the mean.
    if mean is not None:
        caffe_in -= mean
    # 6. Scale the mean-subtracted values (e.g. input_scale = 0.00390625,
    #    i.e. 1/256).
    if input_scale is not None:
        caffe_in *= input_scale
    return caffe_in
# Files describing the network: the deploy definition and the trained weights.
model_def = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
model_weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'

# Run on the CPU.
caffe.set_mode_cpu()

# Load the network in test mode (e.g., don't perform dropout).
net = caffe.Net(model_def, model_weights, caffe.TEST)
# 加载均值文件,mu的shape是(3,256,256), mean(1)实在第一个维度上做均值,返回shape为(3,256)
# 再mean(1)后,返回形状是(3),分别是rgb三个通道上均值
mu = np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')
mu = mu.mean(1).mean(1)
print 'mean-subtracted values:', zip('BGR', mu)
#mean-subtracted values: [('B', 104.0069879317889), ('G', 116.66876761696767), ('R', 122.6789143406786)]
# create transformer for the input called 'data'
# 创建一个转换器,名字叫‘data’
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
# transformer会将channels变成最外面的维度, 即 (H,W,C) 变成(C, W, C)
transformer.set_transpose('data', (2,0,1))
transformer.set_mean('data', mu) # 每个通道上减去均值
transformer.set_raw_scale('data', 255) # 从[0, 1]的范围放大到[0, 255]
transformer.set_channel_swap('data', (2,1,0)) #修改通道顺序,从RGB变成BGR
# 为了演示批处理,将输入的batch size修改成50
net.blobs['data'].reshape(50, # batch size
3, # 3通道
227, 227) # 图片大小为 227x227
# caffe.io.load_image读取图片值的范围是0-1,cv2.imread读取图片值的范围是0-255
image = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')
# transformer进行图片预处理,包括图片值转换到0-255
transformed_image = transformer.preprocess('data', image)
plt.imshow(image)
# 图片数据拷贝到net申请内存中
net.blobs['data'].data[...] = transformed_image
### 前向传播,执行图片分类。
output = net.forward()
# top blob可能有多个,使用'prob'索引,后面的0表示第一张图片的输出
output_prob = output['prob'][0]
# 获取分类编号
print 'predicted class is:', output_prob.argmax()
# 输出predicted class is: 281
# 加载imageNet的label文件
labels_file = caffe_root + 'data/ilsvrc12/synset_words.txt'
if not os.path.exists(labels_file):
!../data/ilsvrc12/get_ilsvrc_aux.sh
labels = np.loadtxt(labels_file, str, delimiter='\t')
print 'output label:', labels[output_prob.argmax()]
# 输出内容 output label: n02123045 tabby, tabby cat
# sort默认升序排列,反转后全最大前五个
top_inds = output_prob.argsort()[::-1][:5] # reverse sort and take five largest items
print 'probabilities and labels:'
zip(output_prob[top_inds], labels[top_inds])
'''[(0.31243637, 'n02123045 tabby, tabby cat'),
(0.2379719, 'n02123159 tiger cat'),
(0.12387239, 'n02124075 Egyptian cat'),
(0.10075711, 'n02119022 red fox, Vulpes vulpes'),
(0.070957087, 'n02127052 lynx, catamount')]
'''
# Time a forward pass on the CPU
%timeit net.forward()
# Recorded output: 1 loop, best of 3: 1.42 s per loop
# Switch to the GPU; when several GPUs are present, select one by its index
caffe.set_device(0) # if we have multiple GPUs, pick the first one
caffe.set_mode_gpu()
net.forward() # run once before timing to set up memory
%timeit net.forward()
# Recorded output: 10 loops, best of 3: 70.2 ms per loop