相关模块:
import numpy as np
我使用的是caffe.Classifier()这个接口,先看看这个类的初始化函数,了解一下各个参数:
def __init__(self, model_file, pretrained_file, image_dims=None,
             mean=None, input_scale=None, raw_scale=None,
             channel_swap=None):
    """Initialise the underlying net in TEST phase and set up pre-processing.

    Parameters
    ----------
    model_file, pretrained_file : forwarded unchanged to caffe.Net.__init__
        together with caffe.TEST
    image_dims : stored as self.image_dims; when falsy the crop dimensions
        taken from the input blob are used instead
    mean, input_scale, raw_scale, channel_swap : each one is handed to the
        matching Transformer setter only when it is not None
    """
    caffe.Net.__init__(self, model_file, pretrained_file, caffe.TEST)

    # Pre-processing is configured for the first declared input only.
    first_input = self.inputs[0]
    self.transformer = caffe.io.Transformer(
        {first_input: self.blobs[first_input].data.shape})
    self.transformer.set_transpose(first_input, (2, 0, 1))

    # Optional settings, applied in the same order as they are declared here.
    optional_settings = (
        ('set_mean', mean),
        ('set_input_scale', input_scale),
        ('set_raw_scale', raw_scale),
        ('set_channel_swap', channel_swap),
    )
    for setter_name, value in optional_settings:
        if value is not None:
            getattr(self.transformer, setter_name)(first_input, value)

    # Everything after the first two axes of the input blob is the crop size.
    self.crop_dims = np.array(self.blobs[first_input].data.shape[2:])
    self.image_dims = image_dims if image_dims else self.crop_dims
可以看到,主要的处理还是在caffe.io.Transformer({in_: self.blobs[in_].data.shape})里面
class Transformer:
    """
    Transform input for feeding into a Net.

    Note: this is mostly for illustrative purposes and it is likely better
    to define your own input preprocessing routine for your needs.

    Parameters
    ----------
    inputs : dict mapping an input blob name to that blob's shape; the
        entries from index 2 onward are used as the target spatial size
    """

    def __init__(self, inputs):
        self.inputs = inputs
        # Per-input settings, keyed by input blob name.
        self.transpose = {}
        self.channel_swap = {}
        self.raw_scale = {}
        self.mean = {}
        self.input_scale = {}

    def __check_input(self, in_):
        """Raise an Exception if in_ is not a key of self.inputs."""
        if in_ not in self.inputs:
            raise Exception('{} is not one of the net inputs: {}'.format(
                in_, self.inputs))

    def preprocess(self, in_, data):
        """
        Format input for Caffe:
        - convert to single
        - resize to input dimensions (preserving number of channels)
        - transpose dimensions to K x H x W
        - reorder channels (for instance color to BGR)
        - scale raw input (e.g. from [0, 1] to [0, 255] for ImageNet models)
        - subtract mean
        - scale feature

        The caller's array is never modified: a private copy is taken before
        the in-place arithmetic whenever the working array still shares
        memory with `data`.

        Parameters
        ----------
        in_ : name of input blob to preprocess for
        data : (H' x W' x K) ndarray

        Returns
        -------
        caffe_in : (K x H x W) ndarray for input to a Net

        Raises
        ------
        Exception : if in_ is not one of the configured inputs
        """
        self.__check_input(in_)
        # copy=False returns `data` itself when it is already float32.
        caffe_in = data.astype(np.float32, copy=False)
        transpose = self.transpose.get(in_)
        channel_swap = self.channel_swap.get(in_)
        raw_scale = self.raw_scale.get(in_)
        mean = self.mean.get(in_)
        input_scale = self.input_scale.get(in_)
        # tuple() so that a shape supplied as a list still compares equal to
        # ndarray.shape (a tuple) and does not trigger a needless resize.
        in_dims = tuple(self.inputs[in_][2:])
        if caffe_in.shape[:2] != in_dims:
            caffe_in = resize_image(caffe_in, in_dims)
        if transpose is not None:
            caffe_in = caffe_in.transpose(transpose)
        if channel_swap is not None:
            caffe_in = caffe_in[channel_swap, :, :]
        # The steps below work in place; detach from the caller's buffer first
        # so that a float32 input is not silently overwritten.
        has_arithmetic = (raw_scale is not None or mean is not None
                          or input_scale is not None)
        if has_arithmetic and np.may_share_memory(caffe_in, data):
            caffe_in = caffe_in.copy()
        if raw_scale is not None:
            caffe_in *= raw_scale
        if mean is not None:
            caffe_in -= mean
        if input_scale is not None:
            caffe_in *= input_scale
        return caffe_in
我用的vgg_face,基于vgg16的一个人脸识别模型,这个model的相关信息可以看这里VGG Face Descriptor
caffe.Classifier我只传入了模型文件和网络定义文件,其他都用默认,所以要使用之前要对图像进行一系列的预处理
def init_net(
        model_def='/home/hy/d/huangying/305git/code/vgg_face_caffe/VGG_FACE_deploy.prototxt',
        model_pretrained='/home/hy/d/huangying/305git/code/vgg_face_caffe/VGG_FACE.caffemodel'):
    """Switch Caffe to GPU mode and build a caffe.Classifier.

    Parameters
    ----------
    model_def : path of the network definition passed to caffe.Classifier
    model_pretrained : path of the trained weights passed to caffe.Classifier

    Both default to the paths that used to be hard-coded, so calling
    init_net() with no arguments behaves as before.

    Returns
    -------
    net : the caffe.Classifier instance; only the two file arguments are
        supplied, every other constructor argument keeps its default
    """
    caffe.set_mode_gpu()  # CPU is too slow
    net = caffe.Classifier(model_def, model_pretrained)
    return net
二、图片数据预处理
一般是从图片路径直接把图片读取到python内存中,这个可以用很多模块实现,cv2、plt、Image之类的,不赘述
假设把所有要处理的图片都读进到了一个名为faces的矩阵里,格式为:
num * height * width * channels
接下来要先对图片预处理,包括
1.resize成网络输入的尺寸大小
2.减去网络训练数据的均值
3.如果是灰度图,复制成3通道图片,如果是彩色图,把rgb转为bgr
注意 image = cv2.resize(faces[i],(224,224)) 这个函数
如果输入的是单通道(灰度图),resize后输出则去掉了通道那个维度,e.g [48,48,1]=>[224,224]
如果输入的是3通道(彩色图),resize后输出保留通道那个维度, e.g [48,48,3]=>[224,224,3]
def prepare_data(faces):
    """Turn a stack of images into a num x 3 x 224 x 224 float array.

    Each image is resized to 224 x 224, expanded to three channels
    (grayscale is replicated, 3-channel input has its channel order
    reversed), has averageImg subtracted per channel and is transposed to
    channel-first layout.

    Parameters
    ----------
    faces : ndarray, num x height x width x channel with channel 1 or 3;
        num x height x width (no channel axis) is treated as grayscale

    Returns
    -------
    faces_resize : ndarray of shape (num, 3, 224, 224)

    Raises
    ------
    ValueError : if an image does not have exactly 1 or 3 channels
    """
    num = faces.shape[0]
    faces_resize = np.empty([num, 3, 224, 224])
    # NOTE(review): these values are subtracted AFTER the channel flip below;
    # confirm they are listed in the flipped (BGR) order the model expects.
    averageImg = np.array([129.1863, 104.7624, 93.5940])
    for i, src in enumerate(faces):
        if src.ndim == 2:
            channels = 1
        elif src.ndim == 3:
            channels = src.shape[2]
        else:
            channels = None
        # Validate before doing any work on the image.
        if channels not in (1, 3):
            raise ValueError('channel must be 1 for grayscale or 3 for rgb')

        if src.shape[:2] == (224, 224):
            # Already at the target size; nothing to resample.
            image = src
        else:
            image = cv2.resize(src, (224, 224))

        if channels == 1:
            # cv2.resize drops a trailing single-channel axis, so restore it
            # explicitly before replicating the plane three times.
            face = np.repeat(image.reshape(224, 224, 1), 3, axis=2)
        else:
            # Reverse the channel order (RGB -> BGR).
            face = image[:, :, ::-1]

        face = face - averageImg
        # c * h * w, the layout expected by net.forward_all(data=X)
        faces_resize[i] = face.transpose((2, 0, 1))
    return faces_resize
(1)
假如要处理很多图片,一张一张的forward是很耗时的
但是也不能一次性传入太多图片,这样显存(内存)会不足
所以这里对图片分batch进行处理
def extract_feat(faces, blobname, shape, batch_size=50):
    """Run all images through the net in batches and collect one blob's output.

    Forwarding one image at a time is slow, while forwarding everything at
    once can exhaust (GPU) memory, so the input is processed in chunks.

    Parameters
    ----------
    faces : num * height * width * channel
    blobname : e.g. 'fc6', 'pool5' ...
    shape : shape for this blob, e.g. fc6: [4096], pool5: [512, 7, 7];
        any sequence (list or tuple) is accepted
    batch_size : number of images forwarded per call (default 50)

    Returns
    -------
    netdata : ndarray of shape [num] + shape holding the blob output for
        every image, in input order

    Raises
    ------
    ValueError : if batch_size is not positive
    """
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer')

    # NOTE: a new net is created on every call.
    net = init_net()
    num = faces.shape[0]
    # Ceiling division with integer arithmetic: a plain "/" yields a float on
    # Python 3, which range() rejects. The extra batch covers the remainder
    # when batch_size does not divide num evenly.
    iters = (num + batch_size - 1) // batch_size
    netdata = np.empty([num] + list(shape))
    for i in range(iters):
        print('forward process %d/%d' % (i + 1, iters))
        idx0 = i * batch_size
        # The last batch may be shorter than batch_size.
        idx1 = min(idx0 + batch_size, num)
        netdata[idx0:idx1] = pro_batch(faces[idx0:idx1], net, blobname, shape)
    return netdata
(2)
注意net.forward_all()这个函数,返回的是一个dict
很多人说用out=net.forward_all( data = faces)就可以了(faces是包含多张图片)
然而这样的话
1、out字典里只有网络最后一层的输出,对于vggface的话就是out['prob'],表示各个类别的概率
2、out里面确实保留了所有batch的输出,即如果faces是5张图片,那么out['prob']的shape就是 5*num_classes
3、关键的是,很多人用feat = net.blobs['fc6'].data来获取中间数据,实际上我在尝试的时候发现feat的num只有1个!应该是net每次forward后只保留最后一张图的数据。
那么如何批量提取中间层特征?
经过各种查考,终于找到了办法用:
feat = net.forward_all( data = faces,blobs=[blobname] ) # blobname就是layername啦,层的名字
传入要提取中间层的name,那么在最后返回的dict中,就会发现不仅有'prob'层的值,还有我们需要的中间层输出值
可以通过
feat = net.forward_all( data = faces,blobs=[name1,name2,...] )
传递多个层的name,就可以同时获取多个中间层的输出(特征)了
def pro_batch(faces, net, blobname, shape):
    """Pre-process one batch of images and return a single blob's output.

    Parameters
    ----------
    faces : num * height * width * channel
    net : object providing forward_all(data=..., blobs=[...])
    blobname : e.g. 'fc6', 'pool5' ...
    shape : shape for this blob, e.g. fc6: [4096], pool5: [512, 7, 7];
        accepted but not used inside this function

    Returns
    -------
    The entry stored under blobname in the mapping returned by
    net.forward_all.
    """
    net_input = prepare_data(faces)
    # Listing the blob in `blobs` makes forward_all include it in its result.
    outputs = net.forward_all(data=net_input, blobs=[blobname])
    return outputs[blobname]