Caffe Learning Notes 6: Process Summary

In Learning Notes 1 I described two methods for prediction; today I realized that this was not quite right. Below is an analysis and summary.
First, let's look at the source code of Classifier:

#!/usr/bin/env python
"""
Classifier is an image classifier specialization of Net.
"""

import numpy as np

import caffe


class Classifier(caffe.Net):
    """
    Classifier extends Net for image class prediction
    by scaling, center cropping, or oversampling.

    Parameters
    ----------
    image_dims : dimensions to scale input for cropping/sampling.
        Default is to scale to net input size for whole-image crop.
    mean, input_scale, raw_scale, channel_swap: params for
        preprocessing options.
    """
    def __init__(self, model_file, pretrained_file, image_dims=None,
                 mean=None, input_scale=None, raw_scale=None,
                 channel_swap=None):
        caffe.Net.__init__(self, model_file, pretrained_file, caffe.TEST)

        # configure pre-processing
        in_ = self.inputs[0]
        self.transformer = caffe.io.Transformer(
            {in_: self.blobs[in_].data.shape})
        self.transformer.set_transpose(in_, (2, 0, 1))
        if mean is not None:
            self.transformer.set_mean(in_, mean)
        if input_scale is not None:
            self.transformer.set_input_scale(in_, input_scale)
        if raw_scale is not None:
            self.transformer.set_raw_scale(in_, raw_scale)
        if channel_swap is not None:
            self.transformer.set_channel_swap(in_, channel_swap)

        self.crop_dims = np.array(self.blobs[in_].data.shape[2:])
        if not image_dims:
            image_dims = self.crop_dims
        self.image_dims = image_dims

    def predict(self, inputs, oversample=True):
        """
        Predict classification probabilities of inputs.

        Parameters
        ----------
        inputs : iterable of (H x W x K) input ndarrays.
        oversample : boolean
            average predictions across center, corners, and mirrors
            when True (default). Center-only prediction when False.

        Returns
        -------
        predictions: (N x C) ndarray of class probabilities for N images and C
            classes.
        """
        # Scale to standardize input dimensions.
        input_ = np.zeros((len(inputs),
                           self.image_dims[0],
                           self.image_dims[1],
                           inputs[0].shape[2]),
                          dtype=np.float32)
        for ix, in_ in enumerate(inputs):
            input_[ix] = caffe.io.resize_image(in_, self.image_dims)

        if oversample:
            # Generate center, corner, and mirrored crops.
            input_ = caffe.io.oversample(input_, self.crop_dims)
        else:
            # Take center crop.
            center = np.array(self.image_dims) / 2.0
            crop = np.tile(center, (1, 2))[0] + np.concatenate([
                -self.crop_dims / 2.0,
                self.crop_dims / 2.0
            ])
            crop = crop.astype(int)
            input_ = input_[:, crop[0]:crop[2], crop[1]:crop[3], :]

        # Classify
        caffe_in = np.zeros(np.array(input_.shape)[[0, 3, 1, 2]],
                            dtype=np.float32)
        for ix, in_ in enumerate(input_):
            caffe_in[ix] = self.transformer.preprocess(self.inputs[0], in_)
        out = self.forward_all(**{self.inputs[0]: caffe_in})
        predictions = out[self.outputs[0]]

        # For oversampling, average predictions across crops.
        if oversample:
            predictions = predictions.reshape((len(predictions) / 10, 10, -1))
            predictions = predictions.mean(1)

        return predictions

In particular, note the constructor:

def __init__(self, model_file, pretrained_file, image_dims=None,  
                 mean=None, input_scale=None, raw_scale=None,  
                 channel_swap=None):  
        caffe.Net.__init__(self, model_file, pretrained_file, caffe.TEST)  

This shows that Classifier simply wraps caffe.Net. So describing them as "two methods" earlier was wrong: Classifier just re-initializes the preprocessing parameters in code for you. In other words, this class is convenient because it spares you the manual image preprocessing. From now on I will use Classifier consistently!
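To make the equivalence concrete, here is a minimal sketch (all file paths are placeholders for your own model) of how the manual Net-plus-Transformer setup collapses into a single Classifier call:

import numpy as np
import caffe

# Sketch only: the keyword arguments below map one-to-one to the Transformer
# calls in Classifier.__init__ shown above. File names are placeholders.
clf = caffe.Classifier('deploy.prototxt', 'weights.caffemodel',
                       mean=np.load('mean.npy').mean(1).mean(1),  # set_mean
                       channel_swap=(2, 1, 0),   # set_channel_swap: RGB -> BGR
                       raw_scale=255,            # set_raw_scale: [0, 1] -> [0, 255]
                       image_dims=(256, 256))    # resize target before cropping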
Next, look at caffe.io.load_image():

def load_image(filename, color=True):
    """
    Load an image converting from grayscale or alpha as needed.

    Parameters
    ----------
    filename : string
    color : boolean
        flag for color format. True (default) loads as RGB while False
        loads as intensity (if image is already grayscale).

    Returns
    -------
    image : an image with type np.float32 in range [0, 1]
        of size (H x W x 3) in RGB or
        of size (H x W x 1) in grayscale.
    """
As you can see, caffe.io.load_image() can also read grayscale images! Change the line in predict_plus.py from Learning Notes 3 to input_image = caffe.io.load_image(img, color=False).
The experiment succeeded!
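For example (a sketch; the file name is a placeholder), a grayscale image comes back with an explicit single channel:

import caffe

# Sketch only: 'digit.png' stands in for any grayscale image file.
img = caffe.io.load_image('digit.png', color=False)
print img.shape   # (H, W, 1): float32 values in [0, 1], one channel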

Note:

Predict classification probabilities of inputs.

        Parameters
        ----------
        inputs : iterable of (H x W x K) input ndarrays.
        oversample : boolean
            average predictions across center, corners, and mirrors
            when True (default). Center-only prediction when False.

        Returns
        -------
        predictions: (N x C) ndarray of class probabilities for N images and C
            classes.

The inputs argument of predict is an iterable of images of shape (H, W, K); the oversample flag controls whether each image is cropped, that is, whether one image is expanded into 10 crops (the four corners plus the center, each also mirrored horizontally, 10 in total). When False, only the center crop is used. The return value is an (N x C) numpy.ndarray of class probabilities.
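A short sketch of a predict call (assuming `net` is a caffe.Classifier instance and `im` is an image loaded with caffe.io.load_image; both names are placeholders):

probs = net.predict([im])                           # oversample=True: 10 crops, averaged
print probs.shape                                   # (1, C): one row per input image
probs_center = net.predict([im], oversample=False)  # center crop only
print 'top class:', probs_center[0].argmax()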
To summarize the code: if you do not use Classifier, the workflow looks like this:

import numpy as np
import sys, os
# Point the working environment at the caffe root
caffe_root = '/home/xxx/caffe/'
# Also add caffe/python to the Python path
sys.path.insert(0, caffe_root + 'python')
import caffe
os.chdir(caffe_root)  # change the working directory

# Network definition
net_file = caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt'
# Trained weights
caffe_model = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
# Mean file
mean_file = caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy'

# This preprocessing part is the same for any program
# Build a Net from the two files above
net = caffe.Net(net_file, caffe_model, caffe.TEST)
# Take the shape of the 'data' blob; by default images are loaded matplotlib-style
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
# matplotlib/skimage load images as [0, 1] pixels, shape [height, width, channels], RGB
# caffe expects [0, 255] pixels, shape [channels, height, width], BGR, so convert

# Move the channel dimension to the front
transformer.set_transpose('data', (2, 0, 1))
transformer.set_mean('data', np.load(mean_file).mean(1).mean(1))
# Rescale pixels to [0, 255]
transformer.set_raw_scale('data', 255)
# RGB --> BGR
transformer.set_channel_swap('data', (2, 1, 0))

# Only now is the image actually loaded
im = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')
# Preprocess the loaded image with the transformer configured above
net.blobs['data'].data[...] = transformer.preprocess('data', im)
# Run the forward pass
out = net.forward()
# Final result: the probability of each class for this image
output_prob = out['prob'][0]
# Print the most probable class
print 'predicted class is:', output_prob.argmax()

# Load the label file so class indices can be mapped to names
imagenet_labels_filename = caffe_root + 'data/ilsvrc12/synset_words.txt'
labels = np.loadtxt(imagenet_labels_filename, str, delimiter='\t')

# The top-5 probabilities can also be listed
top_inds = output_prob.argsort()[::-1][:5]
print 'probabilities and labels:'
print zip(output_prob[top_inds], labels[top_inds])

# Equivalent: read the top-5 straight from the 'prob' blob
top_k = net.blobs['prob'].data[0].flatten().argsort()[-1:-6:-1]
for i in np.arange(top_k.size):
    print top_k[i], labels[top_k[i]]

With Classifier, it looks like this:

# coding=utf-8
import os
import numpy as np
from matplotlib import pyplot as plt
import cv2
import shutil
import time

# Swap RGB and BGR so the image displays correctly
def showimage(im):
    if im.ndim == 3:
        im = im[:, :, ::-1]
    plt.set_cmap('jet')
    plt.imshow(im)
    plt.show()

# Visualize feature maps; padval adjusts the brightness of the padding
def vis_square(data, padsize=1, padval=0):
    data -= data.min()
    data /= data.max()

    # All feature maps of a layer are tiled into one figure, so compute
    # how much of the figure each map occupies and where it is drawn
    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = ((0, n ** 2 - data.shape[0]), (0, padsize), (0, padsize)) + ((0, 0),) * (data.ndim - 3)
    data = np.pad(data, padding, mode='constant', constant_values=(padval, padval))

    # tile the filters into an image
    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])

    showimage(data)


# Path to the caffe source tree
caffe_root = '../../../caffe/'
import sys
sys.path.insert(0, caffe_root + 'python')
import caffe


# Load the mean file
mean_filename = './imagenet_mean.binaryproto'
proto_data = open(mean_filename, "rb").read()
a = caffe.io.caffe_pb2.BlobProto.FromString(proto_data)
mean = caffe.io.blobproto_to_array(a)[0]

# Build the network and load the trained model
gender_net_pretrained = './caffenet_train_iter_1500.caffemodel'
gender_net_model_file = './deploy_gender.prototxt'
gender_net = caffe.Classifier(gender_net_model_file, gender_net_pretrained, mean=mean,
                       channel_swap=(2, 1, 0),  # RGB --> BGR
                       raw_scale=255,           # scale [0, 1] pixels back to [0, 255]
                       image_dims=(256, 256))   # size the input is resized to before cropping


# Predict the class and visualize the features
gender_list = ['Male', 'Female']
input_image = caffe.io.load_image('1.jpg')  # read the image

prediction_gender = gender_net.predict([input_image])  # predict the gender
# Print the parameter shapes of every trained layer
print 'params:'
for k, v in gender_net.params.items():
    print 'weight:'
    print (k, v[0].data.shape)  # each layer's params hold two blobs; v[0] is the weights
    print 'b:'
    print (k, v[1].data.shape)  # v[1] is the bias
# Visualize the conv1 filters
filters = gender_net.params['conv1'][0].data
vis_square(filters.transpose(0, 2, 3, 1))
# Visualize the conv2 filters
'''filters = gender_net.params['conv2'][0].data
vis_square(filters[:48].reshape(48**2, 5, 5))'''
# Feature maps
print 'feature maps:'
for k, v in gender_net.blobs.items():
    print (k, v.data.shape)
    feat = gender_net.blobs[k].data[0, 0:4]  # first 4 feature maps of layer k for the first image
    vis_square(feat, padval=1)


# Show the original image and the predicted class
str_gender = gender_list[prediction_gender[0].argmax()]
print str_gender

plt.imshow(input_image)
plt.title(str_gender)
plt.show()

The blob format is [num, channels, height, width].
When reading an image with OpenCV's imread(), convert it to floating point to preserve detail.

OpenCV stores channels in BGR order.
cv2.imread(img, cv2.IMREAD_COLOR).astype(np.float32) returns shape [H, W, CH];
cv2.imread(img, cv2.IMREAD_GRAYSCALE).astype(np.float32) returns shape [H, W].
A grayscale image therefore needs
input_image = input_image[:, :, np.newaxis]
to become [H, W, 1].
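A short sketch of the OpenCV path just described (the file name is a placeholder):

import cv2
import numpy as np

# Sketch only: 'sample.png' stands in for any image file.
color = cv2.imread('sample.png', cv2.IMREAD_COLOR).astype(np.float32)     # (H, W, 3), BGR order
gray = cv2.imread('sample.png', cv2.IMREAD_GRAYSCALE).astype(np.float32)  # (H, W)
gray = gray[:, :, np.newaxis]   # (H, W, 1), matching the (H x W x K) input format
print color.shape, gray.shape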
