Caffe学习3-使用预训练模型finetuning

本篇介绍如何先取得在ImageNet大数据集上用CaffeNet预训练好的caffemodel,然后用该caffemodel在图像风格数据集(style)上进一步做fine-tuning(微调)。下面为主要步骤:

#采用别人的预训练模型,在自己的数据库上进行微调(fine-tuning)
#fine-tune是应用别人在大数据集合上训练到一定程度的caffemodel,在这进行微调。这比随机化参数要好,因为该模型可能已经接近最优!
#可以省时间,省资源。也可以克服没有大数据的困扰
#这里采用imagenet数据集上预训练模型caffemodel进行fine-tuning  style recognition
#第一步,加载caffe相关模块,准备数据,主要是style数据集合
caffe_root='../../'        #这里依据自己工程所在的地址,将地址转为caffe根目录。我的地址是caffe/example/test,所以是../../
import sys
#定义了caffe的python接口路径
sys.path.insert(0,caffe_root + 'python')        
import caffe

caffe.set_device(0)
caffe.set_mode_gpu()

#加载相关模块
import numpy as np
from pylab import *
%matplotlib inline
import tempfile

#定义图像预处理函数
def deprocess_net_image(image):
    """Turn a network-input array back into an image that can be displayed.

    The argument is not modified; all work happens on a copy.  The first
    axis is reversed (BGR -> RGB) and moved to the last position, the
    per-channel offsets are added back, and the values are saturated to
    [0, 255] and rounded.  Returns a ``uint8`` array.
    """
    # Copy first, then flip the channel axis and make it the trailing axis.
    pixels = image.copy()[::-1].transpose(1, 2, 0)
    # Offsets are listed in the post-flip (RGB) channel order.
    pixels += [123, 117, 104]

    # Saturate anything outside the displayable range, then quantise.
    np.clip(pixels, 0, 255, out=pixels)
    return np.require(np.round(pixels), dtype=np.uint8)
#第二步,下载数据集:从约80K张的style数据集中下载2000张图像,只使用20种风格中的5种标签;如果要下载全部数据,设置full_dataset=True
#下载imagenet的mean文件,预训练模型caffemodel等
# Size of the style dataset to work with.  Set full_dataset = True to use
# everything; otherwise only a small subset of images and labels is used.
full_dataset = False
if full_dataset:
    # -1 is the "no limit" sentinel: the label-loading code only truncates
    # the label list when NUM_STYLE_LABELS > 0.
    # (Fixed: this branch previously assigned the misspelled name
    # NUM_STYLE_IAMGES, leaving NUM_STYLE_IMAGES undefined.)
    NUM_STYLE_IMAGES = NUM_STYLE_LABELS = -1
else:
    NUM_STYLE_IMAGES = 2000
    NUM_STYLE_LABELS = 5

import os
#change direction=chdir
#os.chdir(caffe_root)
#!data/ilsvrc12/get_ilsvrc_aux.sh
#!scripts/download_model_binary.py models/bvlc_reference_caffenet
#!python examples/finetune_flickr_style/assemble_data.py \
#    --workers=-1 --seed=1701 \
#    --images=$NUM_STYLE_IMAGES --label=$NUM_STYLE_LABELS
#os.chdir('examples')
#定义参数,也即定义预训练模型的路径
import os
# Path of the pretrained CaffeNet weights; fail early if the file is missing.
weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'
assert os.path.exists(weights)
# Load the ImageNet labels (one string per entry; exactly 1000 are expected).
imagenet_label_file = caffe_root + 'data/ilsvrc12/synset_words.txt'
imagenet_labels = list(np.loadtxt(imagenet_label_file,str,delimiter='\t'))
assert len(imagenet_labels) == 1000
# Show the first ten labels followed by an ellipsis marker.
print 'loaded imagenet labels:\n','\n'.join(imagenet_labels[:10]+['...'])

# Load the style labels.
style_label_file = caffe_root + 'examples/finetune_flickr_style/style_names.txt'
style_labels = list(np.loadtxt(style_label_file,str,delimiter='\n'))
# A non-positive NUM_STYLE_LABELS means "keep every label".
if NUM_STYLE_LABELS > 0:
    style_labels = style_labels[:NUM_STYLE_LABELS]
print '\nLoaded style labels:\n',','.join(style_labels)

loaded imagenet labels:
n01440764 tench, Tinca tinca
n01443537 goldfish, Carassius auratus
n01484850 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias
n01491361 tiger shark, Galeocerdo cuvieri
n01494475 hammerhead, hammerhead shark
n01496331 electric ray, crampfish, numbfish, torpedo
n01498041 stingray
n01514668 cock
n01514859 hen
n01518878 ostrich, Struthio camelus

Loaded style labels:
Detailed,Pastel,Melancholy,Noir,HDR

#定义网络,训练
from caffe import layers as L
from caffe import params as P
# Per-blob multipliers for layers that should be trained:
# weights use lr_mult 1 / decay_mult 1, biases use lr_mult 2 / decay_mult 0.
weight_param = {'lr_mult': 1, 'decay_mult': 1}
bias_param = {'lr_mult': 2, 'decay_mult': 0}
learned_param = [weight_param, bias_param]

# Multipliers for layers that should stay fixed: lr_mult 0 for both blobs.
frozen_param = [{'lr_mult': 0}] * 2

#这里需要将参数filter全部改成filler,官网有错误!
def conv_relu(bottom,ks,nout,stride=1,pad=0,group=1,param=learned_param,weight_filler=dict(type='gaussian',std=0.01),bias_filler=dict(type='constant',value=0.1)):
    """Stack a Convolution layer and an in-place ReLU on top of `bottom`.

    `ks` is the kernel size and `nout` the number of outputs; the remaining
    keyword arguments are forwarded unchanged to the Convolution layer.
    Returns the pair ``(conv, relu)``.
    """
    conv_settings = dict(
        kernel_size=ks,
        num_output=nout,
        stride=stride,
        pad=pad,
        group=group,
        param=param,
        weight_filler=weight_filler,
        bias_filler=bias_filler,
    )
    conv_layer = L.Convolution(bottom, **conv_settings)
    relu_layer = L.ReLU(conv_layer, in_place=True)
    return conv_layer, relu_layer

def  fc_relu(bottom, nout, param=learned_param,weight_filler=dict(type='gaussian', std=0.005),bias_filler=dict(type='constant', value=0.1)):
    """Stack an InnerProduct layer and an in-place ReLU on top of `bottom`.

    `nout` is the number of outputs; `param`, `weight_filler` and
    `bias_filler` are forwarded unchanged to the InnerProduct layer.
    Returns the pair ``(fc, relu)``.
    """
    fc_settings = dict(
        num_output=nout,
        param=param,
        weight_filler=weight_filler,
        bias_filler=bias_filler,
    )
    fc_layer = L.InnerProduct(bottom, **fc_settings)
    relu_layer = L.ReLU(fc_layer, in_place=True)
    return fc_layer, relu_layer

def max_pool(bottom, ks, stride=1):
    """Return a max-pooling layer over `bottom` with kernel size `ks`."""
    pooling_settings = {
        'pool': P.Pooling.MAX,
        'kernel_size': ks,
        'stride': stride,
    }
    return L.Pooling(bottom, **pooling_settings)

def caffenet(data, label=None, train=True, num_classes=1000,classifier_name='fc8', learn_all=False):
    """Write a CaffeNet definition to a temporary file and return its name.

    The layer stack is meant to follow the original proto text
    specification (./models/bvlc_reference_caffenet/train_val.prototxt).

    Arguments:
      data -- top that feeds the first convolution.
      label -- optional label top; when given, `loss` (SoftmaxWithLoss)
          and `acc` (Accuracy) layers are attached to the classifier.
      train -- when true, Dropout layers follow fc6 and fc7; when false
          they are left out and a `probs` Softmax layer is added.
      num_classes -- number of outputs of the final InnerProduct layer.
      classifier_name -- name under which the final layer is registered.
      learn_all -- when true, conv1..fc7 use `learned_param`; otherwise
          they use `frozen_param`.  The classifier always uses
          `learned_param`.
    """
    spec = caffe.NetSpec()
    spec.data = data

    # Multipliers shared by every layer below the classifier.
    layer_param = frozen_param
    if learn_all:
        layer_param = learned_param

    # Convolutional stages.
    spec.conv1, spec.relu1 = conv_relu(spec.data, 11, 96, stride=4,
                                       param=layer_param)
    spec.pool1 = max_pool(spec.relu1, 3, stride=2)
    spec.norm1 = L.LRN(spec.pool1, local_size=5, alpha=1e-4, beta=0.75)
    spec.conv2, spec.relu2 = conv_relu(spec.norm1, 5, 256, pad=2, group=2,
                                       param=layer_param)
    spec.pool2 = max_pool(spec.relu2, 3, stride=2)
    spec.norm2 = L.LRN(spec.pool2, local_size=5, alpha=1e-4, beta=0.75)
    spec.conv3, spec.relu3 = conv_relu(spec.norm2, 3, 384, pad=1,
                                       param=layer_param)
    spec.conv4, spec.relu4 = conv_relu(spec.relu3, 3, 384, pad=1, group=2,
                                       param=layer_param)
    spec.conv5, spec.relu5 = conv_relu(spec.relu4, 3, 256, pad=1, group=2,
                                       param=layer_param)
    spec.pool5 = max_pool(spec.relu5, 3, stride=2)

    # Fully connected stages; dropout is only inserted for training nets.
    spec.fc6, spec.relu6 = fc_relu(spec.pool5, 4096, param=layer_param)
    fc7_in = spec.relu6
    if train:
        dropped6 = L.Dropout(spec.relu6, in_place=True)
        spec.drop6 = dropped6
        fc7_in = dropped6
    spec.fc7, spec.relu7 = fc_relu(fc7_in, 4096, param=layer_param)
    fc8_in = spec.relu7
    if train:
        dropped7 = L.Dropout(spec.relu7, in_place=True)
        spec.drop7 = dropped7
        fc8_in = dropped7

    # The classifier is always learned, and is registered under the
    # caller-chosen name.
    classifier = L.InnerProduct(fc8_in, num_output=num_classes,
                                param=learned_param)
    setattr(spec, classifier_name, classifier)

    if not train:
        spec.probs = L.Softmax(classifier)
    if label is not None:
        spec.label = label
        spec.loss = L.SoftmaxWithLoss(classifier, spec.label)
        spec.acc = L.Accuracy(classifier, spec.label)

    # Persist the definition; delete=False keeps the file after closing so
    # the caller can load it by name.
    with tempfile.NamedTemporaryFile(delete=False) as proto_file:
        proto_file.write(str(spec.to_proto()))
    return proto_file.name
#将不带标签的dummy data作为输入,看输出
# Build a test-phase CaffeNet whose input is a label-free DummyData layer
# of shape 1x3x227x227, and load the pretrained weights into it.
dummy_data = L.DummyData(shape=dict(dim=[1,3,227,227]))
imagenet_net_filename = caffenet(data=dummy_data,train=False)
imagenet_net = caffe.Net(imagenet_net_filename,weights,caffe.TEST)
#定义风格style的网络
def style_net(train=True, learn_all=False, subset=None):
    """Write a CaffeNet that reads style images and return its file name.

    `subset` selects the image list under data/flickr_style/; when it is
    None, 'train' or 'test' is chosen according to `train`.  Mirroring is
    enabled only when `train` is true.  `train` and `learn_all` are passed
    on to `caffenet`, and the classifier is named 'fc8_flickr' with
    NUM_STYLE_LABELS outputs.
    """
    if subset is None:
        if train:
            subset = 'train'
        else:
            subset = 'test'

    list_file = caffe_root + 'data/flickr_style/%s.txt' % subset
    mean_file = caffe_root + 'data/ilsvrc12/imagenet_mean.binaryproto'
    preprocessing = dict(mirror=train, crop_size=227, mean_file=mean_file)

    # ntop=2 yields the image top and the label top.
    images, labels = L.ImageData(
        source=list_file,
        transform_param=preprocessing,
        batch_size=50,
        new_height=256,
        new_width=256,
        ntop=2)

    return caffenet(
        data=images,
        label=labels,
        train=train,
        num_classes=NUM_STYLE_LABELS,
        classifier_name='fc8_flickr',
        learn_all=learn_all)
# Test-phase style net reading the 'train' image list, initialised from the
# pretrained weights.
untrained_style_net = caffe.Net(style_net(train=False, subset='train'),
                                weights, caffe.TEST)
# Run one forward pass so the blobs hold a batch.
untrained_style_net.forward()
# Keep copies of that batch's images and labels (labels as int32) so they
# remain available after the net is deleted.
style_data_batch = untrained_style_net.blobs['data'].data.copy()
style_label_batch = np.array(untrained_style_net.blobs['label'].data, dtype=np.int32)
#定义随机初始化参数的预测输出、imagenet的预测输出和采用了预训练模型的style网络预测输出
def disp_preds(net, image, labels, k=5, name='ImageNet'):
    """Print the `k` highest-probability labels `net` predicts for `image`.

    `image` is written into slot 0 of the net's 'data' blob, the net is
    run from 'conv1', and the first row of the 'probs' output is ranked.
    `labels` maps class index to display text; `name` only appears in the
    printed header.
    """
    input_blob = net.blobs['data']
    input_blob.data[0, ...] = image
    # Starting at conv1 skips the layers before it (presumably so the data
    # layer does not replace the image stored above).
    outputs = net.forward(start='conv1')
    probs = outputs['probs'][0]
    # Negate so that argsort's ascending order yields the largest first.
    ranked = (-probs).argsort()[:k]
    print('top %d predicted %s labels =' % (k, name))
    rows = ['\t(%d) %5.2f%% %s' % (rank + 1, 100*probs[idx], labels[idx])
            for rank, idx in enumerate(ranked)]
    print('\n'.join(rows))

def disp_imagenet_preds(net, image):
    """Print the top predictions for `image` using the ImageNet label list."""
    disp_preds(net, image, labels=imagenet_labels, name='ImageNet')

def disp_style_preds(net, image):
    """Print the top predictions for `image` using the style label list."""
    disp_preds(net, image, labels=style_labels, name='style')
# Pick one image from the batch captured earlier, show it, and print its
# ground-truth style label.
batch_index = 23
image = style_data_batch[batch_index]
plt.imshow(deprocess_net_image(image))
print 'actual label =', style_labels[style_label_batch[batch_index]]

actual label = Pastel
Caffe学习3-使用预训练模型finetuning_第1张图片

#预测错误,可能是因为imagenet的1000类中没有该类
disp_imagenet_preds(imagenet_net,image)

top 5 predicted ImageNet labels =
(1) 7.01% n03483316 hand blower, blow dryer, blow drier, hair dryer, hair drier
(2) 4.90% n03544143 hourglass
(3) 4.36% n03584829 iron, smoothing iron
(4) 3.48% n04517823 vacuum, vacuum cleaner
(5) 2.85% n04317175 stethoscope

#也可以比较两个网络在fc7层的激活值,计算二者之差的平方和
# Sum of squared differences between the fc7 activations (first item of
# the batch) of the untrained style net and the ImageNet net.
diff = untrained_style_net.blobs['fc7'].data[0] - imagenet_net.blobs['fc7'].data[0]
error = (diff ** 2).sum()
# NOTE(review): the closeness check below is disabled in this copy.
#assert error < 1e-8
# Delete untrained_style_net to save memory.
del untrained_style_net
#定义网络的solver文件,该文件主要用来定义训练时候的参数
from caffe.proto import caffe_pb2

def solver(train_net_path, test_net_path=None, base_lr=0.001):
    """Write a solver definition to a temporary file and return its name.

    `train_net_path` is the training net definition.  `test_net_path` is
    optional; when given, testing is configured as well.  `base_lr` is the
    initial learning rate.  All other settings are fixed below.
    """
    cfg = caffe_pb2.SolverParameter()

    # --- Networks -------------------------------------------------------
    cfg.train_net = train_net_path
    if test_net_path is not None:
        cfg.test_net.append(test_net_path)
        # Run 100 test batches after every 1000 training iterations.
        cfg.test_interval = 1000
        cfg.test_iter.append(100)

    # --- Optimisation schedule --------------------------------------------
    # iter_size is the number of iterations the gradient is averaged over;
    # 1 means no accumulation.  Larger values boost the effective batch
    # size without affecting memory utilisation.
    cfg.iter_size = 1
    # Total number of training iterations (net updates).
    cfg.max_iter = 100000

    # Plain stochastic gradient descent; 'Adam' and 'RMSProp' are among the
    # other choices.
    cfg.type = 'SGD'
    cfg.base_lr = base_lr

    # 'step' policy: multiply the learning rate by `gamma` every
    # `stepsize` iterations.
    cfg.lr_policy = 'step'
    cfg.gamma = 0.1
    cfg.stepsize = 20000

    # Non-zero momentum blends the current gradient with previous ones to
    # make learning more stable; L2 weight decay regularises learning to
    # help prevent overfitting.
    cfg.momentum = 0.9
    cfg.weight_decay = 5e-4

    # --- Reporting and snapshots ------------------------------------------
    # Report the training loss and accuracy every 1000 iterations.
    cfg.display = 1000
    # Store the trained network every 10K iterations, i.e. ten times over
    # the full run.
    cfg.snapshot = 10000
    cfg.snapshot_prefix = caffe_root + 'models/finetune_flickr_style/finetune_flickr_style'

    # Train on the GPU; CPU training of large networks is very slow.
    cfg.solver_mode = caffe_pb2.SolverParameter.GPU

    # delete=False keeps the file after closing so it can be loaded by name.
    with tempfile.NamedTemporaryFile(delete=False) as solver_file:
        solver_file.write(str(cfg))
    return solver_file.name
#这里,当网络和solver文件都定义好了之后,可以在终端输入指令直接进行训练,指令为:
#build/tools/caffe train -solver models/finetune_flickr_style/solver.prototxt \
#                        -weights models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel \
#                        -gpu 0
#该例程采用python接口
def run_solvers(niter, solvers, disp_interval=10):
    """Step every solver `niter` times and save the resulting weights.

    `solvers` is a list of (name, solver) tuples.  After each single step
    the solver net's 'loss' and 'acc' blobs are recorded.  A progress line
    is printed every `disp_interval` iterations and on the last one.

    Returns ``(loss, acc, weights)``: `loss` and `acc` map each name to an
    array with one entry per iteration, and `weights` maps each name to
    the path of the saved caffemodel in a fresh temporary directory.
    """
    loss = {name: np.zeros(niter) for name, _ in solvers}
    acc = {name: np.zeros(niter) for name, _ in solvers}

    final_it = niter - 1
    for it in range(niter):
        for name, s in solvers:
            s.step(1)  # one SGD step in Caffe
            net_blobs = s.net.blobs
            loss[name][it] = net_blobs['loss'].data.copy()
            acc[name][it] = net_blobs['acc'].data.copy()
        if it == final_it or it % disp_interval == 0:
            summary = ['%s: loss=%.3f, acc=%2d%%' %
                       (label, loss[label][it], np.round(100*acc[label][it]))
                       for label, _ in solvers]
            print('%3d) %s' % (it, '; '.join(summary)))

    # Save the learned weights of every net.
    weight_dir = tempfile.mkdtemp()
    weights = {}
    for name, s in solvers:
        path = os.path.join(weight_dir, 'weights.%s.caffemodel' % name)
        s.net.save(path)
        weights[name] = path
    return loss, acc, weights
#开始训练
niter = 400  # number of iterations to train

# Reset style_solver as before.
# Style net whose weights are copied from the pretrained caffemodel.
style_solver_filename = solver(style_net(train=True))
style_solver = caffe.get_solver(style_solver_filename)
style_solver.net.copy_from(weights)

# For reference, we also create a solver that isn't initialized from
# the pretrained ImageNet weights.
# No copy_from call here, so this "scratch" net keeps its own initialisation.
scratch_style_solver_filename = solver(style_net(train=True))
scratch_style_solver = caffe.get_solver(scratch_style_solver_filename)

print 'Running solvers for %d iterations...' % niter
solvers = [('pretrained', style_solver),
           ('scratch', scratch_style_solver)]
# NOTE: this rebinds the module-level name `weights` (so far the pretrained
# caffemodel path) to the dict of saved-weight paths from run_solvers.
loss, acc, weights = run_solvers(niter, solvers)
print 'Done.'

train_loss, scratch_train_loss = loss['pretrained'], loss['scratch']
train_acc, scratch_train_acc = acc['pretrained'], acc['scratch']
style_weights, scratch_style_weights = weights['pretrained'], weights['scratch']

# Delete solvers to save memory.
del style_solver, scratch_style_solver, solvers
#绘制采用预训练模型的loss函数曲线和采用随机参数模型的loss曲线
plot(np.vstack([train_loss, scratch_train_loss]).T)
xlabel('Iteration #')
ylabel('Loss')

Caffe学习3-使用预训练模型finetuning_第2张图片

#绘制两个网络(预训练模型的网络和随机参数的网络精度曲线)
plot(np.vstack([train_acc, scratch_train_acc]).T)
xlabel('Iteration #')
ylabel('Accuracy')

Caffe学习3-使用预训练模型finetuning_第3张图片

#查看两个网络的测试精度
def eval_style_net(weights, test_iters=10):
    """Average the 'acc' output of a test-phase style net over several batches.

    A test net is built with `style_net(train=False)` and loaded from
    `weights`; it is run forward `test_iters` times.  Returns the pair
    ``(test_net, accuracy)`` where `accuracy` is the mean of the 'acc'
    outputs.
    """
    test_net = caffe.Net(style_net(train=False), weights, caffe.TEST)
    total = sum(test_net.forward()['acc'] for _ in xrange(test_iters))
    return test_net, total / test_iters
# Test accuracy of the net started from the pretrained weights ...
test_net, accuracy = eval_style_net(style_weights)
print 'Accuracy, trained from ImageNet initialization: %3.1f%%' % (100*accuracy, )
# ... and of the net that was trained without them.
scratch_test_net, scratch_accuracy = eval_style_net(scratch_style_weights)
print 'Accuracy, trained from   random initialization: %3.1f%%' % (100*scratch_accuracy, )

Accuracy, trained from ImageNet initialization: 51.4%
Accuracy, trained from random initialization: 23.6%

#端到端(end-to-end)网络:设置learn_all=True训练网络的所有层,相比上面更加简单方便
#fc8输出预测
# Training net in which every layer is learned (learn_all=True).
end_to_end_net = style_net(train=True, learn_all=True)

# Set base_lr to 1e-3, the same as last time when learning only the classifier.
# You may want to play around with different values of this or other
# optimization parameters when fine-tuning.  For example, if learning diverges
# (e.g., the loss gets very large or goes to infinity/NaN), you should try
# decreasing base_lr (e.g., to 1e-4, then 1e-5, etc., until you find a value
# for which learning does not diverge).
base_lr = 0.001

# Solver that continues from the weights of the earlier 'pretrained' run.
style_solver_filename = solver(end_to_end_net, base_lr=base_lr)
style_solver = caffe.get_solver(style_solver_filename)
style_solver.net.copy_from(style_weights)

# Solver that continues from the weights of the earlier 'scratch' run.
scratch_style_solver_filename = solver(end_to_end_net, base_lr=base_lr)
scratch_style_solver = caffe.get_solver(scratch_style_solver_filename)
scratch_style_solver.net.copy_from(scratch_style_weights)

print 'Running solvers for %d iterations...' % niter
solvers = [('pretrained, end-to-end', style_solver),
           ('scratch, end-to-end', scratch_style_solver)]
# Only the saved-weight paths are kept; the loss/accuracy curves are discarded.
_, _, finetuned_weights = run_solvers(niter, solvers)
print 'Done.'

style_weights_ft = finetuned_weights['pretrained, end-to-end']
scratch_style_weights_ft = finetuned_weights['scratch, end-to-end']

# Delete solvers to save memory.
del style_solver, scratch_style_solver, solvers

Running solvers for 400 iterations…
0) pretrained, end-to-end: loss=0.734, acc=64%; scratch, end-to-end: loss=1.583, acc=28%
10) pretrained, end-to-end: loss=1.255, acc=62%; scratch, end-to-end: loss=1.632, acc=14%
20) pretrained, end-to-end: loss=0.873, acc=66%; scratch, end-to-end: loss=1.626, acc=12%
30) pretrained, end-to-end: loss=0.863, acc=70%; scratch, end-to-end: loss=1.587, acc=22%
40) pretrained, end-to-end: loss=0.752, acc=72%; scratch, end-to-end: loss=1.569, acc=26%
50) pretrained, end-to-end: loss=0.779, acc=70%; scratch, end-to-end: loss=1.596, acc=34%
60) pretrained, end-to-end: loss=0.789, acc=74%; scratch, end-to-end: loss=1.531, acc=32%
70) pretrained, end-to-end: loss=0.500, acc=76%; scratch, end-to-end: loss=1.549, acc=34%
80) pretrained, end-to-end: loss=0.792, acc=72%; scratch, end-to-end: loss=1.450, acc=42%
90) pretrained, end-to-end: loss=0.791, acc=72%; scratch, end-to-end: loss=1.482, acc=34%
100) pretrained, end-to-end: loss=0.582, acc=76%; scratch, end-to-end: loss=1.491, acc=32%
110) pretrained, end-to-end: loss=0.424, acc=84%; scratch, end-to-end: loss=1.621, acc=26%
120) pretrained, end-to-end: loss=0.457, acc=82%; scratch, end-to-end: loss=1.538, acc=28%
130) pretrained, end-to-end: loss=0.693, acc=70%; scratch, end-to-end: loss=1.513, acc=26%
140) pretrained, end-to-end: loss=0.481, acc=84%; scratch, end-to-end: loss=1.495, acc=30%
150) pretrained, end-to-end: loss=0.431, acc=80%; scratch, end-to-end: loss=1.462, acc=38%
160) pretrained, end-to-end: loss=0.422, acc=88%; scratch, end-to-end: loss=1.427, acc=34%
170) pretrained, end-to-end: loss=0.483, acc=76%; scratch, end-to-end: loss=1.618, acc=34%
180) pretrained, end-to-end: loss=0.357, acc=88%; scratch, end-to-end: loss=1.489, acc=34%
190) pretrained, end-to-end: loss=0.419, acc=84%; scratch, end-to-end: loss=1.440, acc=38%
200) pretrained, end-to-end: loss=0.538, acc=78%; scratch, end-to-end: loss=1.443, acc=32%
210) pretrained, end-to-end: loss=0.406, acc=86%; scratch, end-to-end: loss=1.696, acc=20%
220) pretrained, end-to-end: loss=0.366, acc=82%; scratch, end-to-end: loss=1.376, acc=40%
230) pretrained, end-to-end: loss=0.173, acc=92%; scratch, end-to-end: loss=1.483, acc=26%
240) pretrained, end-to-end: loss=0.258, acc=92%; scratch, end-to-end: loss=1.273, acc=46%
250) pretrained, end-to-end: loss=0.410, acc=82%; scratch, end-to-end: loss=1.364, acc=48%
260) pretrained, end-to-end: loss=0.335, acc=90%; scratch, end-to-end: loss=1.376, acc=34%
270) pretrained, end-to-end: loss=0.367, acc=80%; scratch, end-to-end: loss=1.424, acc=46%
280) pretrained, end-to-end: loss=0.374, acc=84%; scratch, end-to-end: loss=1.231, acc=48%
290) pretrained, end-to-end: loss=0.247, acc=90%; scratch, end-to-end: loss=1.235, acc=52%
300) pretrained, end-to-end: loss=0.317, acc=86%; scratch, end-to-end: loss=1.394, acc=34%
310) pretrained, end-to-end: loss=0.136, acc=96%; scratch, end-to-end: loss=1.284, acc=38%
320) pretrained, end-to-end: loss=0.308, acc=90%; scratch, end-to-end: loss=1.343, acc=42%
330) pretrained, end-to-end: loss=0.382, acc=82%; scratch, end-to-end: loss=1.675, acc=36%
340) pretrained, end-to-end: loss=0.209, acc=90%; scratch, end-to-end: loss=1.432, acc=42%
350) pretrained, end-to-end: loss=0.311, acc=88%; scratch, end-to-end: loss=1.251, acc=48%
360) pretrained, end-to-end: loss=0.325, acc=86%; scratch, end-to-end: loss=1.430, acc=36%
370) pretrained, end-to-end: loss=0.306, acc=88%; scratch, end-to-end: loss=1.462, acc=48%
380) pretrained, end-to-end: loss=0.345, acc=86%; scratch, end-to-end: loss=1.299, acc=46%
390) pretrained, end-to-end: loss=0.182, acc=96%; scratch, end-to-end: loss=1.303, acc=40%
399) pretrained, end-to-end: loss=0.242, acc=90%; scrat

#测试精度
# Test accuracy after end-to-end fine-tuning, for both sets of weights.
test_net, accuracy = eval_style_net(style_weights_ft)
print 'Accuracy, finetuned from ImageNet initialization: %3.1f%%' % (100*accuracy, )
scratch_test_net, scratch_accuracy = eval_style_net(scratch_style_weights_ft)
print 'Accuracy, finetuned from   random initialization: %3.1f%%' % (100*scratch_accuracy, )

Accuracy, finetuned from ImageNet initialization: 55.6%
Accuracy, finetuned from random initialization: 45.8%

#重新查看对该图像的预测:经过训练后,模型以99.96%的概率预测为Pastel,即模型更加自信地认为该图像是Pastel。
plt.imshow(deprocess_net_image(image))
disp_style_preds(test_net, image)

top 5 predicted style labels =
(1) 99.96% Pastel
(2) 0.02% Melancholy
(3) 0.01% Detailed
(4) 0.01% Noir
(5) 0.00% HDR
Caffe学习3-使用预训练模型finetuning_第4张图片

#test网络进行测试
# Take an image from the batch currently held by test_net, show it, and
# print its ground-truth style label.
batch_index = 19
image = test_net.blobs['data'].data[batch_index]
plt.imshow(deprocess_net_image(image))
print 'actual label =', style_labels[int(test_net.blobs['label'].data[batch_index])]

actual label = HDR
Caffe学习3-使用预训练模型finetuning_第5张图片

#显示预测五种风格的概率
disp_style_preds(test_net, image)

top 5 predicted style labels =
(1) 51.14% HDR
(2) 46.59% Melancholy
(3) 2.08% Pastel
(4) 0.14% Noir
(5) 0.06% Detailed

#随机网络对该副图像的预测
disp_style_preds(scratch_test_net, image)

top 5 predicted style labels =
(1) 61.20% HDR
(2) 14.25% Detailed
(3) 13.17% Melancholy
(4) 6.92% Pastel
(5) 4.46% Noir

#imagenet网络对于该图像的预测
disp_imagenet_preds(imagenet_net, image)

top 5 predicted ImageNet labels =
(1) 22.45% n04604644 worm fence, snake fence, snake-rail fence, Virginia fence
(2) 14.84% n09193705 alp
(3) 9.58% n04467665 trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi
(4) 8.13% n09468604 valley, vale
(5) 6.42% n02793495 barn

你可能感兴趣的:(caffe)