Kaggle项目Digit Recognizer实现(二):caffe by python

上篇文章使用cs231n提供的python代码,实现了一个三层卷积神经网络,用于kaggle项目Digit Recognizer。由于实现更复杂的神经网络完全手写代码较为繁琐,所以这里采用caffe重新实现,希望能提高识别准确率。
本文中caffe使用python接口调用,并在ipython notebook中运行,其中,代码编写参考了http://caffe.berkeleyvision.org/中的notebook example: Image Classification and Filter Visualization和Learning LeNet。



def readCSVFile(file):
    """Load a CSV file with a header row into a uint8 array.

    The first row is treated as a header and discarded; every remaining row
    becomes one row of the result.

    Args:
        file: Path of the CSV file to read.

    Returns:
        A ``numpy.ndarray`` of dtype ``uint8`` built from the data rows.
    """
    import csv
    import sys

    # Python 2's csv module wants a binary-mode file, while Python 3's wants
    # a text-mode file opened with newline=''.
    if sys.version_info[0] < 3:
        csvfile = open(file, 'rb')
    else:
        csvfile = open(file, 'r', newline='')
    with csvfile:
        reader = csv.reader(csvfile)
        next(reader, None)  # remove file header
        rawData = list(reader)
    # uint8 keeps the dtype consistent with the bytes written to LMDB below
    intData = np.array(rawData).astype(np.uint8)
    return intData


from pylab import *
%matplotlib inline
caffe_root = '你的caffe路径' 
import sys
sys.path.insert(0, caffe_root + 'python')
import caffe
from readCSV import csv_data
import numpy as np
import lmdb

# Fetch the dataset dictionary once, then unpack the train / validation /
# test splits into module-level names used by the rest of the script.
data = csv_data.get_mnist_data()
X_train, y_train = data['X_train'], data['y_train']
X_val, y_val = data['X_val'], data['y_val']
X_test = data['X_test']

def _write_lmdb(db_path, images, labels):
    """Serialize every (image, label) pair into an LMDB as Caffe Datums.

    Args:
        db_path: Directory of the LMDB environment to create or open.
        images: Array indexed as (sample, channel, height, width); the raw
            bytes of each sample are stored in the Datum's ``data`` field.
        labels: Per-sample labels; each is converted with ``int()``.
    """
    # Reserve ten times the raw array size as the maximum database size.
    env = lmdb.open(db_path, map_size=images.nbytes * 10)
    try:
        with env.begin(write=True) as txn:
            # txn is a Transaction object
            for i in range(images.shape[0]):
                datum = caffe.proto.caffe_pb2.Datum()
                datum.channels = images.shape[1]
                datum.height = images.shape[2]
                datum.width = images.shape[3]
                datum.data = images[i].tobytes()  # or .tostring() if numpy < 1.9
                datum.label = int(labels[i])
                str_id = '{:08}'.format(i)

                # The encode is only essential in Python 3
                txn.put(str_id.encode('ascii'), datum.SerializeToString())
    finally:
        # Release the environment so the database is not left open while
        # Caffe reads it later.
        env.close()


_write_lmdb('train_lmdb', X_train, y_train)
_write_lmdb('val_lmdb', X_val, y_val)



from caffe import layers as L, params as P
def lenet(lmdb, batch_size):
    """Build the LeNet-style network definition and return it as a proto.

    Args:
        lmdb: Path of the LMDB database the Data layer reads from.
        batch_size: Number of samples per batch for the Data layer.

    Returns:
        The protobuf message produced by ``NetSpec.to_proto()``.
    """
    xavier = dict(type='xavier')
    net = caffe.NetSpec()

    # Input: pixel values are scaled by 1/255 as they are read from LMDB.
    net.data, net.label = L.Data(
        source=lmdb,
        backend=P.Data.LMDB,
        batch_size=batch_size,
        transform_param=dict(scale=1./255),
        ntop=2,
    )

    # Two convolution + max-pooling stages.
    net.conv1 = L.Convolution(net.data, kernel_size=5, num_output=20,
                              weight_filler=xavier)
    net.pool1 = L.Pooling(net.conv1, kernel_size=2, stride=2,
                          pool=P.Pooling.MAX)
    net.conv2 = L.Convolution(net.pool1, kernel_size=5, num_output=50,
                              weight_filler=xavier)
    net.pool2 = L.Pooling(net.conv2, kernel_size=2, stride=2,
                          pool=P.Pooling.MAX)

    # Two fully connected layers, each followed by an in-place ReLU.
    net.fc1 = L.InnerProduct(net.pool2, num_output=500, weight_filler=xavier)
    net.relu1 = L.ReLU(net.fc1, in_place=True)
    net.fc2 = L.InnerProduct(net.relu1, num_output=500, weight_filler=xavier)
    net.relu2 = L.ReLU(net.fc2, in_place=True)

    # 10-way class scores and the softmax loss against the labels.
    net.score = L.InnerProduct(net.relu2, num_output=10, weight_filler=xavier)
    net.loss = L.SoftmaxWithLoss(net.score, net.label)

    return net.to_proto()

# Emit one net definition per split: (output file, source LMDB, batch size).
for proto_path, source_db, batch in (('train.prototxt', 'train_lmdb', 64),
                                     ('test.prototxt', 'val_lmdb', 100)):
    with open(proto_path, 'w') as f:
        f.write(str(lenet(source_db, batch)))



### define solver
from caffe.proto import caffe_pb2
s = caffe_pb2.SolverParameter()

# Set a seed for reproducible experiments:
# this controls for randomization in training.
#s.random_seed = 0xCAFFE

# Specify locations of the train and test networks.
s.train_net = 'train.prototxt'
# The training loop reads solver.test_nets[0], and a test_iter entry is set
# below, so the test net has to be registered with the solver as well.
s.test_net.append('test.prototxt')
s.test_interval = 640  # 41000 / 64 ~= 640 iterations, i.e. test once per epoch
s.test_iter.append(10)  # Test on 10 batches each time we test.

s.max_iter = 204800     # maximum number of training iterations, 320 epochs

# EDIT HERE to try different solvers
# solver types include "SGD", "Adam", and "Nesterov" among others.
s.type = "Adam"

# Set the initial learning rate.
s.base_lr = 0.0001  # EDIT HERE to try different learning rates
# Set momentum to accelerate learning by
# taking weighted average of current and previous updates.
s.momentum = 0.9
# Set weight decay to regularize and prevent overfitting
s.weight_decay = 5e-4

# Set `lr_policy` to define how the learning rate changes during training.
# This is the same policy as our default LeNet.
s.lr_policy = 'inv'
s.gamma = 0.0001
s.power = 0.75
# EDIT HERE to try the fixed rate (and compare with adaptive solvers)
# `fixed` is the simplest policy that keeps the learning rate constant.
# s.lr_policy = 'fixed'

# Display the current training loss every 100 iterations.
s.display = 100

# Snapshots are files used to store networks we've trained.
# We'll snapshot every 100000 iterations.
s.snapshot = 100000
s.snapshot_prefix = 'custom_net'

# Train on the GPU
s.solver_mode = caffe_pb2.SolverParameter.GPU

# Write the solver definition to solver.prototxt.
with open('solver.prototxt', 'w') as f:
    f.write(str(s))



### load the solver and create train and test nets
# Drop any earlier solver first: two solvers can't be instantiated on the
# same lmdb data.
solver = None
solver = caffe.get_solver('solver.prototxt')

### solve
niter = 204800  # EDIT HERE increase to train for longer
test_interval = 640
test_batches = 10  # number of test batches evaluated at each test point
# losses will also be stored in the log
train_loss = np.zeros(niter)
# Integer ceiling division: one slot per test point, even when niter is not
# a multiple of test_interval.
test_acc = np.zeros((niter + test_interval - 1) // test_interval)

# the main solver loop
for it in range(niter):
    solver.step(1)  # one solver iteration by Caffe

    # store the train loss
    train_loss[it] = solver.net.blobs['loss'].data

    if it % 100 == 0:
        print('Iteration %d loss =  %s' % (it, train_loss[it]))
    # run a full test every so often
    # (Caffe can also do this for us and write to a log, but we show here
    #  how to do it directly in Python, where more complicated things are easier.)
    if it % test_interval == 0:
        test_net = solver.test_nets[0]
        correct = 0
        total = 0
        for test_it in range(test_batches):
            # Run a forward pass so each iteration scores a fresh batch
            # instead of re-counting the blobs left from the previous one.
            test_net.forward()
            labels = test_net.blobs['label'].data
            correct += np.sum(test_net.blobs['score'].data.argmax(1) == labels)
            total += labels.size
        test_acc[it // test_interval] = correct / float(total)
        print('Iteration %d testing... test_acc =  %s'
              % (it, test_acc[it // test_interval]))

# Plot the per-iteration training loss (left axis) and the periodic test
# accuracy (right axis, red) against the iteration number.
_, ax1 = subplots()
ax2 = ax1.twinx()

loss_iterations = arange(niter)
test_iterations = test_interval * arange(len(test_acc))

ax1.plot(loss_iterations, train_loss)
ax1.set_ylabel('train loss')

ax2.plot(test_iterations, test_acc, 'r')
ax2.set_ylabel('test accuracy')
ax2.set_title('Custom Test Accuracy: {:.2f}'.format(test_acc[-1]))

Kaggle项目Digit Recognizer实现(二):caffe by python_第1张图片




def create_deploy(input_shape=(1, 1, 28, 28)):
    """Build the deploy (inference) network definition and return its proto.

    The net starts from an Input layer instead of an LMDB Data layer and ends
    in a Softmax that outputs class probabilities in the ``prob`` blob.

    NOTE(review): the layers defined here (conv3 / fc4 with dropout, ReLU
    after each convolution) differ from ``lenet()`` earlier in this file;
    the weights loaded into this net presumably come from a training net
    with this same layout -- confirm before use.

    Args:
        input_shape: Dimensions written into the Input layer's shape, in
            (batch, channels, height, width) order.

    Returns:
        The protobuf message produced by ``NetSpec.to_proto()``.
    """
    n = caffe.NetSpec()
    # Write the input dimensions into the Input layer directly, so that the
    # generated prototxt carries its own input shape.
    n.data = L.Input(input_param=dict(shape=dict(dim=list(input_shape))))

    n.conv1 = L.Convolution(n.data, kernel_size=5, num_output=20, weight_filler=dict(type='xavier'))
    n.relu1 = L.ReLU(n.conv1, in_place=True)
    n.pool1 = L.Pooling(n.relu1, kernel_size=2, stride=2, pool=P.Pooling.MAX)

    n.conv2 = L.Convolution(n.pool1, kernel_size=5, num_output=50, weight_filler=dict(type='xavier'))
    n.relu2 = L.ReLU(n.conv2, in_place=True)
    n.pool2 = L.Pooling(n.relu2, kernel_size=2, stride=2, pool=P.Pooling.MAX)

    n.conv3 = L.Convolution(n.pool2, kernel_size=4, num_output=120, weight_filler=dict(type='xavier'))
    n.dropout3 = L.Dropout(n.conv3, dropout_ratio = 0.5)
    n.relu3 = L.ReLU(n.dropout3, in_place=True)

    n.fc4 =   L.InnerProduct(n.relu3, num_output=84, weight_filler=dict(type='xavier'))
    n.dropout4 = L.Dropout(n.fc4, dropout_ratio = 0.5)
    n.relu4 = L.ReLU(n.dropout4, in_place=True)

    n.score = L.InnerProduct(n.relu4, num_output=10, weight_filler=dict(type='xavier'))
    n.prob = L.Softmax(n.score)
    return n.to_proto()

# Serialize the deploy net definition to deploy.prototxt.
with open('deploy.prototxt', 'w') as f:
    f.write(str(create_deploy()))

生成deploy.prototxt文件后,打开该文件,在输入层加入输入数据维数信息(暂时没搞清楚如何在input层定义时写入该参数,只好麻烦一下手写了○| ̄|_)

layer {
  name: "data"
  type: "Input"
  top: "data"
  input_param { shape: { dim: 1 dim: 1 dim: 28 dim: 28 } }
}


model_def = 'deploy.prototxt'
# NOTE(review): the solver in this file uses snapshot_prefix 'custom_net';
# confirm that this weights file name matches the snapshot actually produced.
model_weights = 'mymodel_204800.caffemodel'
batch_size = 10  # number of test images classified and displayed at once

net = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)     # use test mode (e.g., don't perform dropout)
net.blobs['data'].reshape(batch_size,  # batch size
                          1,           # 1-channel images
                          28, 28)      # image size is 28x28
# Scale pixels by 1/255, the same scaling the training Data layer applies.
image = X_test[:batch_size, :] / 255.0
# copy the image data into the memory allocated for the net
net.blobs['data'].data[...] = image
### perform classification
output = net.forward()

# one probability vector per image in the batch
output_prob = output['prob'][:batch_size, :]

print('predicted class is: %s' % (output_prob.argmax(axis=1),))
# Lay the batch out side by side: (batch, 28, 28) -> (28, batch, 28) -> (28, 28 * batch)
plt.imshow(image[:, 0, :, :].transpose(1, 0, 2).reshape(28, 28 * batch_size), cmap='gray')


predicted class is: [2 0 9 9 3 7 0 3 0 3]



