Run results (small training set):
set nb of threads to 2
using model:
nn.Sequential {
[input -> (1) -> (2) -> (3) -> (4) -> (5) -> (6) -> (7) -> (8) -> (9) -> (10) -> output]
(1): nn.SpatialConvolutionMap
(2): nn.Tanh
(3): nn.SpatialMaxPooling(2x2, 2,2)
(4): nn.SpatialConvolutionMap
(5): nn.Tanh
(6): nn.SpatialMaxPooling(2x2, 2,2)
(7): nn.Reshape(6400)
(8): nn.Linear(6400 -> 128)
(9): nn.Tanh
(10): nn.Linear(128 -> 10)
}
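(The Reshape(6400) in layer (7) follows from the layer shapes: a 32x32 input becomes 28x28 after the first 5x5 convolution, 14x14 after 2x2 max pooling, 10x10 after the second 5x5 convolution, and 5x5 after the second pooling; with 256 feature maps that gives 256*5*5 = 6400 inputs to the first Linear layer.)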
--2016-04-15 14:11:08-- http://torch7.s3-website-us-east-1.amazonaws.com/data/cifar-10-torch.tar.gz
Resolving torch7.s3-website-us-east-1.amazonaws.com (torch7.s3-website-us-east-1.amazonaws.com)... 54.231.114.209
Connecting to torch7.s3-website-us-east-1.amazonaws.com (torch7.s3-website-us-east-1.amazonaws.com)|54.231.114.209|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 182840310 (174M) [application/x-gzip]
Saving to: 'cifar-10-torch.tar.gz'
100%[====================================================================================>] 182,840,310 2.15MB/s in 1m 40s
2016-04-15 14:12:48 (1.75 MB/s) - 'cifar-10-torch.tar.gz' saved [182840310/182840310]
cifar-10-batches-t7/
cifar-10-batches-t7/data_batch_1.t7
cifar-10-batches-t7/data_batch_2.t7
cifar-10-batches-t7/data_batch_3.t7
cifar-10-batches-t7/data_batch_4.t7
cifar-10-batches-t7/data_batch_5.t7
cifar-10-batches-t7/test_batch.t7
preprocessing data (color space + normalization)
on training set:
online epoch # 1 [batchSize = 1]
[======================================== 2000/2000 ==================================>] Tot: 1m4s | Step: 15ms
time to learn 1 sample = 32.017202496529ms
ConfusionMatrix:
[[ 56 31 3 5 5 0 6 7 67 22] 27.723% [class: airplane]
[ 32 49 10 11 8 2 20 14 26 19] 25.654% [class: automobile]
[ 18 15 21 13 33 1 56 12 20 14] 10.345% [class: bird]
[ 3 35 13 33 10 1 57 14 8 21] 16.923% [class: cat]
[ 14 15 18 14 41 1 64 13 17 17] 19.159% [class: deer]
[ 11 22 15 31 20 2 42 16 8 16] 1.093% [class: dog]
[ 6 12 17 21 31 0 104 8 4 4] 50.242% [class: frog]
[ 5 29 11 13 26 1 50 41 11 12] 20.603% [class: horse]
[ 60 26 6 3 6 0 7 9 71 15] 34.975% [class: ship]
[ 35 29 6 12 16 2 23 15 36 29]] 14.286% [class: truck]
+ average row correct: 22.10025514476%
+ average rowUcol correct (VOC measure): 11.520450208336%
+ global correct: 22.35%
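(In these confusion matrices each row is a ground-truth class and each column a predicted class; for example, 67 of the airplane samples above were classified as ship. The "rowUcol" VOC measure is the per-class intersection-over-union, averaged over classes.)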
saving network to /home/liuxinyao/test/train-on-cifar/cifar.net
on testing Set:
[======================================== 1000/1000 ==================================>] Tot: 9s766ms | Step: 4ms
time to test 1 sample = 9.7703020572662ms
ConfusionMatrix:
[[ 10 1 4 11 6 3 9 3 55 1] 9.709% [class: airplane]
[ 5 9 9 12 5 2 4 6 34 3] 10.112% [class: automobile]
[ 3 0 5 13 21 5 19 8 26 0] 5.000% [class: bird]
[ 2 2 6 30 10 5 28 8 9 3] 29.126% [class: cat]
[ 0 1 3 10 28 0 26 9 12 1] 31.111% [class: deer]
[ 0 4 6 24 10 5 23 7 7 0] 5.814% [class: dog]
[ 0 0 0 16 22 2 64 5 3 0] 57.143% [class: frog]
[ 2 0 2 15 17 5 12 43 5 1] 42.157% [class: horse]
[ 11 2 4 9 5 0 1 3 70 1] 66.038% [class: ship]
[ 6 5 2 12 9 2 7 21 41 4]] 3.670% [class: truck]
+ average row correct: 25.987956374884%
+ average rowUcol correct (VOC measure): 13.055113404989%
+ global correct: 26.8%
on training set:
online epoch # 2 [batchSize = 1]
[======================================== 2000/2000 ==================================>] Tot: 1m29s | Step: 15ms
time to learn 1 sample = 44.94854092598ms
ConfusionMatrix:
[[ 60 19 8 8 4 2 4 11 67 19] 29.703% [class: airplane]
[ 26 62 11 11 12 3 14 13 19 20] 32.461% [class: automobile]
[ 14 11 35 22 27 3 48 16 18 9] 17.241% [class: bird]
[ 6 22 13 61 7 3 42 11 6 24] 31.282% [class: cat]
[ 10 5 17 16 56 3 57 23 18 9] 26.168% [class: deer]
[ 14 15 16 41 21 9 31 19 7 10] 4.918% [class: dog]
[ 7 3 12 30 17 2 126 5 3 2] 60.870% [class: frog]
[ 8 12 7 17 22 3 24 85 8 13] 42.714% [class: horse]
[ 43 16 8 2 6 1 6 9 99 13] 48.768% [class: ship]
[ 31 31 9 19 9 5 10 25 31 33]] 16.256% [class: truck]
+ average row correct: 31.038115471601%
+ average rowUcol correct (VOC measure): 17.537745833397%
+ global correct: 31.3%
saving network to /home/liuxinyao/test/train-on-cifar/cifar.net
on testing Set:
[======================================== 1000/1000 ==================================>] Tot: 5s927ms | Step: 7ms
time to test 1 sample = 5.933867931366ms
ConfusionMatrix:
[[ 11 3 4 15 5 4 9 5 44 3] 10.680% [class: airplane]
[ 6 23 9 12 7 1 3 6 16 6] 25.843% [class: automobile]
[ 2 1 6 15 19 6 18 10 23 0] 6.000% [class: bird]
[ 2 4 6 34 6 7 28 8 6 2] 33.010% [class: cat]
[ 0 1 4 10 23 1 24 15 11 1] 25.556% [class: deer]
[ 3 4 7 26 11 9 16 7 3 0] 10.465% [class: dog]
[ 1 0 0 19 22 2 60 6 2 0] 53.571% [class: frog]
[ 1 1 4 15 13 7 8 48 4 1] 47.059% [class: horse]
[ 10 3 4 17 3 0 1 4 60 4] 56.604% [class: ship]
[ 6 17 4 13 7 5 8 11 29 9]] 8.257% [class: truck]
+ average row correct: 27.704359181225%
+ average rowUcol correct (VOC measure): 15.089332349598%
+ global correct: 28.3%
on training set:
online epoch # 3 [batchSize = 1]
[======================================== 2000/2000 ==================================>] Tot: 1m32s | Step: 47ms
time to learn 1 sample = 46.265820503235ms
ConfusionMatrix:
[[ 71 15 7 10 5 4 4 13 56 17] 35.149% [class: airplane]
[ 23 80 9 7 9 2 12 12 12 25] 41.885% [class: automobile]
[ 13 10 40 26 26 4 35 23 19 7] 19.704% [class: bird]
[ 7 16 11 72 6 7 41 14 7 14] 36.923% [class: cat]
[ 11 4 16 16 64 3 49 26 16 9] 29.907% [class: deer]
[ 14 15 20 37 21 14 24 20 6 12] 7.650% [class: dog]
[ 8 3 11 29 19 4 125 7 1 0] 60.386% [class: frog]
[ 10 7 7 18 16 5 13 106 5 12] 53.266% [class: horse]
[ 38 14 8 3 5 1 6 11 108 9] 53.202% [class: ship]
[ 30 40 9 18 7 5 6 19 26 43]] 21.182% [class: truck]
+ average row correct: 35.925469771028%
+ average rowUcol correct (VOC measure): 21.164997443557%
+ global correct: 36.15%
saving network to /home/liuxinyao/test/train-on-cifar/cifar.net
on testing Set:
[======================================== 1000/1000 ==================================>] Tot: 18s641ms | Step: 15ms
time to test 1 sample = 18.646162986755ms
ConfusionMatrix:
[[ 14 3 4 18 4 4 9 6 41 0] 13.592% [class: airplane]
[ 6 27 7 11 6 1 4 5 14 8] 30.337% [class: automobile]
[ 2 1 8 14 21 5 17 9 22 1] 8.000% [class: bird]
[ 4 4 6 34 6 9 28 6 5 1] 33.010% [class: cat]
[ 0 1 8 9 26 0 19 16 11 0] 28.889% [class: deer]
[ 4 3 7 30 11 11 12 6 2 0] 12.791% [class: dog]
[ 0 0 0 17 22 2 64 3 3 1] 57.143% [class: frog]
[ 2 0 4 15 13 7 6 51 2 2] 50.000% [class: horse]
[ 11 3 4 17 3 1 1 3 58 5] 54.717% [class: ship]
[ 4 19 3 10 6 4 8 9 28 18]] 16.514% [class: truck]
+ average row correct: 30.499220788479%
+ average rowUcol correct (VOC measure): 17.480185627937%
+ global correct: 31.1%
With the full training set, the three epochs give 56.57%, 60.97%, and 63.44%.
Tuning experiments (sketched in code below):
1. Changing all activation functions from Tanh to ReLU lowered accuracy from 31.1% to 29% (last epoch).
2. Adding dropout of 0.5 after the fully connected layer lowered accuracy to 30.3% (last epoch).
3. Replacing SpatialMaxPooling with SpatialLPPooling raised accuracy to 38.8% (last epoch).
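For reproduction, a minimal sketch of the three substitutions against the convnet definition in the source below (each variant was tried separately, not combined; p = 2 is assumed for the LP pooling, and only the stage-1 pooling is shown):

-- variant 1: ReLU instead of Tanh (29% at the last epoch)
model:add(nn.ReLU())                               -- replaces model:add(nn.Tanh())
-- variant 2: dropout of 0.5 after the fully connected layer (30.3%)
model:add(nn.Linear(256*5*5, 128))
model:add(nn.Dropout(0.5))
-- variant 3: LP pooling instead of max pooling (38.8%)
model:add(nn.SpatialLPPooling(16, 2, 2, 2, 2, 2))  -- replaces model:add(nn.SpatialMaxPooling(2, 2, 2, 2))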
Source code:
----------------------------------------------------------------------
-- This script shows how to train different models on the CIFAR
-- dataset, using multiple optimization techniques (SGD, ASGD, CG)
--
-- This script demonstrates a classical example of training
-- well-known models (convnet, MLP, logistic regression)
-- on a 10-class classification problem.
--
-- It illustrates several points:
-- 1/ description of the model
-- 2/ choice of a loss function (criterion) to minimize
-- 3/ creation of a dataset as a simple Lua table
-- 4/ description of training and test procedures
--
-- Clement Farabet
----------------------------------------------------------------------
require 'nn'
require 'optim'
require 'image'
require 'sys'   -- sys.fpath(), sys.clock()
require 'xlua'  -- xlua.progress()
----------------------------------------------------------------------
-- parse command-line options
--
dname,fname = sys.fpath()
cmd = torch.CmdLine()
cmd:text()
cmd:text('CIFAR Training')
cmd:text()
cmd:text('Options:')
cmd:option('-save', fname:gsub('.lua',''), 'subdirectory to save/log experiments in')
cmd:option('-network', '', 'reload pretrained network')
cmd:option('-model', 'convnet', 'type of model to train: convnet | mlp | linear')
cmd:option('-full', false, 'use full dataset (50,000 samples)')
cmd:option('-visualize', false, 'visualize input data and weights during training')
cmd:option('-seed', 1, 'fixed input seed for repeatable experiments')
cmd:option('-optimization', 'SGD', 'optimization method: SGD | ASGD | CG | LBFGS')
cmd:option('-learningRate', 1e-3, 'learning rate at t=0')
cmd:option('-batchSize', 1, 'mini-batch size (1 = pure stochastic)')
cmd:option('-weightDecay', 0, 'weight decay (SGD only)')
cmd:option('-momentum', 0, 'momentum (SGD only)')
cmd:option('-t0', 1, 'start averaging at t0 (ASGD only), in nb of epochs')
cmd:option('-maxIter', 5, 'maximum nb of iterations for CG and LBFGS')
cmd:option('-threads', 2, 'nb of threads to use')
cmd:text()
opt = cmd:parse(arg)
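-- example invocation (assuming this file is saved as train-on-cifar.lua):
--   th train-on-cifar.lua -full -optimization SGD -momentum 0.9 -threads 4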
-- fix seed
torch.manualSeed(opt.seed)
-- threads
torch.setnumthreads(opt.threads)
print(' set nb of threads to ' .. opt.threads)
----------------------------------------------------------------------
-- define model to train
-- on the 10-class classification problem
--
classes = {'airplane', 'automobile', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck'}

if opt.network == '' then
   -- define model to train
   model = nn.Sequential()

   if opt.model == 'convnet' then
      ------------------------------------------------------------
      -- convolutional network
      ------------------------------------------------------------
      -- stage 1 : mean+std normalization -> filter bank -> squashing -> max pooling
      model:add(nn.SpatialConvolutionMap(nn.tables.random(3,16,1), 5, 5))
      model:add(nn.Tanh())
      model:add(nn.SpatialMaxPooling(2, 2, 2, 2))
      -- stage 2 : filter bank -> squashing -> max pooling
      model:add(nn.SpatialConvolutionMap(nn.tables.random(16, 256, 4), 5, 5))
      model:add(nn.Tanh())
      model:add(nn.SpatialMaxPooling(2, 2, 2, 2))
      -- stage 3 : standard 2-layer neural network
      model:add(nn.Reshape(256*5*5))
      model:add(nn.Linear(256*5*5, 128))
      model:add(nn.Tanh())
      model:add(nn.Linear(128, #classes))
      ------------------------------------------------------------

   elseif opt.model == 'mlp' then
      ------------------------------------------------------------
      -- regular 2-layer MLP
      ------------------------------------------------------------
      model:add(nn.Reshape(3*32*32))
      model:add(nn.Linear(3*32*32, 1*32*32))
      model:add(nn.Tanh())
      model:add(nn.Linear(1*32*32, #classes))
      ------------------------------------------------------------

   elseif opt.model == 'linear' then
      ------------------------------------------------------------
      -- simple linear model: logistic regression
      ------------------------------------------------------------
      model:add(nn.Reshape(3*32*32))
      model:add(nn.Linear(3*32*32, #classes))
      ------------------------------------------------------------

   else
      print('Unknown model type')
      cmd:text()
      error()
   end
else
   print(' reloading previously trained network')
   model = nn.Sequential()
   model:read(torch.DiskFile(opt.network))
end
-- retrieve parameters and gradients
parameters,gradParameters = model:getParameters()
-- verbose
print(' using model:')
print(model)
----------------------------------------------------------------------
-- loss function: negative log-likelihood
--
model:add(nn.LogSoftMax())
criterion = nn.ClassNLLCriterion()
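-- note: LogSoftMax followed by ClassNLLCriterion is the usual cross-entropy loss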
----------------------------------------------------------------------
-- get/create dataset
--
if opt.full then
   trsize = 50000
   tesize = 10000
else
   trsize = 2000
   tesize = 1000
end
-- download dataset
if not paths.dirp('cifar-10-batches-t7') then
   local www = 'http://torch7.s3-website-us-east-1.amazonaws.com/data/cifar-10-torch.tar.gz'
   local tar = paths.basename(www)
   os.execute('wget ' .. www .. '; ' .. 'tar xvf ' .. tar)
end
-- load dataset
trainData = {
   data = torch.Tensor(50000, 3072),
   labels = torch.Tensor(50000),
   size = function() return trsize end
}
for i = 0,4 do
   subset = torch.load('cifar-10-batches-t7/data_batch_' .. (i+1) .. '.t7', 'ascii')
   trainData.data[{ {i*10000+1, (i+1)*10000} }] = subset.data:t()
   trainData.labels[{ {i*10000+1, (i+1)*10000} }] = subset.labels
end
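-- labels on disk are 0-indexed; shift them to 1..10 for ClassNLLCriterion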
trainData.labels = trainData.labels + 1
subset = torch.load('cifar-10-batches-t7/test_batch.t7', 'ascii')
testData = {
   data = subset.data:t():double(),
   labels = subset.labels[1]:double(),
   size = function() return tesize end
}
testData.labels = testData.labels + 1
-- resize dataset (if using small version)
trainData.data = trainData.data[{ {1,trsize} }]
trainData.labels = trainData.labels[{ {1,trsize} }]
testData.data = testData.data[{ {1,tesize} }]
testData.labels = testData.labels[{ {1,tesize} }]
-- reshape data
trainData.data = trainData.data:reshape(trsize,3,32,32)
testData.data = testData.data:reshape(tesize,3,32,32)
----------------------------------------------------------------------
-- preprocess/normalize train/test sets
--
print ' preprocessing data (color space + normalization)'
collectgarbage()
-- preprocess trainSet
normalization = nn.SpatialContrastiveNormalization(1, image.gaussian1D(7))
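-- this operator locally normalizes the Y channel of each image over a 7-tap Gaussian neighborhood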
for i = 1,trainData:size() do
   -- rgb -> yuv
   local rgb = trainData.data[i]
   local yuv = image.rgb2yuv(rgb)
   -- normalize y locally:
   yuv[1] = normalization(yuv[{{1}}])
   trainData.data[i] = yuv
end
-- normalize u globally:
mean_u = trainData.data[{ {},2,{},{} }]:mean()
std_u = trainData.data[{ {},2,{},{} }]:std()
trainData.data[{ {},2,{},{} }]:add(-mean_u)
trainData.data[{ {},2,{},{} }]:div(std_u)
-- normalize v globally:
mean_v = trainData.data[{ {},3,{},{} }]:mean()
std_v = trainData.data[{ {},3,{},{} }]:std()
trainData.data[{ {},3,{},{} }]:add(-mean_v)
trainData.data[{ {},3,{},{} }]:div(std_v)
-- preprocess testSet
for i = 1,testData:size() do
   -- rgb -> yuv
   local rgb = testData.data[i]
   local yuv = image.rgb2yuv(rgb)
   -- normalize y locally:
   yuv[1] = normalization(yuv[{{1}}])
   testData.data[i] = yuv
end
-- normalize u globally (with the training-set statistics):
testData.data[{ {},2,{},{} }]:add(-mean_u)
testData.data[{ {},2,{},{} }]:div(std_u)
-- normalize v globally:
testData.data[{ {},3,{},{} }]:add(-mean_v)
testData.data[{ {},3,{},{} }]:div(std_v)
----------------------------------------------------------------------
-- define training and testing functions
--
-- this matrix records the current confusion across classes
confusion = optim.ConfusionMatrix(classes)
-- log results to files
accLogger = optim.Logger(paths.concat(opt.save, 'accuracy.log'))
errLogger = optim.Logger(paths.concat(opt.save, 'error.log' ))
-- display function
function display(input)
   iter = iter or 0
   require 'image'
   win_input = image.display{image=input, win=win_input, zoom=2, legend='input'}
   if iter % 10 == 0 then
      if opt.model == 'convnet' then
         win_w1 = image.display{
            image=model:get(1).weight, zoom=4, nrow=10,
            min=-1, max=1,
            win=win_w1, legend='stage 1: weights', padding=1
         }
         win_w2 = image.display{
            image=model:get(4).weight, zoom=4, nrow=30,
            min=-1, max=1,
            win=win_w2, legend='stage 2: weights', padding=1
         }
      elseif opt.model == 'mlp' then
         local W1 = torch.Tensor(model:get(2).weight):resize(2048,1024)
         win_w1 = image.display{
            image=W1, zoom=0.5, min=-1, max=1,
            win=win_w1, legend='W1 weights'
         }
         local W2 = torch.Tensor(model:get(4).weight):resize(10,2048) -- second Linear layer, not get(2)
         win_w2 = image.display{
            image=W2, zoom=0.5, min=-1, max=1,
            win=win_w2, legend='W2 weights'
         }
      end
   end
   iter = iter + 1
end
-- training function
function train(dataset)
   -- epoch tracker
   epoch = epoch or 1
   -- local vars
   local time = sys.clock()
   local trainError = 0
   -- do one epoch
   print(' on training set:')
   print(" online epoch # " .. epoch .. ' [batchSize = ' .. opt.batchSize .. ']')
   for t = 1,dataset:size(),opt.batchSize do
      -- disp progress
      xlua.progress(t, dataset:size())
      -- create mini batch
      local inputs = {}
      local targets = {}
      for i = t,math.min(t+opt.batchSize-1,dataset:size()) do
         -- load new sample
         local input = dataset.data[i]
         local target = dataset.labels[i]
         table.insert(inputs, input)
         table.insert(targets, target)
      end
      -- create closure to evaluate f(X) and df/dX
      local feval = function(x)
         -- get new parameters
         if x ~= parameters then
            parameters:copy(x)
         end
         -- reset gradients
         gradParameters:zero()
         -- f is the average of all criterions
         local f = 0
         -- evaluate function for complete mini batch
         for i = 1,#inputs do
            -- estimate f
            local output = model:forward(inputs[i])
            local err = criterion:forward(output, targets[i])
            f = f + err
            -- estimate df/dW
            local df_do = criterion:backward(output, targets[i])
            model:backward(inputs[i], df_do)
            -- update confusion
            confusion:add(output, targets[i])
            -- visualize?
            if opt.visualize then
               display(inputs[i])
            end
         end
         -- normalize gradients and f(X)
         gradParameters:div(#inputs)
         f = f/#inputs
         trainError = trainError + f
         -- return f and df/dX
         return f,gradParameters
      end
      -- optimize on current mini-batch
      if opt.optimization == 'CG' then
         config = config or {maxIter = opt.maxIter}
         optim.cg(feval, parameters, config)
      elseif opt.optimization == 'LBFGS' then
         config = config or {learningRate = opt.learningRate,
                             maxIter = opt.maxIter,
                             nCorrection = 10}
         optim.lbfgs(feval, parameters, config)
      elseif opt.optimization == 'SGD' then
         config = config or {learningRate = opt.learningRate,
                             weightDecay = opt.weightDecay,
                             momentum = opt.momentum,
                             learningRateDecay = 5e-7}
         optim.sgd(feval, parameters, config)
      elseif opt.optimization == 'ASGD' then
         config = config or {eta0 = opt.learningRate,
                             t0 = trsize * opt.t0} -- 'nbTrainingPatches' was never defined here; use the training-set size
         _,_,average = optim.asgd(feval, parameters, config)
      else
         error('unknown optimization method')
      end
   end
   -- train error
   trainError = trainError / math.floor(dataset:size()/opt.batchSize)
   -- time taken
   time = sys.clock() - time
   time = time / dataset:size()
   print(" time to learn 1 sample = " .. (time*1000) .. 'ms')
   -- print confusion matrix
   print(confusion)
   local trainAccuracy = confusion.totalValid * 100
   confusion:zero()
   -- save/log current net
   local filename = paths.concat(opt.save, 'cifar.net')
   os.execute('mkdir -p ' .. paths.dirname(filename))
   if paths.filep(filename) then
      os.execute('mv ' .. filename .. ' ' .. filename .. '.old')
   end
   print(' saving network to '..filename)
   torch.save(filename, model)
   -- next epoch
   epoch = epoch + 1
   return trainAccuracy, trainError
end
-- test function
function test(dataset)
   -- local vars
   local testError = 0
   local time = sys.clock()
   -- averaged param use?
   if average then
      cachedparams = parameters:clone()
      parameters:copy(average)
   end
   -- test over given dataset
   print(' on testing Set:')
   for t = 1,dataset:size() do
      -- disp progress
      xlua.progress(t, dataset:size())
      -- get new sample
      local input = dataset.data[t]
      local target = dataset.labels[t]
      -- test sample
      local pred = model:forward(input)
      confusion:add(pred, target)
      -- compute error
      local err = criterion:forward(pred, target)
      testError = testError + err
   end
   -- timing
   time = sys.clock() - time
   time = time / dataset:size()
   print(" time to test 1 sample = " .. (time*1000) .. 'ms')
   -- testing error estimation
   testError = testError / dataset:size()
   -- print confusion matrix
   print(confusion)
   local testAccuracy = confusion.totalValid * 100
   confusion:zero()
   -- averaged param use?
   if average then
      -- restore parameters
      parameters:copy(cachedparams)
   end
   return testAccuracy, testError
end
----------------------------------------------------------------------
-- and train!
--
while true do
   -- train/test
   trainAcc, trainErr = train(trainData)
   testAcc,  testErr  = test(testData)
   -- update logger
   accLogger:add{['% train accuracy'] = trainAcc, ['% test accuracy'] = testAcc}
   errLogger:add{['% train error'] = trainErr, ['% test error'] = testErr}
   -- plot logger
   accLogger:style{['% train accuracy'] = '-', ['% test accuracy'] = '-'}
   errLogger:style{['% train error'] = '-', ['% test error'] = '-'}
   accLogger:plot()
   errLogger:plot()
end
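(The loop above runs until interrupted; the logs at the top show the first three epochs of such a run.)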