DeepLearnToolbox usage: MATLAB -> Set Path -> Add with Subfolders
DeepLearnToolbox_NN (collated according to the order in which the functions are called)
Contents
- ex1 vanilla neural net
- ex2 neural net with L2 weight decay
- ex3 neural net with dropout
- ex4 neural net with sigmoid activation function
- ex5 plotting functionality
- ex6 neural net with softmax output and plotting of validation and training error
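Each example below is selected with the ex_choise flag set near the top of test_example_NN. For instance, assuming the listing is saved as test_example_NN.m on the MATLAB path:
% edit test_example_NN.m and set ex_choise = 3 to run the dropout example, then:
test_example_NN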
function test_example_NN
load mnist_uint8;
ex_choise=5;
train_x = double(train_x) / 255;
test_x = double(test_x) / 255;
train_y = double(train_y);
test_y = double(test_y);
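% standardize inputs with the training-set mean and std (per pixel);
% the same mu/sigma are reused for the test set so both share one scale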
[train_x, mu, sigma] = zscore(train_x);
test_x = normalize(test_x, mu, sigma);
ex1 vanilla neural net
if (ex_choise==1)
rand('state',0)
nn = nnsetup([784 100 10]);
opts.numepochs = 1;
opts.batchsize = 100;
[nn, L] = nntrain(nn, train_x, train_y, opts);
[er, bad] = nntest(nn, test_x, test_y);
assert(er < 0.08, 'Too big error');
end
ex2 neural net with L2 weight decay
if ex_choise==2
rand('state',0)
nn = nnsetup([784 100 10]);
nn.weightPenaltyL2 = 1e-4;
opts.numepochs = 1;
opts.batchsize = 100;
nn = nntrain(nn, train_x, train_y, opts);
[er, bad] = nntest(nn, test_x, test_y);
assert(er < 0.1, 'Too big error');
end
ex3 neural net with dropout
if ex_choise==3
rand('state',0)
nn = nnsetup([784 100 10]);
nn.dropoutFraction = 0.5;
opts.numepochs = 1;
opts.batchsize = 100;
nn = nntrain(nn, train_x, train_y, opts);
[er, bad] = nntest(nn, test_x, test_y);
assert(er < 0.1, 'Too big error');
end
ex4 neural net with sigmoid activation function
if ex_choise==4
rand('state',0)
nn = nnsetup([784 100 10]);
nn.activation_function = 'sigm';
nn.learningRate = 1;
opts.numepochs = 1;
opts.batchsize = 100;
nn = nntrain(nn, train_x, train_y, opts);
[er, bad] = nntest(nn, test_x, test_y);
assert(er < 0.1, 'Too big error');
end
ex5 plotting functionality
if ex_choise==5
rand('state',0)
nn = nnsetup([784 20 10]);
opts.numepochs = 5;
nn.output = 'softmax';
opts.batchsize = 1000;
opts.plot = 1;
nn = nntrain(nn, train_x, train_y, opts);
[er, bad] = nntest(nn, test_x, test_y);
assert(er < 0.1, 'Too big error');
end
Sample console output for ex5:
epoch 1/5. Took 1.2234 seconds. Mini-batch mean squared error on training set is 0.94451; Full-batch train err = 0.387429
epoch 2/5. Took 1.2288 seconds. Mini-batch mean squared error on training set is 0.35993; Full-batch train err = 0.310213
epoch 3/5. Took 1.2134 seconds. Mini-batch mean squared error on training set is 0.30622; Full-batch train err = 0.279947
epoch 4/5. Took 1.2075 seconds. Mini-batch mean squared error on training set is 0.27597; Full-batch train err = 0.249559
epoch 5/5. Took 1.2745 seconds. Mini-batch mean squared error on training set is 0.25535; Full-batch train err = 0.234196
ex6 neural net with softmax output and plotting of validation and training error
if ex_choise==6
vx = train_x(1:10000,:);
tx = train_x(10001:end,:);
vy = train_y(1:10000,:);
ty = train_y(10001:end,:);
rand('state',0)
nn = nnsetup([784 20 10]);
nn.output = 'softmax';
opts.numepochs = 5;
opts.batchsize = 1000;
opts.plot = 1;
nn = nntrain(nn, tx, ty, opts, vx, vy);
[er, bad] = nntest(nn, test_x, test_y);
assert(er < 0.1, 'Too big error');
end
function [x, mu, sigma] = zscore(x)
mu=mean(x);
sigma=max(std(x),eps);
x=bsxfun(@minus,x,mu);
x=bsxfun(@rdivide,x,sigma);
end
function x = normalize(x, mu, sigma)
x=bsxfun(@minus,x,mu);
x=bsxfun(@rdivide,x,sigma);
end
function nn = nnsetup(architecture)
nn.size = architecture;                  % layer sizes, e.g. [784 100 10]
nn.n = numel(nn.size);                   % number of layers
nn.activation_function = 'tanh_opt';     % hidden-layer activation: 'tanh_opt' (scaled tanh) or 'sigm'
nn.learningRate = 2;                     % step size (ex4 lowers it to 1 for 'sigm')
nn.momentum = 0.5;                       % momentum coefficient
nn.scaling_learningRate = 1;             % learning rate is multiplied by this after every epoch
nn.weightPenaltyL2 = 0;                  % L2 weight-decay coefficient
nn.nonSparsityPenalty = 0;               % weight of the sparsity penalty
nn.sparsityTarget = 0.05;                % target mean activation for the sparsity penalty
nn.inputZeroMaskedFraction = 0;          % fraction of inputs zeroed per batch (denoising)
nn.dropoutFraction = 0;                  % fraction of hidden units dropped per batch
nn.testing = 0;                          % set to 1 during evaluation/prediction
nn.output = 'sigm';                      % output unit type: 'sigm', 'linear' or 'softmax'
for i = 2 : nn.n
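% uniform init in +/- 4*sqrt(6/(fan_in+fan_out)) with an extra bias column;
% the factor 4 is the Glorot-style range commonly scaled up for sigmoid-like units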
nn.W{i - 1} = (rand(nn.size(i), nn.size(i - 1)+1) - 0.5) * 2 * 4 * sqrt(6 / (nn.size(i) + nn.size(i - 1)));
nn.vW{i - 1} = zeros(size(nn.W{i - 1}));
nn.p{i} = zeros(1, nn.size(i));
end
end
function [nn, L] = nntrain(nn, train_x, train_y, opts, val_x, val_y)
assert(isfloat(train_x), 'train_x must be a float');
assert(nargin == 4 || nargin == 6,'number of input arguments must be 4 or 6')
loss.train.e = [];
loss.train.e_frac = [];
loss.val.e = [];
loss.val.e_frac = [];
opts.validation = 0;
if nargin == 6
opts.validation = 1;
end
fhandle = [];
if isfield(opts,'plot') && opts.plot == 1
fhandle = figure();
end
m = size(train_x, 1);
batchsize = opts.batchsize;
numepochs = opts.numepochs;
numbatches = m / batchsize;
assert(rem(numbatches, 1) == 0, 'numbatches must be an integer');
L = zeros(numepochs*numbatches,1);
n = 1;
for i = 1 : numepochs
tic;
kk = randperm(m);
for l = 1 : numbatches
batch_x = train_x(kk((l - 1) * batchsize + 1 : l * batchsize), :);
if(nn.inputZeroMaskedFraction ~= 0)
batch_x = batch_x.*(rand(size(batch_x))>nn.inputZeroMaskedFraction);
end
batch_y = train_y(kk((l - 1) * batchsize + 1 : l * batchsize), :);
nn = nnff(nn, batch_x, batch_y);
nn = nnbp(nn);
nn = nnapplygrads(nn);
L(n) = nn.L;
n = n + 1;
end
t = toc;
if opts.validation == 1
loss = nneval(nn, loss, train_x, train_y, val_x, val_y);
str_perf = sprintf('; Full-batch train mse = %f, val mse = %f', loss.train.e(end), loss.val.e(end));
else
loss = nneval(nn, loss, train_x, train_y);
str_perf = sprintf('; Full-batch train err = %f', loss.train.e(end));
end
if ishandle(fhandle)
nnupdatefigures(nn, fhandle, loss, opts, i);
end
disp(['epoch ' num2str(i) '/' num2str(opts.numepochs) '. Took ' num2str(t) ' seconds' '. Mini-batch mean squared error on training set is ' num2str(mean(L((n-numbatches):(n-1)))) str_perf]);
nn.learningRate = nn.learningRate * nn.scaling_learningRate;
end
end
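nneval is called by nntrain above but is not listed in this walkthrough. The following is a minimal sketch consistent with how it is used here (record the full-batch loss nn.L on the training set and, when given, the validation set; for a softmax output also record the misclassification fraction via nntest); the toolbox's own version may differ in detail.
function loss = nneval(nn, loss, train_x, train_y, val_x, val_y)
% full-batch evaluation; nn.testing = 1 makes nnff scale dropout
% activations instead of sampling a new mask
nn.testing = 1;
nn = nnff(nn, train_x, train_y);
loss.train.e(end + 1) = nn.L;
if nargin == 6
nn = nnff(nn, val_x, val_y);
loss.val.e(end + 1) = nn.L;
end
nn.testing = 0;
% for classification (softmax output), also track the error fraction
if strcmp(nn.output, 'softmax')
[er_train, ~] = nntest(nn, train_x, train_y);
loss.train.e_frac(end + 1) = er_train;
if nargin == 6
[er_val, ~] = nntest(nn, val_x, val_y);
loss.val.e_frac(end + 1) = er_val;
end
end
end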
function nn = nnff(nn, x, y)
n = nn.n;
m = size(x, 1);
x = [ones(m,1) x];
nn.a{1} = x;
for i = 2 : n-1
switch nn.activation_function
case 'sigm'
nn.a{i} = sigm(nn.a{i - 1} * nn.W{i - 1}');
case 'tanh_opt'
nn.a{i} = tanh_opt(nn.a{i - 1} * nn.W{i - 1}');
end
if(nn.dropoutFraction > 0)
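% standard (non-inverted) dropout: at test time scale activations by the
% keep probability; at train time zero out a random fraction of units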
if(nn.testing)
nn.a{i} = nn.a{i}.*(1 - nn.dropoutFraction);
else
nn.dropOutMask{i} = (rand(size(nn.a{i}))>nn.dropoutFraction);
nn.a{i} = nn.a{i}.*nn.dropOutMask{i};
end
end
if(nn.nonSparsityPenalty>0)
nn.p{i} = 0.99 * nn.p{i} + 0.01 * mean(nn.a{i}, 1);
end
nn.a{i} = [ones(m,1) nn.a{i}];
end
switch nn.output
case 'sigm'
nn.a{n} = sigm(nn.a{n - 1} * nn.W{n - 1}');
case 'linear'
nn.a{n} = nn.a{n - 1} * nn.W{n - 1}';
case 'softmax'
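% softmax with the row-wise max subtracted before exp for numerical
% stability; the subtraction cancels out in the normalization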
nn.a{n} = nn.a{n - 1} * nn.W{n - 1}';
nn.a{n} = exp(bsxfun(@minus, nn.a{n}, max(nn.a{n},[],2)));
nn.a{n} = bsxfun(@rdivide, nn.a{n}, sum(nn.a{n}, 2));
end
nn.e = y - nn.a{n};
switch nn.output
case {'sigm', 'linear'}
nn.L = 1/2 * sum(sum(nn.e .^ 2)) / m;
case 'softmax'
nn.L = -sum(sum(y .* log(nn.a{n}))) / m;
end
end
function nn = nnbp(nn)
n = nn.n;
sparsityError = 0;
switch nn.output
case 'sigm'
d{n} = - nn.e .* (nn.a{n} .* (1 - nn.a{n}));
case {'softmax','linear'}
d{n} = - nn.e;
end
for i = (n - 1) : -1 : 2
switch nn.activation_function
case 'sigm'
d_act = nn.a{i} .* (1 - nn.a{i});
case 'tanh_opt'
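% derivative of the scaled tanh f(x) = 1.7159*tanh(2x/3), written in terms
% of the stored activation a: f'(x) = 1.7159*(2/3)*(1 - (a/1.7159)^2)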
d_act = 1.7159 * 2/3 * (1 - 1/(1.7159)^2 * nn.a{i}.^2);
end
if(nn.nonSparsityPenalty>0)
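% gradient of the KL-divergence sparsity penalty pushing mean activations
% toward sparsityTarget; the leading zero column leaves the bias untouched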
pi = repmat(nn.p{i}, size(nn.a{i}, 1), 1);
sparsityError = [zeros(size(nn.a{i},1),1) nn.nonSparsityPenalty * (-nn.sparsityTarget ./ pi + (1 - nn.sparsityTarget) ./ (1 - pi))];
end
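% the output layer carries no bias column, so its delta is used as-is;
% for inner layers the leading bias delta is stripped before propagating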
if i+1==n
d{i} = (d{i + 1} * nn.W{i} + sparsityError) .* d_act;
else
d{i} = (d{i + 1}(:,2:end) * nn.W{i} + sparsityError) .* d_act;
end
if(nn.dropoutFraction>0)
d{i} = d{i} .* [ones(size(d{i},1),1) nn.dropOutMask{i}];
end
end
for i = 1 : (n - 1)
if i+1==n
nn.dW{i} = (d{i + 1}' * nn.a{i}) / size(d{i + 1}, 1);
else
nn.dW{i} = (d{i + 1}(:,2:end)' * nn.a{i}) / size(d{i + 1}, 1);
end
end
end
function nn = nnapplygrads(nn)
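% gradient descent step with optional L2 weight decay (bias column excluded)
% and classical momentum applied through the velocity vW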
for i = 1 : (nn.n - 1)
if(nn.weightPenaltyL2>0)
dW = nn.dW{i} + nn.weightPenaltyL2 * [zeros(size(nn.W{i},1),1) nn.W{i}(:,2:end)];
else
dW = nn.dW{i};
end
dW = nn.learningRate * dW;
if(nn.momentum>0)
nn.vW{i} = nn.momentum*nn.vW{i} + dW;
dW = nn.vW{i};
end
nn.W{i} = nn.W{i} - dW;
end
end
function nnupdatefigures(nn,fhandle,L,opts,i)
if i > 1
x_ax = 1:i;
if opts.validation == 1
M = {'Training','Validation'};
else
M = {'Training'};
end
if strcmp(nn.output,'softmax')
plot_x = x_ax';
plot_ye = L.train.e';
plot_yfrac = L.train.e_frac';
else
plot_x = x_ax';
plot_ye = L.train.e';
end
if opts.validation == 1
plot_x = [plot_x, x_ax'];
plot_ye = [plot_ye,L.val.e'];
end
if opts.validation == 1 && strcmp(nn.output,'softmax')
plot_yfrac = [plot_yfrac, L.val.e_frac'];
end
figure(fhandle);
if strcmp(nn.output,'softmax')
p1 = subplot(1,2,1);
plot(plot_x,plot_ye);
xlabel('Number of epochs'); ylabel('Error');title('Error');
legend(p1, M,'Location','NorthEast');
set(p1, 'Xlim',[0,opts.numepochs + 1])
p2 = subplot(1,2,2);
plot(plot_x,plot_yfrac);
xlabel('Number of epochs'); ylabel('Misclassification rate');
title('Misclassification rate')
legend(p2, M,'Location','NorthEast');
set(p2, 'Xlim',[0,opts.numepochs + 1])
else
p = plot(plot_x,plot_ye);
xlabel('Number of epochs'); ylabel('Error');title('Error');
legend(p, M,'Location','NorthEast');
set(gca, 'Xlim',[0,opts.numepochs + 1])
end
drawnow;
end
end
function [er, bad] = nntest(nn, x, y)
labels = nnpredict(nn, x);
[~, expected] = max(y,[],2);
bad = find(labels ~= expected);
er = numel(bad) / size(x, 1);
end