转自:http://blog.csdn.net/Rainbow0210/article/details/53010694?locationNum=1&fps=1
DBN的实现(DeepLearnToolbox):
这里是将DBN作为无监督学习框架来使用的,将“学习成果”赋给ANN来完成分类。
训练集是60000张28*28的手写数字图片,测试集是10000张28*28的手写数字图片,对应的单幅图片的特征维度为28*28=784
% function test_example_DBN
% Example script: pre-train a DBN on MNIST without labels, copy the learned
% weights into a feed-forward NN, then train that NN for classification.
load mnist_uint8;
% Convert to double and divide by 255 so inputs lie in [0, 1] (rbmtrain
% asserts this range). Presumably the stored data is uint8 - confirm.
train_x = double(train_x) / 255;
test_x = double(test_x) / 255;
train_y = double(train_y);
test_y = double(test_y);
%% ex2 train a 100-100 hidden unit DBN and use its weights to initialize a NN
% Seed the random generator so repeated runs are reproducible.
rand('state',0)
%train dbn
% DBN configuration:
% two hidden layers of 100 units each on top of the input layer, i.e. two
% stacked restricted Boltzmann machines.
dbn.sizes = [100 100];
% Number of passes (epochs) over the training data for each RBM.
opts.numepochs = 2;
% Number of samples per mini-batch; must divide the number of training rows.
opts.batchsize = 100;
% Momentum: weight given to the previous parameter update when forming the
% next one (0 means plain gradient steps).
opts.momentum = 0;
% Learning rate.
opts.alpha = 1;
% Allocate the RBM parameters.
dbn = dbnsetup(dbn, train_x, opts);
% Train the RBMs one layer at a time.
dbn = dbntrain(dbn, train_x, opts);
% DBN pre-training is complete at this point.
%unfold dbn to nn
% Convert the trained DBN into a NN; the argument 10 is presumably the number
% of output classes (digits 0-9) - confirm against dbnunfoldtonn.
nn = dbnunfoldtonn(dbn, 10);
% Select the 'sigm' activation for the NN (presumably the sigmoid function).
nn.activation_function = 'sigm';
%train nn
% Train the NN with labels, reusing opts with a new epoch count.
opts.numepochs = 3;
opts.batchsize = 100;
nn = nntrain(nn, train_x, train_y, opts);
% Evaluate on the test set; er is presumably the error rate and bad the
% misclassified samples - confirm against nntest.
[er, bad] = nntest(nn, test_x, test_y);
% Fail unless the reported test error is below 0.10.
assert(er < 0.10, 'Too big error');
function dbn = dbnsetup(dbn, x, opts)
%DBNSETUP Allocate zero-initialised parameters for every RBM of a DBN.
%   dbn  - struct whose field sizes lists the hidden-layer widths. On return
%          sizes is prefixed with the input width and dbn.rbm{k} holds the
%          parameters of the k-th RBM.
%   x    - data matrix; only its number of columns (feature dimension) is used.
%   opts - struct providing alpha (learning rate) and momentum, copied into
%          every RBM.

    % Prepend the input dimensionality so the vector describes all layers,
    % e.g. [784 100 100] for 784 features and dbn.sizes = [100 100].
    layerWidths = [size(x, 2), dbn.sizes];
    dbn.sizes = layerWidths;

    % One RBM connects each pair of consecutive layers.
    for k = 1 : numel(layerWidths) - 1
        nVisible = layerWidths(k);
        nHidden = layerWidths(k + 1);

        % Hyper-parameters shared by all layers.
        dbn.rbm{k}.alpha = opts.alpha;
        dbn.rbm{k}.momentum = opts.momentum;

        % Connection weights (hidden x visible) and their update buffer.
        dbn.rbm{k}.W = zeros(nHidden, nVisible);
        dbn.rbm{k}.vW = zeros(nHidden, nVisible);

        % Visible-layer bias and its update buffer.
        dbn.rbm{k}.b = zeros(nVisible, 1);
        dbn.rbm{k}.vb = zeros(nVisible, 1);

        % Hidden-layer bias and its update buffer.
        dbn.rbm{k}.c = zeros(nHidden, 1);
        dbn.rbm{k}.vc = zeros(nHidden, 1);
    end
end
function dbn = dbntrain(dbn, x, opts)
%DBNTRAIN Train the RBMs stored in dbn.rbm one layer at a time.
%   dbn  - struct with a cell array rbm of RBM parameter structs.
%   x    - training data, one sample per row.
%   opts - training options, passed unchanged to rbmtrain.
%   Returns dbn with every entry of dbn.rbm replaced by its trained version.

    layerCount = numel(dbn.rbm);

    % The bottom RBM learns directly from the raw input.
    dbn.rbm{1} = rbmtrain(dbn.rbm{1}, x, opts);

    % Every higher RBM is trained on the output of the RBM beneath it.
    for layer = 2 : layerCount
        below = dbn.rbm{layer - 1};
        % Push the current data through the already-trained lower RBM.
        x = rbmup(below, x);
        dbn.rbm{layer} = rbmtrain(dbn.rbm{layer}, x, opts);
    end
end
function x = rbmup(rbm, x)
%RBMUP Map data through an RBM to its hidden-layer activations.
%   rbm - struct with weight matrix W (hidden x visible) and hidden bias c.
%   x   - input data, one sample per row.
%   Returns sigm(c' + x * W') evaluated row-wise; sigm is defined elsewhere
%   (the sigmoid function, per the original notes).

    sampleCount = size(x, 1);
    % Replicate the hidden bias so every sample row gets the same offset.
    hiddenBias = repmat(rbm.c', sampleCount, 1);
    preActivation = hiddenBias + x * rbm.W';
    x = sigm(preActivation);
end
function rbm = rbmtrain(rbm, x, opts)
%RBMTRAIN Train one RBM with single-step contrastive divergence on mini-batches.
%   rbm  - struct with fields W, b, c (weights, visible bias, hidden bias),
%          vW, vb, vc (their update buffers), alpha (learning rate) and
%          momentum.
%   x    - floating-point data, one sample per row, all values in [0, 1].
%   opts - struct with batchsize (must divide the number of rows of x) and
%          numepochs (number of passes over the data).
%   Returns rbm with updated parameters and prints the average
%   reconstruction error after every epoch.
%   Errors if x is not floating point, has values outside [0, 1], or if the
%   row count is not a multiple of opts.batchsize.

    assert(isfloat(x), 'x must be a float');
    assert(all(x(:)>=0) && all(x(:)<=1), 'all data in x must be in [0:1]');

    % m is the number of training samples.
    m = size(x, 1);
    numbatches = m / opts.batchsize;
    % Every batch must be full, so batchsize has to divide m exactly.
    assert(rem(numbatches, 1) == 0, 'numbatches not integer');

    for i = 1 : opts.numepochs
        % Random permutation of 1..m: each sample is visited exactly once per
        % epoch, in a fresh random order.
        kk = randperm(m);
        % Reconstruction error accumulated over this epoch.
        err = 0;

        for l = 1 : numbatches
            % Rows of the l-th mini-batch (batchsize x visible units).
            batch = x(kk((l - 1) * opts.batchsize + 1 : l * opts.batchsize), :);

            % One Gibbs step v1 -> h1 -> v2 -> h2. h1 and v2 come from
            % sigmrnd and h2 from sigm; both helpers are defined elsewhere
            % (presumably sigmrnd draws binary samples and sigm returns
            % probabilities - confirm).
            v1 = batch;
            h1 = sigmrnd(repmat(rbm.c', opts.batchsize, 1) + v1 * rbm.W');
            v2 = sigmrnd(repmat(rbm.b', opts.batchsize, 1) + h1 * rbm.W);
            h2 = sigm(repmat(rbm.c', opts.batchsize, 1) + v2 * rbm.W');

            % Hidden/visible correlations for the data and the reconstruction.
            c1 = h1' * v1;
            c2 = h2' * v2;

            % Momentum-smoothed updates, averaged over the batch.
            % The sums are taken explicitly along dimension 1 (over samples):
            % without the dimension argument a single-row batch would be
            % collapsed to a scalar and every bias would get the same update.
            rbm.vW = rbm.momentum * rbm.vW + rbm.alpha * (c1 - c2) / opts.batchsize;
            rbm.vb = rbm.momentum * rbm.vb + rbm.alpha * sum(v1 - v2, 1)' / opts.batchsize;
            rbm.vc = rbm.momentum * rbm.vc + rbm.alpha * sum(h1 - h2, 1)' / opts.batchsize;

            % Apply the updates.
            rbm.W = rbm.W + rbm.vW;
            rbm.b = rbm.b + rbm.vb;
            rbm.c = rbm.c + rbm.vc;

            % Squared reconstruction error per sample for this batch.
            err = err + sum(sum((v1 - v2) .^ 2)) / opts.batchsize;
        end

        % Report the mean per-batch reconstruction error for the epoch.
        disp(['epoch ' num2str(i) '/' num2str(opts.numepochs) '. Average reconstruction error is: ' num2str(err / numbatches)]);
    end
end