卷积神经网络(CNN)讲解及代码

相关文章:
1. 经典反向传播算法公式详细推导
2. 卷积神经网络(CNN)反向传播算法公式详细推导

网上有很多关于CNN的教程讲解,在这里我们抛开长篇大论,只针对代码来谈。本文使用的是用MATLAB编写的DeepLearnToolbox,其中包括NN、CNN、DBN、SAE、CAE。在这里我们感谢作者编写了这样一个简单易懂、适用于新手学习的代码。由于本文直接针对代码,这就要求读者有一定的CNN基础,可以参考LeCun的论文《Gradient-Based Learning Applied to Document Recognition》和tornadomeet的博文。
首先把Toolbox下载下来,解压缩到某位置。然后打开MATLAB,利用Set Path把文件夹内的util和data添加至路径中。接着打开tests文件夹中的test_example_CNN.m。最后将当前目录切换到CNN文件夹,并运行该脚本。

下面是test_example_CNN.m中的代码及注释,比较简单。

% Example script: train a small CNN on MNIST and check its test error.
load mnist_uint8;  % provides train_x, train_y, test_x, test_y used below

% Scale the grey values into [0, 1] (the network uses sigmoid activations)
% and turn every sample into a 28x28 image stacked along the 3rd dimension.
% The number of samples is inferred with [] instead of being hard-coded, so
% the script keeps working when only a subset of the data is loaded.
train_x = double(reshape(train_x', 28, 28, [])) / 255;
test_x = double(reshape(test_x', 28, 28, [])) / 255;
% Labels are transposed so that each column belongs to one sample.
train_y = double(train_y');
test_y = double(test_y');

%% Network structure: 6c-2s-12c-2s
% According to the original author, 1 epoch takes roughly 200 s and gives
% about 11% error, while 100 epochs give about 1.2% error.

rand('state',0) % fix the generator state so every run draws the same random numbers

cnn.layers = {
    struct('type', 'i') % input layer
    struct('type', 'c', 'outputmaps', 6, 'kernelsize', 5) % convolution layer
    struct('type', 's', 'scale', 2) % pooling (subsampling) layer
    struct('type', 'c', 'outputmaps', 12, 'kernelsize', 5) % convolution layer
    struct('type', 's', 'scale', 2) % pooling (subsampling) layer
};


opts.alpha = 1;  % gradient-descent step size
opts.batchsize = 50; % number of images per mini-batch
opts.numepochs = 1; % number of passes over the whole training set

cnn = cnnsetup(cnn, train_x, train_y); % initialise the CNN parameters
cnn = cnntrain(cnn, train_x, train_y, opts); % train the CNN

[er, bad] = cnntest(cnn, test_x, test_y); % evaluate on the test set

% Plot the smoothed training loss recorded by cnntrain in cnn.rL.
figure; plot(cnn.rL);
assert(er<0.12, 'Too big error');

下面是cnnsetup.m中的代码及注释。

function net = cnnsetup(net, x, y)
%CNNSETUP Create the initial parameters of a CNN described by net.layers.
%   net = cnnsetup(net, x, y)
%
%   net - struct whose net.layers cell array holds one struct per layer;
%         layers of type 'c' need 'outputmaps' and 'kernelsize', layers of
%         type 's' need 'scale'.
%   x   - input data; only the size of x(:, :, 1) is used (one input map).
%   y   - targets; size(y, 1) gives the number of output units.
%
%   Returns net with kernels k{i}{j} and biases b{j} added to every
%   convolution layer, biases b{j} added to every subsampling layer, and
%   the fully connected output parameters net.ffW / net.ffb.
    assert(~isOctave() || compare_versions(OCTAVE_VERSION, '3.8.0', '>='), ['Octave 3.8.0 or greater is required for CNNs as there is a bug in convolution in previous versions. See http://savannah.gnu.org/bugs/?39314. Your version is ' myOctaveVersion]);

    numInMaps = 1;                        % the network input is a single map
    mapDims = size(squeeze(x(:, :, 1)));  % height/width of one input sample

    for l = 1 : numel(net.layers)
        layer = net.layers{l};
        switch layer.type
            case 's'
                % Subsampling shrinks every map by the scale factor.
                mapDims = mapDims / layer.scale;
                assert(all(floor(mapDims)==mapDims), ['Layer ' num2str(l) ' size must be integer. Actual: ' num2str(mapDims)]);
                for m = 1 : numInMaps
                    layer.b{m} = 0;
                end

            case 'c'
                ksz = layer.kernelsize;
                % A 'valid' convolution removes ksz - 1 rows and columns.
                mapDims = mapDims - ksz + 1;
                fan_out = layer.outputmaps * ksz ^ 2;
                fan_in = numInMaps * ksz ^ 2;
                % Uniform initialisation range from Glorot & Bengio,
                % "Understanding the difficulty of training deep
                % feedforward neural networks".
                initScale = sqrt(6 / (fan_in + fan_out));
                for outIdx = 1 : layer.outputmaps
                    for inIdx = 1 : numInMaps
                        layer.k{inIdx}{outIdx} = (rand(ksz) - 0.5) * 2 * initScale;
                    end
                    layer.b{outIdx} = 0;
                end
                % The maps produced here are the inputs of the next layer.
                numInMaps = layer.outputmaps;
        end
        net.layers{l} = layer;
    end

    % fvnum: length of the feature vector formed from the last layer's maps.
    % onum:  number of output units (one per row of y).
    fvnum = prod(mapDims) * numInMaps;
    onum = size(y, 1);

    % Fully connected output layer: ffb holds the biases, ffW the
    % onum-by-fvnum weight matrix.
    net.ffb = zeros(onum, 1);
    net.ffW = (rand(onum, fvnum) - 0.5) * 2 * sqrt(6 / (onum + fvnum));
end

下面是cnntrain.m中的代码及注释。

function net = cnntrain(net, x, y, opts)
%CNNTRAIN Train a CNN with mini-batch gradient descent.
%   net = cnntrain(net, x, y, opts)
%
%   x    - inputs, one sample per slice along the 3rd dimension.
%   y    - targets, one sample per column.
%   opts - struct with fields batchsize and numepochs (opts is also passed
%          on to cnnapplygrads).
%
%   Raises an error when the number of samples is not a multiple of
%   opts.batchsize. The exponentially smoothed loss of every batch is
%   appended to net.rL.
    numSamples = size(x, 3);
    batchSize = opts.batchsize;
    numbatches = numSamples / batchSize;
    if rem(numbatches, 1) ~= 0
        error('numbatches not integer');
    end

    net.rL = [];
    for epoch = 1 : opts.numepochs
        disp(['epoch ' num2str(epoch) '/' num2str(opts.numepochs)]);
        tic;  % tic/toc report the wall-clock time of the epoch
        order = randperm(numSamples);  % fresh random sample order per epoch
        for b = 1 : numbatches
            pick = order((b - 1) * batchSize + 1 : b * batchSize);

            net = cnnff(net, x(:, :, pick));   % forward pass
            net = cnnbp(net, y(:, pick));      % backward pass
            net = cnnapplygrads(net, opts);    % parameter update

            % Seed the running loss with the first batch loss, then keep
            % an exponential moving average (weight 0.01 on the new loss).
            if isempty(net.rL)
                net.rL(1) = net.L;
            end
            net.rL(end + 1) = 0.99 * net.rL(end) + 0.01 * net.L;
        end
        toc;
    end

end

下面是cnnff.m中的代码及注释。

function net = cnnff(net, x)
%CNNFF Forward pass of the CNN.
%   net = cnnff(net, x)
%
%   x - input batch, one sample per slice along the 3rd dimension.
%
%   Stores the activations of every layer in net.layers{l}.a{j}, the
%   stacked feature vector of the last layer in net.fv (one column per
%   sample) and the network output in net.o.
%
%   Every array dimension is queried with size(A, dim) rather than through
%   the vector returned by size(A): MATLAB drops trailing singleton
%   dimensions from that vector, so a batch holding a single sample would
%   otherwise yield a 2-element size and break the code below.
    n = numel(net.layers);
    net.layers{1}.a{1} = x;
    inputmaps = 1;

    for l = 2 : n   % every layer except the input layer
        if strcmp(net.layers{l}.type, 'c') % convolution layer
            %  !!below can probably be handled by insane matrix operations
            ksz = net.layers{l}.kernelsize;
            prevMap = net.layers{l - 1}.a{1};
            % Output size of a 'valid' convolution: input size - kernel + 1;
            % the batch dimension is unchanged.
            outDims = [size(prevMap, 1) - ksz + 1, size(prevMap, 2) - ksz + 1, size(prevMap, 3)];
            for j = 1 : net.layers{l}.outputmaps   % every output map
                z = zeros(outDims);
                for i = 1 : inputmaps   % every input map contributes
                    % Sum the convolutions of all input maps.
                    z = z + convn(net.layers{l - 1}.a{i}, net.layers{l}.k{i}{j}, 'valid');
                end
                % Add the bias and apply the sigmoid activation.
                net.layers{l}.a{j} = sigm(z + net.layers{l}.b{j});
            end
            % These output maps are the input maps of the next layer.
            inputmaps = net.layers{l}.outputmaps;
        elseif strcmp(net.layers{l}.type, 's')
            % Mean pooling: average over scale-by-scale windows, then keep
            % every scale-th value so the windows do not overlap.
            scale = net.layers{l}.scale;
            avgKernel = ones(scale) / (scale ^ 2);   % same kernel for all maps
            for j = 1 : inputmaps
                z = convn(net.layers{l - 1}.a{j}, avgKernel, 'valid');
                net.layers{l}.a{j} = z(1 : scale : end, 1 : scale : end, :);
            end
        end
    end

    % Flatten the maps of the last layer and stack them vertically: each
    % column of net.fv is the feature vector of one sample.
    net.fv = [];
    for j = 1 : numel(net.layers{n}.a)
        lastMap = net.layers{n}.a{j};
        net.fv = [net.fv; reshape(lastMap, size(lastMap, 1) * size(lastMap, 2), size(lastMap, 3))];
    end
    % Fully connected output layer with sigmoid activation.
    net.o = sigm(net.ffW * net.fv + repmat(net.ffb, 1, size(net.fv, 2)));

end

下面是cnnbp.m中的代码及注释,比较复杂。首先要有普通BP的基础,可以参考CeleryChen的博客,而CNN的反向传播略有不同,可以参考tornadomeet的博客。

function net = cnnbp(net, y)
%CNNBP Backward pass of the CNN: loss, deltas and parameter gradients.
%   net = cnnbp(net, y)
%
%   y - targets, one column per sample, same size as net.o.
%
%   Reads the activations stored by the forward pass (net.o, net.fv,
%   net.layers{l}.a) and writes:
%     net.e, net.L            - output error and mean squared-error loss
%     net.od, net.fvd         - deltas of the output and feature-vector layers
%     net.layers{l}.d{j}      - delta of map j in layer l
%     net.layers{l}.dk / .db  - kernel and bias gradients of conv layers
%     net.dffW, net.dffb      - gradients of the output layer
%
%   A convolution layer that is not the last layer must be followed by a
%   subsampling layer, because its 'scale' field is read here.
%
%   Array dimensions are queried with size(A, dim): the vector returned by
%   size(A) has only two entries when the batch holds a single sample, so
%   indexing its third entry would fail.
    n = numel(net.layers);
    net.e = net.o - y; % output error
    % Squared-error loss averaged over the samples (columns) of the batch.
    net.L = 1/2* sum(net.e(:) .^ 2) / size(net.e, 2);

    %%  backprop deltas
    net.od = net.e .* (net.o .* (1 - net.o));   %  output delta, includes the sigmoid derivative
    net.fvd = (net.ffW' * net.od);              %  delta of the feature vector
    if strcmp(net.layers{n}.type, 'c')         %  only conv layers apply a sigmoid
        net.fvd = net.fvd .* (net.fv .* (1 - net.fv));
    end

    % Reshape the feature-vector delta back into the shape of the maps of
    % the last layer.
    lastMap = net.layers{n}.a{1};
    mapRows = size(lastMap, 1);
    mapCols = size(lastMap, 2);
    batchLen = size(lastMap, 3);
    fvnum = mapRows * mapCols;
    for j = 1 : numel(net.layers{n}.a)
        net.layers{n}.d{j} = reshape(net.fvd(((j - 1) * fvnum + 1) : j * fvnum, :), mapRows, mapCols, batchLen);
    end

    % Propagate the deltas backwards through the remaining layers.
    for l = (n - 1) : -1 : 1
        if strcmp(net.layers{l}.type, 'c') % conv layer followed by a subsampling layer
            scale = net.layers{l + 1}.scale;
            for j = 1 : numel(net.layers{l}.a)
                % Upsample the delta of the subsampling layer: every value
                % is repeated over a scale-by-scale block. Index vectors
                % such as [1 1 2 2 ...] do this independently of the
                % number of array dimensions.
                dNext = net.layers{l + 1}.d{j};
                rowIdx = kron(1 : size(dNext, 1), ones(1, scale));
                colIdx = kron(1 : size(dNext, 2), ones(1, scale));
                upsampled = dNext(rowIdx, colIdx, :);
                % Sigmoid derivative times the upsampled delta; dividing by
                % scale^2 accounts for the averaging done by mean pooling.
                net.layers{l}.d{j} = net.layers{l}.a{j} .* (1 - net.layers{l}.a{j}) .* (upsampled / scale ^ 2);
            end
        elseif strcmp(net.layers{l}.type, 's') % subsampling layer followed by a conv layer
            for i = 1 : numel(net.layers{l}.a)
                z = zeros(size(net.layers{l}.a{1}));
                for j = 1 : numel(net.layers{l + 1}.a) % sum the contributions of all maps of layer l+1
                    X =  net.layers{l + 1}.k{i}{j};
                    % 'full' convolution with the kernel flipped in both
                    % spatial dimensions (rotated by 180 degrees).
                    z = z + convn(net.layers{l + 1}.d{j}, flipdim(flipdim(X, 1), 2), 'full');
                    %z = z + convn(net.layers{l + 1}.d{j}, rot180(net.layers{l + 1}.k{i}{j}), 'full');
                end
                net.layers{l}.d{i} = z;
            end
        end
    end

    %% compute the gradients from the deltas
    for l = 2 : n
        if strcmp(net.layers{l}.type, 'c')  % only conv layers have gradients computed here
            for j = 1 : numel(net.layers{l}.a)
                for i = 1 : numel(net.layers{l - 1}.a)
                    % Kernel gradient, divided by the number of samples.
                    % NOTE(review): flipall is defined outside this file;
                    % confirm it handles 2-D input for single-sample batches.
                    net.layers{l}.dk{i}{j} = convn(flipall(net.layers{l - 1}.a{i}), net.layers{l}.d{j}, 'valid') / size(net.layers{l}.d{j}, 3);
                end
                % Bias gradient: sum of all deltas divided by the number of samples.
                net.layers{l}.db{j} = sum(net.layers{l}.d{j}(:)) / size(net.layers{l}.d{j}, 3);
            end
        end
    end
    net.dffW = net.od * (net.fv') / size(net.od, 2); % divided by the number of samples
    net.dffb = mean(net.od, 2);

    % Rotate a matrix by 180 degrees. Only referenced by the commented-out
    % alternative above.
    function X = rot180(X)
        X = flipdim(flipdim(X, 1), 2);
    end
end

下面是cnnapplygrads.m中的代码,比较简单,在这里就不进行注释了。

function net = cnnapplygrads(net, opts)
%CNNAPPLYGRADS Apply one gradient-descent step to all trainable parameters.
%   net = cnnapplygrads(net, opts)
%
%   opts.alpha is the step size. Kernels k and biases b of every
%   convolution layer are moved against their gradients dk and db; the
%   output layer parameters ffW / ffb are moved against dffW / dffb.
    step = opts.alpha;

    for l = 2 : numel(net.layers)
        if ~strcmp(net.layers{l}.type, 'c')
            continue;   % only convolution layers are updated in this loop
        end
        cur = net.layers{l};
        for j = 1 : numel(cur.a)                        % output maps
            for ii = 1 : numel(net.layers{l - 1}.a)     % input maps
                cur.k{ii}{j} = cur.k{ii}{j} - step * cur.dk{ii}{j};
            end
            cur.b{j} = cur.b{j} - step * cur.db{j};
        end
        net.layers{l} = cur;
    end

    % Fully connected output layer.
    net.ffW = net.ffW - step * net.dffW;
    net.ffb = net.ffb - step * net.dffb;
end

你可能感兴趣的:(深度学习,机器学习)