A First Look at VCNN

Overview: this post covers some pieces of the VCNN framework. I can't claim to fully understand it yet, but I'm starting to get the picture, so I'll write these notes down anyway.
https://github.com/Georgezhouzhou/vcnn_double-bladed

Notes on gen_training_data.m

addpath applications/deep_edge_aware_filters/utility/GT_filters/
addpath applications/deep_edge_aware_filters/utility/GT_filters/L0smoothing/
addpath data/

clear;

patch_dim = 64;
num_patches = 1000;
listing = dir('data/deepeaf/BSDS500/*.jpg');
fListing = dir('data/deepeaf/fImgs/*.jpg'); 


for m = 1 : 101
    fprintf('Extracting patch batch: %d / %d\n', m, 101);
    % extract random patches
    samples = zeros(patch_dim, patch_dim, 3, num_patches);
    labels = zeros(size(samples));
    for i = 1 : num_patches / 8  % 1000 patches per file = 125 random image draws (with repetition); each draw picks a random top-left corner for the patch
        if (mod(i,100) == 0)
            fprintf('Extracting patch: %d / %d\n', i*8, num_patches);
        end
        r_idx = random('unid', size(listing, 1));
        % Since both folders list files in the same order, image x and its filtered version share the same index
        I = imread(strcat('data/deepeaf/BSDS500/', listing(r_idx).name));
        fI = imread(strcat('data/deepeaf/fImgs/',fListing(r_idx).name));

        orig_img_size = size(I);
        r = random('unid', orig_img_size(1) - patch_dim + 1);
        c = random('unid', orig_img_size(2) - patch_dim + 1);

        % EdgeExtract only extracts gradients along the vertical direction; the
        % horizontal flip here plus the four 90-degree rotations below cover all
        % 8 orientations of the patch (see the sketch after this code block)
        patch = I(r:r+patch_dim-1, c:c+patch_dim-1, :);
        fpatch = fI(r:r+patch_dim-1, c:c+patch_dim-1, :);
        patchHoriFlipped = fliplr(patch);
        fpatch = fliplr(fpatch);  % flip the filtered patch the same way (note: fpatch is not used below, since GT_filter is re-applied to each rotated patch)
        idx_list = (i-1)*8+1:(i-1)*8+8;
        for idx = 1:4
            % samples stores vin, i.e. the gradients of the original image
            % labels stores vout, i.e. the gradients of the filtered image
            % Patches come in groups of 8: slots 1-4 hold Iy, Ix, -Iy, -Ix of the
            % original image and its filtered version,
            % slots 5-8 hold Iy, Ix, -Iy, -Ix of the horizontally flipped versions,
            % so patches 4 slots apart are mirror images of each other
            patch_rotated = im2double(imrotate(patch, (idx-1)*90));
            patch_filtered = GT_filter(patch_rotated);
            [vin, vout] = EdgeExtract(im2double(patch_rotated), im2double(patch_filtered));
            samples(:,:,:,idx_list(idx)) = vin;
            labels(:,:,:,idx_list(idx)) = vout;

            patch_rotated = im2double(imrotate(patchHoriFlipped, (idx-1)*90));
            patch_filtered = GT_filter(patch_rotated);
            [vin, vout] = EdgeExtract(im2double(patch_rotated), im2double(patch_filtered));
            samples(:,:,:,idx_list(idx+4)) = vin;
            labels(:,:,:,idx_list(idx+4)) = vout;
        end
    end
    samples = single(samples);
    labels = single(labels);
    % save it
    filename = strcat('data/deepeaf/certainFilter/train/patches_', num2str(m));
    save(filename, '-v7.3', 'samples', 'labels');
end
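
For reference, here is a minimal sketch of the kind of vertical-only gradient extraction the comments above refer to. This is my own illustration of the assumed behavior, not the actual EdgeExtract used above (whose source I haven't shown here):

    % Hypothetical sketch of a vertical-only gradient extractor (assumed behavior,
    % not the real EdgeExtract):
    function [vin, vout] = edge_extract_sketch(I, fI)
        % forward difference along the vertical (row) direction, zero-padded so
        % the result keeps the patch_dim x patch_dim x 3 shape stored above
        dy   = @(X) cat(1, X(2:end,:,:) - X(1:end-1,:,:), zeros(1, size(X,2), size(X,3)));
        vin  = dy(I);    % gradients of the original patch   -> goes into samples
        vout = dy(fI);   % gradients of the filtered patch   -> goes into labels
    end

Rotating the patch by 0/90/180/270 degrees before such an extraction then yields the Iy, Ix, -Iy, -Ix bookkeeping mentioned in the comments above.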

Notes on init.m

First, everything is configured according to config:

config.GEN_OUTPUT = @gen_output_copy;

.NEW_MEM = @to_gpu (this just converts data to GPU arrays: gpuArray(single(x)); see the sketch at the end of this list)

.IM2COL = @im2col_gpu

.NONLINEARITY = @relu;

.OUT_ACT = @nonlinearity_nil;

config.COST_FUN = @L2_norm;

.misc holds miscellaneous settings, e.g.:
.misc.current_layer = 1
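
Judging from the note above, to_gpu is presumably just a thin wrapper; a sketch of the assumed definition:

    % Presumed definition of to_gpu (based on the gpuArray(single(x)) note above):
    function x = to_gpu(x)
        x = gpuArray(single(x));
    end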

Initializing the weights and computing some statistics

The first layer

    r = config.weight_range;    
    conv_layer_c = 0;
    pool_layer_c = 0;
    full_layer_c = 0;
    layer_num = length(config.forward_pass_scheme)-1;
    config.layer_num = layer_num;
    config.feature_map_sizes = {};  % each entry is 3-D; the third dimension is the number of feature maps of that layer (its "depth"), i.e. conv_hidden_size
    config.weights = {};
    for idx = 1:layer_num
        if idx == 1
            ....
            if strcmp(config.forward_pass_scheme{idx}, 'conv_v_sr')
                ...
            elseif strcmp(config.forward_pass_scheme{idx}, 'conv_v')
                config.weights{idx} = config.NEW_MEM(randn(config.feature_map_sizes{idx}(3), ...
                                                           config.kernel_size(conv_layer_c, 1)*config.kernel_size(conv_layer_c, 2)*config.chs)*r);
                if config.normalize_init_weights
                    config.weights{idx} = config.weights{idx} / sqrt(config.kernel_size(conv_layer_c, 1) * config.kernel_size(conv_layer_c, 2) * config.conv_hidden_size(conv_layer_c));
                end

This sets up each layer according to its type. For the first layer, the conv_v weights are initialized randomly; if normalize_init_weights is set, they are then normalized.

The other layers

For any layer other than the first:

    elseif strcmp(config.forward_pass_scheme{idx}, 'conv_v')
        conv_layer_c = conv_layer_c + 1;
        config.feature_map_sizes{idx} = [config.feature_map_sizes{idx-1}(1)-config.kernel_size(conv_layer_c,1)+1 ...
                                         config.feature_map_sizes{idx-1}(2)-config.kernel_size(conv_layer_c,2)+1 ...
                                         config.conv_hidden_size(conv_layer_c)];
        config.weights{idx} = config.NEW_MEM(randn(config.feature_map_sizes{idx}(3), ...
                                                   config.kernel_size(conv_layer_c, 1)*config.kernel_size(conv_layer_c, 2)*config.feature_map_sizes{idx-1}(3))*r);
        if config.normalize_init_weights
            config.weights{idx} = config.weights{idx} / sqrt(config.kernel_size(conv_layer_c, 1) * config.kernel_size(conv_layer_c, 2) * config.conv_hidden_size(conv_layer_c));
        end

As you can see, feature_map_sizes{idx} has three entries: the first two are the spatial size, and the third is conv_hidden_size(conv_layer_c), where conv_layer_c is the index of the current conv layer. The normalization code is the same as for the first layer.
The first layer is special only because its input is the raw data; every other layer derives its feature map size (and other information) from the previous layer, as in the example below.
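
In other words, the spatial size follows the usual "valid" convolution rule: each dimension shrinks by kernel_size - 1. A quick illustration with made-up numbers (the real values come from the configure script):

    % Hypothetical numbers, only to illustrate the size rule above:
    H_prev = 64; W_prev = 64;      % previous layer's spatial size (made up)
    kh = 9; kw = 9;                % this layer's kernel size (made up)
    hidden = 64;                   % conv_hidden_size(conv_layer_c) (made up)
    fm_size = [H_prev-kh+1, W_prev-kw+1, hidden];   % = [56 56 64]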


    elseif strcmp(config.forward_pass_scheme{idx}, 'conv_f')
        conv_layer_c = conv_layer_c + 1;
        if idx == layer_num
            config.weights{idx} = config.NEW_MEM(randn(config.kernel_size(conv_layer_c, 1)*config.kernel_size(conv_layer_c, 2)*config.output_size(3), ...
                                                       config.conv_hidden_size(conv_layer_c-1))*r);
            if config.normalize_init_weights
                config.weights{idx} = config.weights{idx} / sqrt(config.kernel_size(conv_layer_c, 1) * config.kernel_size(conv_layer_c, 2) * size(config.weights{idx}, 1));
            end
            config.GEN_OUTPUT = @gen_output_from_conv_f;
        else
            fprintf('in init(): conv_f layer in the hidden layer not supported yet.\n');
        end

Note: the code shows that conv_f may only appear as the last layer. Also, the weights are not drawn directly from N(0,1); they are scaled by a factor, here r, i.e. config.weight_range defined in deepeaf_configure.m.
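
Putting the two steps together (my own summary, not spelled out in the source), a conv_v layer's weights are effectively drawn as

$$ W \sim r \cdot \mathcal{N}(0, 1), \qquad W \leftarrow \frac{W}{\sqrt{k_h\, k_w\, n_{\mathrm{hidden}}}}, $$

so with normalize_init_weights enabled each entry ends up with a standard deviation of roughly $r / \sqrt{k_h k_w n_{\mathrm{hidden}}}$ (for the conv_f layer the code uses size(config.weights{idx}, 1) in place of $n_{\mathrm{hidden}}$).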

With this, the network has been laid out according to config, including weight initialization and normalization, as well as each layer's feature map size.

Initializing the biases

    for idx = 1:layer_num-1
        config.weights{idx+layer_num} = config.NEW_MEM(zeros(config.feature_map_sizes{idx}(3), 1)+0.01);
    end
    if strcmp(config.forward_pass_scheme{layer_num}, 'conv_f')
        config.weights{layer_num*2} = config.NEW_MEM(zeros(size(config.weights{layer_num}, 1), 1)+0.05);
    else
        config.weights{layer_num*2} = config.NEW_MEM(zeros(config.output_size(3), 1)+0.05);
    end

So config.weights ends up with layer_num*2 entries in total. For example, deep_edge_aware has 3 conv layers; forward_pass_scheme is configured as 'conv_v', 'conv_v', 'conv_f', 'out', and layer_num = length(config.forward_pass_scheme)-1 = 4-1 = 3.
weights{1..3} are the actual convolution weights, weights{4..5} are the biases of the first two layers, and weights{6} is the bias of the last layer.
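
Concretely, for the deep_edge_aware configuration just described, the weight cell array is laid out like this (my own summary of the code above):

    % weights{1}   conv_v layer 1 kernel   (r * randn, optionally normalized)
    % weights{2}   conv_v layer 2 kernel
    % weights{3}   conv_f layer 3 kernel
    % weights{4}   bias of layer 1         (initialized to 0.01)
    % weights{5}   bias of layer 2         (initialized to 0.01)
    % weights{6}   bias of layer 3         (initialized to 0.05)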

prepare memory

So mem really does stand for memory, i.e. pre-allocating buffers! .NEW_MEM depends on whether config.device is CPU or GPU.
First, reset_mem() and input_mem() are called:

    function input_mem()
        global config mem;
        mem.layer_inputs{1} = config.NEW_MEM(zeros(config.kernel_size(1, 1)*config.kernel_size(1, 2)*config.chs, ...
    (config.input_size(1)-config.kernel_size(1, 1)+1)*(config.input_size(2)-config.kernel_size(1, 2)+1)*config.batch_size));
        mem.activations{1} = config.NEW_MEM(zeros(config.feature_map_sizes{1}(3), config.feature_map_sizes{1}(1)*config.feature_map_sizes{1}(2)));
    end

input_mem() allocates the input buffers: layer_inputs{1} holds the im2col'ed input for the first conv layer, and activations{1} is zeros(feature_map_sizes{1}(3), spatial size of the first layer).
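
The first buffer is shaped for im2col: one row per kernel element and one column per sliding-window position across the whole batch. With made-up numbers, just to make the shape concrete:

    % Hypothetical sizes, only to illustrate the buffer shape allocated above:
    kh = 16; kw = 16; chs = 1;               % kernel_size(1,:) and input channels (made up)
    H = 64; W = 64; batch_size = 64;         % input_size and batch size (made up)
    rows = kh*kw*chs;                        % 256: one row per kernel element
    cols = (H-kh+1)*(W-kw+1)*batch_size;     % 49*49*64 = 153664 window positions
    % mem.layer_inputs{1} is then a rows-by-cols matrix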

Memory allocation between layers

    if strcmp(config.forward_pass_scheme{2}, 'conv_v')
        conv2conv_mem(1);
    end
    for m = 2:layer_num
        if strfind(config.forward_pass_scheme{m}, 'conv')
            conv_mem(m);
            if strcmp(config.forward_pass_scheme{m+1}, 'out')
                conv2out_mem();
            elseif strcmp(config.forward_pass_scheme{m+1}, 'conv_v')
                conv2conv_mem(m);
            end
        elseif strcmp(config.forward_pass_scheme{m}, 'pool')
            pool_mem(m);
            if strcmp(config.forward_pass_scheme{m+1}, 'conv_v')
                pool2conv_mem(m);
            end
        elseif strcmp(config.forward_pass_scheme{m}, 'full')
            full_mem(m);
        end
    end

This allocates the buffers that connect consecutive layers; all these *_mem functions live in the mem folder. In deep_edge_aware, mainly conv2conv_mem and conv2out_mem are used.

building pipeline

The pipeline mainly uses functions from the layers_adapters folder.
This is where config.pipeline_forward first appears.

    config.pipeline_forward = {};
    config.pipeline_forward{1} = @input2conv;
    conv_layer_c = 1;
    for idx = 1:layer_num
        if strfind(config.forward_pass_scheme{idx}, 'conv')
            conv_layer_c = conv_layer_c + 1;
            ...
            if strcmp(config.forward_pass_scheme{idx+1}, 'conv_v')
                config.pipeline_forward{length(config.pipeline_forward)+1} = @nonlinearity;
                if config.kernel_size(conv_layer_c, 1) == 1 && config.kernel_size(conv_layer_c, 2) == 1
                    config.pipeline_forward{length(config.pipeline_forward)+1} = @conv2conv1by1;
                else
                    config.pipeline_forward{length(config.pipeline_forward)+1} = @conv2conv;
                end
            elseif strcmp(config.forward_pass_scheme{idx+1}, 'conv_f')
                config.pipeline_forward{length(config.pipeline_forward)+1} = @nonlinearity;
                config.pipeline_forward{length(config.pipeline_forward)+1} = @conv2conv_f;

You can see this assembles the forward computation layer by layer. For example, conv2conv handles the step from one conv layer to the next. When a kernel is 1x1 in both dimensions, a dedicated function, conv2conv1by1, is used instead; this is exactly the case for the 1x1 second conv layer of deep_edge_aware, as sketched below.
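
The reason a 1x1 kernel gets its own adapter is that, with activations stored as a channels-by-pixels matrix, a 1x1 convolution is just a matrix product and needs no im2col rearrangement at all. A small illustration with made-up sizes (my own sketch, not the library's conv2conv1by1):

    % A 1x1 convolution is a plain matrix multiply (illustrative only):
    C_in = 64; C_out = 32; H = 49; W = 49;   % made-up feature map sizes
    X = randn(C_in, H*W);                    % previous activations, channels x pixels
    K = randn(C_out, C_in);                  % the 1x1 kernels flattened into a matrix
    Y = K * X;                               % same result as convolving with 1x1 kernels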

Some follow-up configuration

    config.SCALE_INPUT = @scale_input_nil;
    config.SCALE_OUTPUT = @scale_output_nil;

    if flag ~= 0
        return;
    end
    config.EXPAND_DELTA_OUT = @expand_delta_out_nil;
    if strcmp(config.nonlinearity, 'relu')
        config.DERI_NONLINEARITY = @deri_relu;
    ...

This configures the derivatives of the activation functions. If the output layer has its own activation, then:

    if strcmp(config.output_activation, 'softmax')
        config.DERI_OUT_ACT = @deri_softmax;
    elseif strcmp(config.output_activation, 'inherit')
        config.DERI_OUT_ACT = @deri_inherit;
    elseif strcmp(config.output_activation, 'nil')
        config.DERI_OUT_ACT = @deri_nonlinearity_nil;
    else
        config.DERI_OUT_ACT = @deri_softmax;
    end

For deep_edge_aware, however, the output activation is nil.

    for m = 2:layer_num
        if strcmp(config.forward_pass_scheme{m}, 'conv_v')
            if strcmp(config.forward_pass_scheme{m-1}, 'pool')
                convBpool_mem(m);
            elseif strfind(config.forward_pass_scheme{m}, 'conv')
                conv_layer_id = get_conv_layer_idx_from_layer_idx(m);
                if config.kernel_size(conv_layer_id, 1) ~= 1 && config.kernel_size(conv_layer_id, 2) ~= 1
                    convBconv_mem(m);
                end
            end
        end
    end

Honestly, I don't quite see what this part is for. Functions with a 'B' in the name belong to the backward pass; it seems to allocate backward buffers for certain connections from the second layer up to the last, but I'll leave it at that.

building pipeline for backprop

This is the key part: backprop is built here.
At this point I really have to vent about this framework: in the forward pass every layer has to check what the next layer is, and in the backward pass every layer has to check what the previous layer is, so propagation is always decided from pairs of adjacent layers. It is terribly redundant and complicated. I'll use MatConvNet in the future; I just can't wrestle with this.

    config.pipeline_backprop = {};
    config.pipeline_backprop{1} = @out_backprop;
    for idx = layer_num+1:-1:3
        if strcmp(config.forward_pass_scheme{idx}, 'out')
            if strcmp(config.forward_pass_scheme{idx-1}, 'conv_f')
                config.EXPAND_DELTA_OUT = @expand_delta_out_for_conv_f;
                config.pipeline_backprop{length(config.pipeline_backprop)+1} = @outBconv;
                config.pipeline_backprop{length(config.pipeline_backprop)+1} = @conv_backprop;
            ...
            else
                fprintf('in init(): backprop from the output layer to the specified layer is not yet supported.\n');
            end            
        elseif strcmp(config.forward_pass_scheme{idx}, 'conv_f')
            if strcmp(config.forward_pass_scheme{idx-1}, 'conv_v')                
                config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBconv_1by1;                
            else
                fprintf('in init(): backprop from conv_f to the specified layer is not yet supported.\n');
            end
            config.pipeline_backprop{length(config.pipeline_backprop)+1} = @conv_backprop;
        elseif strcmp(config.forward_pass_scheme{idx}, 'conv_v')
            if strfind(config.forward_pass_scheme{idx-1}, 'conv')
                conv_layer_id = get_conv_layer_idx_from_layer_idx(idx);
                if config.kernel_size(conv_layer_id, 1) == 1 && config.kernel_size(conv_layer_id, 2) == 1
                    config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBconv_1by1;
                else
                    config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBconv;
                end
                config.pipeline_backprop{length(config.pipeline_backprop)+1} = @conv_backprop;
                ...                        
    end

The code above handles the layers from the output layer down to layer 3; the remaining two layers (the last two steps of backprop) need special treatment:

    if strcmp(config.forward_pass_scheme{2}, 'conv_v') && config.kernel_size(2, 1) ~= 1 && config.kernel_size(2, 2) ~= 1
        config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBconv_last;
    end
    if strcmp(config.forward_pass_scheme{1}, 'conv_v_mask_norm')
        if strcmp(config.mask_for_SR, 'true')
            config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBinput_with_mask_accel;
        else
            config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBinput_with_mask;
        end
    elseif strcmp(config.forward_pass_scheme{1}, 'conv_v_sr')
        config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBinput_SR;
    else
        config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBinput;
    end  

convBconv_last covers the backprop step from the second layer back into the first, and the final step goes from the first conv layer back to the input (for deepeaf), hence @convBinput.

Weight updates: Adagrad

This is where config.UPDATE_WEIGHTS appears.

    if strcmp(config.optimization, 'adagrad')
        config.his_grad = {};
        config.fudge_factor = 1e-6;
        if strcmp(config.forward_pass_scheme{1}, 'conv_v_sr')
            config.UPDATE_WEIGHTS = @update_weights_adagrad_SR;
            config.his_grad{1} = {};
            for m = 1:config.misc.mask_type
                config.his_grad{1}{m} = config.NEW_MEM(zeros(size(config.weights{1}{m})));
            end
            for m = 2:length(config.weights)
                config.his_grad{m} = config.NEW_MEM(zeros(size(config.weights{m})));
            end
        else
            config.UPDATE_WEIGHTS = @update_weights_adagrad;
            for m = 1:length(config.weights)
                config.his_grad{m} = config.NEW_MEM(zeros(size(config.weights{m})));
            end
        end
    else
        fprintf('optimization method not supported, use adagrad as default\n');
        config.UPDATE_WEIGHTS = @update_weights_adagrad;
    end

update_weights_adagrad itself looks like this:

    function update_weights_adagrad()
        global config mem;
        for m = 1:length(config.weights)
            config.his_grad{m} = config.his_grad{m} + mem.grads{m} .* mem.grads{m};
            config.weights{m} = config.weights{m} - config.learning_rate * (mem.grads{m} ./ (config.fudge_factor + sqrt(config.his_grad{m})));
        end
    end
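
In formula form this is the standard Adagrad update: for each weight array $w_m$ with gradient $g_m$,

$$ h_m \leftarrow h_m + g_m \odot g_m, \qquad w_m \leftarrow w_m - \eta \, \frac{g_m}{\epsilon + \sqrt{h_m}}, $$

where $h_m$ is config.his_grad{m}, $\eta$ is config.learning_rate, and $\epsilon$ is the fudge factor 1e-6.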

Notes on deepeaf_training

    addpath applications/deep_edge_aware_filters/
    addpath applications/deep_edge_aware_filters/utility/
    addpath utils/
    addpath cuda/
    addpath mem/
    addpath layers/
    addpath layers_adapters/
    addpath optimization/
    addpath pipeline/
    addpath data/

    clearvars -global config;
    clearvars -global mem;
    clear;
    global config mem;
    deepeaf_configure();  % initialize config
    init(0);  % build the network from config: weight initialization plus the forward/backward pipeline setup between layers

    load('data/deepeaf/certainFilter/val/val_1');
    perm = randperm(size(test_samples, 4));
    test_samples = test_samples(:,:,:,perm);
    test_labels = test_labels(:,:,:,perm);
    test_samples = config.NEW_MEM(test_samples(:,:,:,1:200));
    test_labels = config.NEW_MEM(test_labels(:,:,:,1:200));
    test_samples = test_samples * 2;
    test_labels = test_labels * 2;

    count = 0;
    cost_avg = 0;
    epoc = 0;
    points_seen = 0;
    display_points = 500;
    save_points = 5000;
    fprintf('%s\n', datestr(now, 'dd-mm-yyyy HH:MM:SS FFF'));
    for pass = 1:10      % 10 passes over the training data
        for p = 1:100    % 100 patch files per pass
            load(strcat('data/deepeaf/certainFilter/train/patches_', num2str(p), '.mat'));        
            perm = randperm(1000);
            samples = samples(:,:,:,perm);
            labels = labels(:,:,:,perm);

            train_imgs = config.NEW_MEM(samples);
            train_labels = config.NEW_MEM(labels); 
            train_imgs = train_imgs * 2;
            train_labels = train_labels * 2;

            for i = 1:size(train_labels, 4) / config.batch_size            
                points_seen = points_seen + config.batch_size;
                % extract in and out; out must be center-cropped to config.output_size
                % (see the worked example in the comment after the crop below)
                in = train_imgs(:,:,:,(i-1)*config.batch_size+1:i*config.batch_size);
                out = train_labels(:,:,:,(i-1)*config.batch_size+1:i*config.batch_size);
                out = out((size(in, 1) - config.output_size(1)) / 2 + 1:(size(in, 1) - config.output_size(1)) / 2 + config.output_size(1), ...
                          (size(in, 2) - config.output_size(2)) / 2 + 1:(size(in, 2) - config.output_size(2)) / 2 + config.output_size(2), :, :);
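                % Worked example of this crop (hypothetical numbers, not from the source):
                % with a 64x64 input patch and config.output_size(1:2) = [56 56],
                % (64-56)/2+1 = 5, so rows/cols 5..60 are kept, i.e. the centered
                % 56x56 region, matching the spatial size the network outputs after
                % its valid convolutions.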

                % operate the training pipeline
                op_train_pipe(in, out);
                % update the weights
                % this is the default adagrad update configured in init()!
                config.UPDATE_WEIGHTS();

                if(cost_avg == 0)
                    cost_avg = config.cost;
                else
                    cost_avg = (cost_avg + config.cost) / 2;
                end

                % display point
                if(mod(points_seen, display_points) == 0)   
                    count = count + 1;
                    fprintf('%d ', count);
                end
                % save point
                if(mod(points_seen, save_points) == 0)
                    fprintf('\n%s', datestr(now, 'dd-mm-yyyy HH:MM:SS FFF'));
                    epoc = epoc + 1;
                    test_cost = 0;
                    for t = 1:size(test_samples, 4) / config.batch_size
                    % t_label also has to be cropped to output_size
                        t_label = test_labels(:,:,:,(t-1)*config.batch_size+1:t*config.batch_size);
                        t_label = config.NEW_MEM(t_label((size(in, 1) - config.output_size(1)) / 2 + 1:(size(in, 1) - config.output_size(1)) / 2 + config.output_size(1), ...
                                                (size(in, 2) - config.output_size(2)) / 2 + 1:(size(in, 2) - config.output_size(2)) / 2 + config.output_size(2), :));

                        % run the test pipeline, which only performs a forward pass
                        op_test_pipe(test_samples(:,:,:,(t-1)*config.batch_size+1:t*config.batch_size), t_label);
                        test_out = gather(mem.output);
                        test_cost = test_cost + config.cost;
                    end
                    test_cost = test_cost / size(test_samples, 4);
                    fprintf('\nepoc %d, training avg cost: %f, test avg cost: %f\n', epoc, cost_avg, test_cost);

                    save_weights(strcat('applications/deep_edge_aware_filters/results/epoc', num2str(epoc), '.mat'));
                    cost_avg = 0;
                end
            end
        end
    end
