Overview: this post is a set of notes on the VCNN framework (vcnn_double-bladed). I haven't fully understood it yet, but I'm starting to get the picture, so I'm writing it down anyway.
https://github.com/Georgezhouzhou/vcnn_double-bladed
addpath applications/deep_edge_aware_filters/utility/GT_filters/
addpath applications/deep_edge_aware_filters/utility/GT_filters/L0smoothing/
addpath data/
clear;
patch_dim = 64;
num_patches = 1000;
listing = dir('data/deepeaf/BSDS500/*.jpg');
fListing = dir('data/deepeaf/fImgs/*.jpg');
for m = 1 : 101
fprintf('Extracting patch batch: %d / %d\n', m, 101);
% extract random patches
samples = zeros(patch_dim, patch_dim, 3, num_patches);
labels = zeros(size(samples));
for i = 1 : num_patches / 8 % 1000 patches = 125 draws of 8; source images may repeat, and each draw picks a random top-left corner for the patch
if (mod(i,100) == 0)
fprintf('Extracting patch: %d / %d\n', i*8, num_patches);
end
r_idx = random('unid', size(listing, 1));
% the two folders sort identically, so image x and its filtered version share the same index
I = imread(strcat('data/deepeaf/BSDS500/', listing(r_idx).name));
fI = imread(strcat('data/deepeaf/fImgs/',fListing(r_idx).name));
orig_img_size = size(I);
r = random('unid', orig_img_size(1) - patch_dim + 1);
c = random('unid', orig_img_size(2) - patch_dim + 1);
% EdgeExtract only extracts gradients in the vertical direction; a horizontal flip
% plus the four 90-degree rotations below covers all 8 orientations of the matrix
patch = I(r:r+patch_dim-1, c:c+patch_dim-1, :);
fpatch = fI(r:r+patch_dim-1, c:c+patch_dim-1, :);
patchHoriFlipped = fliplr(patch);
fpatch = fliplr(fpatch); % flip the filtered patch the same way
idx_list = (i-1)*8+1:(i-1)*8+8;
for idx = 1:4
% samples stores vin, i.e. the gradients of the original patch
% labels stores vout, i.e. the gradients of the filtered patch
% in groups of 8: slots 1~4 hold Iy, Ix, -Iy, -Ix of the original/filtered patch
% slots 5~8 hold Iy, Ix, -Iy, -Ix of the horizontally flipped patch
% so any two slots 4 apart are left-right mirrors of each other
% (a sketch of EdgeExtract follows this script)
patch_rotated = im2double(imrotate(patch, (idx-1)*90));
patch_filtered = GT_filter(patch_rotated);
[vin, vout] = EdgeExtract(im2double(patch_rotated), im2double(patch_filtered));
samples(:,:,:,idx_list(idx)) = vin;
labels(:,:,:,idx_list(idx)) = vout;
patch_rotated = im2double(imrotate(patchHoriFlipped, (idx-1)*90));
patch_filtered = GT_filter(patch_rotated);
[vin, vout] = EdgeExtract(im2double(patch_rotated), im2double(patch_filtered));
samples(:,:,:,idx_list(idx+4)) = vin;
labels(:,:,:,idx_list(idx+4)) = vout;
end
end
samples = single(samples);
labels = single(labels);
% save it
filename = strcat('data/deepeaf/certainFilter/train/patches_', num2str(m));
save(filename, '-v7.3', 'samples', 'labels');
end
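For reference, GT_filter is whichever ground-truth edge-aware filter is being learned (e.g. the L0 smoothing code added to the path above), and EdgeExtract returns the vertical gradients of the original and of the filtered patch. Below is a minimal sketch of what such an EdgeExtract could look like, assuming a zero row is appended to keep the patch size; the repo's actual implementation may differ.
function [vin, vout] = EdgeExtract(img, fimg)
    % vertical forward differences of the input patch and the filtered patch;
    % a zero row is appended so the outputs keep the input size (assumption)
    vin  = cat(1, diff(img,  1, 1), zeros(1, size(img,  2), size(img,  3)));
    vout = cat(1, diff(fimg, 1, 1), zeros(1, size(fimg, 2), size(fimg, 3)));
end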
config.GEN_OUTPUT = @gen_output_copy;
.NEW_MEM = @to_gpu (this simply converts the data into GPU data: gpuArray(single(x)); see the sketch after this config list)
.IM2COL = @im2col_gpu
.NONLINEARITY = @relu;
.OUT_ACT = @nonlinearity_nil;
config.COST_FUN = @L2_norm;
.misc holds miscellaneous bits and other settings
.misc.current_layer = 1
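As for .NEW_MEM = @to_gpu: based on the note above it presumably amounts to the following (a guess, not copied from the repo; the CPU variant would just cast to single):
function x = to_gpu(x)
    x = gpuArray(single(x));   % single precision, then move to GPU memory
end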
r = config.weight_range;
conv_layer_c = 0;
pool_layer_c = 0;
full_layer_c = 0;
layer_num = length(config.forward_pass_scheme)-1;
config.layer_num = layer_num;
config.feature_map_sizes = {}; % each entry has 3 components; the third is the number of feature maps of that layer, its "depth", i.e. conv_hidden_size
config.weights = {};
for idx = 1:layer_num
if idx == 1
....
if strcmp(config.forward_pass_scheme{idx}, 'conv_v_sr')
...
elseif strcmp(config.forward_pass_scheme{idx}, 'conv_v')
config.weights{idx} = config.NEW_MEM(randn(config.feature_map_sizes{idx}(3), ...
config.kernel_size(conv_layer_c, 1)*config.kernel_size(conv_layer_c, 2)*config.chs)*r);
if config.normalize_init_weights
config.weights{idx} = config.weights{idx} / sqrt(config.kernel_size(conv_layer_c, 1) * config.kernel_size(conv_layer_c, 2) * config.conv_hidden_size(conv_layer_c));
end
This block configures each layer according to its type.
For the first layer, the conv_v weights are initialized, and if normalize_init_weights is set they are also normalized.
For any layer other than the first:
elseif strcmp(config.forward_pass_scheme{idx}, 'conv_v')
conv_layer_c = conv_layer_c + 1;
config.feature_map_sizes{idx} = [config.feature_map_sizes{idx-1}(1)-config.kernel_size(conv_layer_c,1)+1 ...
    config.feature_map_sizes{idx-1}(2)-config.kernel_size(conv_layer_c,2)+1 ...
    config.conv_hidden_size(conv_layer_c)];
config.weights{idx} = config.NEW_MEM(randn(config.feature_map_sizes{idx}(3), ...
config.kernel_size(conv_layer_c, 1)*config.kernel_size(conv_layer_c, 2)*config.feature_map_sizes{idx-1}(3))*r);
if config.normalize_init_weights
config.weights{idx} = config.weights{idx} / sqrt(config.kernel_size(conv_layer_c, 1) * config.kernel_size(conv_layer_c, 2) * config.conv_hidden_size(conv_layer_c));
end
So feature_map_sizes{idx} has three components: the first two are the spatial size, and the third is conv_hidden_size(conv_layer_c), where conv_layer_c counts the conv layers seen so far. The normalization code is the same as before.
The first layer is special because it takes the raw input; every other layer derives its feature map size (and other settings) from the previous layer.
elseif strcmp(config.forward_pass_scheme{idx}, 'conv_f')
conv_layer_c = conv_layer_c + 1;
if idx == layer_num
config.weights{idx} = config.NEW_MEM(randn(config.kernel_size(conv_layer_c, 1)*config.kernel_size(conv_layer_c, 2)*config.output_size(3), config.conv_hidden_size(conv_layer_c-1))*r);
if config.normalize_init_weights
config.weights{idx} = config.weights{idx} / sqrt(config.kernel_size(conv_layer_c, 1) * config.kernel_size(conv_layer_c, 2) * size(config.weights{idx}, 1));
end
config.GEN_OUTPUT = @gen_output_from_conv_f;
else
fprintf('in init(): conv_f layer in the hidden layer not supported yet.\n');
end
Note: from the code, conv_f may only appear as the last layer. Also, the weights are not drawn as plain N(0,1); they are scaled by a factor, here r, i.e. config.weight_range defined in deepeaf_configure.m.
With that, the network has been built from config: weight initialization and normalization, plus the feature map size of every layer.
for idx = 1:layer_num-1
config.weights{idx+layer_num} = config.NEW_MEM(zeros(config.feature_map_sizes{idx}(3), 1)+0.01);
end
if strcmp(config.forward_pass_scheme{layer_num}, 'conv_f')
config.weights{layer_num*2} = config.NEW_MEM(zeros(size(config.weights{layer_num}, 1), 1)+0.05);
else
config.weights{layer_num*2} = config.NEW_MEM(zeros(config.output_size(3), 1)+0.05);
end
So config.weights has layer_num*2 entries in total. deep_edge_aware, for example, has 3 conv layers: forward_pass_scheme is configured as 'conv_v', 'conv_v', 'conv_f', 'out', and layer_num = length(config.forward_pass_scheme)-1 = 4-1 = 3. weights{1~3} are the actual convolution weights, weights{4~5} are the biases of the first two layers, and weights{6} is the bias of the last layer.
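To make the indexing concrete, here is the layout just described, written out for the deep_edge_aware scheme (an illustration, not copied from deepeaf_configure.m):
config.forward_pass_scheme = {'conv_v', 'conv_v', 'conv_f', 'out'};
layer_num = length(config.forward_pass_scheme) - 1;   % = 3
% config.weights{1..3}: convolution kernels of the three conv layers
% config.weights{4..5}: biases of conv layers 1 and 2, initialized to 0.01
% config.weights{6}   : bias of the final conv_f layer, initialized to 0.05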
So 'mem' just means memory, as in memory allocation! And .NEW_MEM is chosen according to whether config.device is a CPU or a GPU.
First reset_mem() and input_mem() are run:
function input_mem()
global config mem;
mem.layer_inputs{1} = config.NEW_MEM(zeros(config.kernel_size(1, 1)*config.kernel_size(1, 2)*config.chs, ...
(config.input_size(1)-config.kernel_size(1, 1)+1)*(config.input_size(2)-config.kernel_size(1, 2)+1)*config.batch_size));
mem.activations{1} = config.NEW_MEM(zeros(config.feature_map_sizes{1}(3), config.feature_map_sizes{1}(1)*config.feature_map_sizes{1}(2)));
end
input_mem() initializes the input buffers, and .activations{1} is zeros(feature_map_sizes{1}(3), spatial size of the first layer's feature map).
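The odd-looking shape of layer_inputs{1} is just the im2col layout: each column is one kernel-sized window unrolled into a vector, with one column per spatial position (times batch_size). A toy check with made-up numbers (the real kernel and input sizes come from the configuration, not from here):
in   = rand(64, 64);                    % one 64x64 channel (example values)
cols = im2col(in, [16 16], 'sliding');  % hypothetical 16x16 kernel
size(cols)                              % [16*16, (64-16+1)^2] = [256, 2401]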
if strcmp(config.forward_pass_scheme{2}, 'conv_v')
conv2conv_mem(1);
end
for m = 2:layer_num
if strfind(config.forward_pass_scheme{m}, 'conv')
conv_mem(m);
if strcmp(config.forward_pass_scheme{m+1}, 'out')
conv2out_mem();
elseif strcmp(config.forward_pass_scheme{m+1}, 'conv_v')
conv2conv_mem(m);
end
elseif strcmp(config.forward_pass_scheme{m}, 'pool')
pool_mem(m);
if strcmp(config.forward_pass_scheme{m+1}, 'conv_v')
pool2conv_mem(m);
end
elseif strcmp(config.forward_pass_scheme{m}, 'full')
full_mem(m);
end
end
This allocates the memory for the connections between layers; all of these *_mem functions live in the mem folder. deep_edge_aware mainly uses conv2conv_mem and conv2out_mem.
The pipeline itself mostly uses functions from the layers_adapters folder.
Here .pipeline_forward appears for the first time.
config.pipeline_forward = {};
config.pipeline_forward{1} = @input2conv;
conv_layer_c = 1;
for idx = 1:layer_num
if strfind(config.forward_pass_scheme{idx}, 'conv')
conv_layer_c = conv_layer_c + 1;
...
if strcmp(config.forward_pass_scheme{idx+1}, 'conv_v')
config.pipeline_forward{length(config.pipeline_forward)+1} = @nonlinearity;
if config.kernel_size(conv_layer_c, 1) == 1 && config.kernel_size(conv_layer_c, 2) == 1
config.pipeline_forward{length(config.pipeline_forward)+1} = @conv2conv1by1;
else
config.pipeline_forward{length(config.pipeline_forward)+1} = @conv2conv;
end
elseif strcmp(config.forward_pass_scheme{idx+1}, 'conv_f')
config.pipeline_forward{length(config.pipeline_forward)+1} = @nonlinearity;
config.pipeline_forward{length(config.pipeline_forward)+1} = @conv2conv_f;
So this assembles the forward pass layer by layer. For example, conv2conv is the adapter from one conv layer to the next. When a kernel is 1x1 in both dimensions a specialized adapter, conv2conv1by1, is used instead; this handles the 1x1 second conv layer of deep_edge_aware.
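Since the adapters all seem to read and write the global mem struct (as input_mem() above suggests), I assume running the forward pass just means walking this cell array in order. A sketch of that idea only, not the repo's actual driver code:
global config mem;
for k = 1:length(config.pipeline_forward)
    config.pipeline_forward{k}();   % e.g. @input2conv, @nonlinearity, @conv2conv, ...
end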
config.SCALE_INPUT = @scale_input_nil;
config.SCALE_OUTPUT = @scale_output_nil;
if flag ~= 0
return;
end
config.EXPAND_DELTA_OUT = @expand_delta_out_nil;
if strcmp(config.nonlinearity, 'relu')
config.DERI_NONLINEARITY = @deri_relu;
...
This configures the (derivative of the) activation function. If the output layer has an activation of its own, then:
if strcmp(config.output_activation, 'softmax')
config.DERI_OUT_ACT = @deri_softmax;
elseif strcmp(config.output_activation, 'inherit')
config.DERI_OUT_ACT = @deri_inherit;
elseif strcmp(config.output_activation, 'nil')
config.DERI_OUT_ACT = @deri_nonlinearity_nil;
else
config.DERI_OUT_ACT = @deri_softmax;
end
For deep_edge_aware, however, the output activation is nil.
for m = 2:layer_num
if strcmp(config.forward_pass_scheme{m}, 'conv_v')
if strcmp(config.forward_pass_scheme{m-1}, 'pool')
convBpool_mem(m);
elseif strfind(config.forward_pass_scheme{m}, 'conv')
conv_layer_id = get_conv_layer_idx_from_layer_idx(m);
if config.kernel_size(conv_layer_id, 1) ~= 1 && config.kernel_size(conv_layer_id, 2) ~= 1
convBconv_mem(m);
end
end
end
end
Honestly, I don't fully understand what this part is for. Functions with a 'B' in the name are the backward versions; this seems to check, from the second layer up to the last, which special backward connections need memory allocated. Moving on.
This is the important part: backpropagation is set up here.
Having read this far, I have to say this framework is hard to love. In the forward pass every layer has to consider what the next layer is, and in the backward pass every layer has to consider what the previous layer is; how to propagate is always decided by looking at a pair of adjacent layers. That is just too redundant and complicated. I'll stick with MatConvNet in the future; I can't keep wrestling with this one.
config.pipeline_backprop = {};
config.pipeline_backprop{1} = @out_backprop;
for idx = layer_num+1:-1:3
if strcmp(config.forward_pass_scheme{idx}, 'out')
if strcmp(config.forward_pass_scheme{idx-1}, 'conv_f')
config.EXPAND_DELTA_OUT = @expand_delta_out_for_conv_f;
config.pipeline_backprop{length(config.pipeline_backprop)+1} = @outBconv;
config.pipeline_backprop{length(config.pipeline_backprop)+1} = @conv_backprop;
...
else
fprintf('in init(): backprop from the output layer to the specified layer is not yet supported.\n');
end
elseif strcmp(config.forward_pass_scheme{idx}, 'conv_f')
if strcmp(config.forward_pass_scheme{idx-1}, 'conv_v')
config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBconv_1by1;
else
fprintf('in init(): backprop from conv_f to the specified layer is not yet supported.\n');
end
config.pipeline_backprop{length(config.pipeline_backprop)+1} = @conv_backprop;
elseif strcmp(config.forward_pass_scheme{idx}, 'conv_v')
if strfind(config.forward_pass_scheme{idx-1}, 'conv')
conv_layer_id = get_conv_layer_idx_from_layer_idx(idx);
if config.kernel_size(conv_layer_id, 1) == 1 && config.kernel_size(conv_layer_id, 2) == 1
config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBconv_1by1;
else
config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBconv;
end
config.pipeline_backprop{length(config.pipeline_backprop)+1} = @conv_backprop;
...
end
The loop above handles backprop from the output layer down to layer 3; the last two backprop steps (into the first two layers) need special handling:
if strcmp(config.forward_pass_scheme{2}, 'conv_v') && config.kernel_size(2, 1) ~= 1 && config.kernel_size(2, 2) ~= 1
config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBconv_last;
end
if strcmp(config.forward_pass_scheme{1}, 'conv_v_mask_norm')
if strcmp(config.mask_for_SR, 'true')
config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBinput_with_mask_accel;
else
config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBinput_with_mask;
end
elseif strcmp(config.forward_pass_scheme{1}, 'conv_v_sr')
config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBinput_SR;
else
config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBinput;
end
convBconv_last covers the second-to-last backprop step, while the very last step goes from the first conv layer back to the input (for deepeaf), hence @convBinput.
Here .UPDATE_WEIGHTS appears:
if strcmp(config.optimization, 'adagrad')
config.his_grad = {};
config.fudge_factor = 1e-6;
if strcmp(config.forward_pass_scheme{1}, 'conv_v_sr')
config.UPDATE_WEIGHTS = @update_weights_adagrad_SR;
config.his_grad{1} = {};
for m = 1:config.misc.mask_type
config.his_grad{1}{m} = config.NEW_MEM(zeros(size(config.weights{1}{m})));
end
for m = 2:length(config.weights)
config.his_grad{m} = config.NEW_MEM(zeros(size(config.weights{m})));
end
else
config.UPDATE_WEIGHTS = @update_weights_adagrad;
for m = 1:length(config.weights)
config.his_grad{m} = config.NEW_MEM(zeros(size(config.weights{m})));
end
end
else
fprintf('optimization method not supported, use adagrad as default\n');
config.UPDATE_WEIGHTS = @update_weights_adagrad;
end
update_weights_adagrad itself is the following:
function update_weights_adagrad()
global config mem;
for m = 1:length(config.weights)
config.his_grad{m} = config.his_grad{m} + mem.grads{m} .* mem.grads{m};
config.weights{m} = config.weights{m} - config.learning_rate * (mem.grads{m} ./ (config.fudge_factor + sqrt(config.his_grad{m})));
end
end
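In other words, AdaGrad gives every weight its own step size: his_grad accumulates the squared gradients, so the effective learning rate of a weight is learning_rate / (fudge_factor + sqrt(accumulated squared gradients)), which shrinks for weights that keep receiving large gradients while staying large for rarely-updated ones.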
addpath applications/deep_edge_aware_filters/
addpath applications/deep_edge_aware_filters/utility/
addpath utils/
addpath cuda/
addpath mem/
addpath layers/
addpath layers_adapters/
addpath optimization/
addpath pipeline/
addpath data/
clearvars -global config;
clearvars -global mem;
clear;
global config mem;
deepeaf_configure(); % initialize config
init(0); % build the network from config: weight init, plus all the per-layer forward/backward pipeline settings
load('data/deepeaf/certainFilter/val/val_1');
perm = randperm(size(test_samples, 4));
test_samples = test_samples(:,:,:,perm);
test_labels = test_labels(:,:,:,perm);
test_samples = config.NEW_MEM(test_samples(:,:,:,1:200));
test_labels = config.NEW_MEM(test_labels(:,:,:,1:200));
test_samples = test_samples * 2;
test_labels = test_labels * 2;
count = 0;
cost_avg = 0;
epoc = 0;
points_seen = 0;
display_points = 500;
save_points = 5000;
fprintf('%s\n', datestr(now, 'dd-mm-yyyy HH:MM:SS FFF'));
for pass = 1:10 % 10 passes over the data
for p = 1:100 % 100 patch files per pass
load(strcat('data/deepeaf/certainFilter/train/patches_', num2str(p), '.mat'));
perm = randperm(1000);
samples = samples(:,:,:,perm);
labels = labels(:,:,:,perm);
train_imgs = config.NEW_MEM(samples);
train_labels = config.NEW_MEM(labels);
train_imgs = train_imgs * 2;
train_labels = train_labels * 2;
for i = 1:size(train_labels, 4) / config.batch_size
points_seen = points_seen + config.batch_size;
% extract in and out; out has to be center-cropped down to output_size
in = train_imgs(:,:,:,(i-1)*config.batch_size+1:i*config.batch_size);
out = train_labels(:,:,:,(i-1)*config.batch_size+1:i*config.batch_size);
out = out((size(in, 1) - config.output_size(1)) / 2 + 1:(size(in, 1) - config.output_size(1)) / 2 + config.output_size(1), ...
(size(in, 2) - config.output_size(2)) / 2 + 1:(size(in, 2) - config.output_size(2)) / 2 + config.output_size(2), :, :);
% operate the training pipeline
op_train_pipe(in, out);
% update the weights
% this is the default adagrad update!
config.UPDATE_WEIGHTS();
if(cost_avg == 0)
cost_avg = config.cost;
else
cost_avg = (cost_avg + config.cost) / 2;
end
% display point
if(mod(points_seen, display_points) == 0)
count = count + 1;
fprintf('%d ', count);
end
% save point
if(mod(points_seen, save_points) == 0)
fprintf('\n%s', datestr(now, 'dd-mm-yyyy HH:MM:SS FFF'));
epoc = epoc + 1;
test_cost = 0;
for t = 1:size(test_samples, 4) / config.batch_size
% t_label also has to be cropped to output_size
t_label = test_labels(:,:,:,(t-1)*config.batch_size+1:t*config.batch_size);
t_label = config.NEW_MEM(t_label((size(in, 1) - config.output_size(1)) / 2 + 1:(size(in, 1) - config.output_size(1)) / 2 + config.output_size(1), ...
(size(in, 2) - config.output_size(2)) / 2 + 1:(size(in, 2) - config.output_size(2)) / 2 + config.output_size(2), :));
% run the test pipeline; it only does the forward pass
op_test_pipe(test_samples(:,:,:,(t-1)*config.batch_size+1:t*config.batch_size), t_label);
test_out = gather(mem.output);
test_cost = test_cost + config.cost;
end
test_cost = test_cost / size(test_samples, 4);
fprintf('\nepoc %d, training avg cost: %f, test avg cost: %f\n', epoc, cost_avg, test_cost);
save_weights(strcat('applications/deep_edge_aware_filters/results/epoc', num2str(epoc), '.mat'));
cost_avg = 0;
end
end
end