In an interview yesterday I was asked about the BN layer and the LRN layer. I have always used BN, so at the time I only had a vague impression of LRN. Afterwards I filled in this gap, and here is a brief account of my understanding of the LRN layer; if anything is misleading, corrections are welcome.
The LRN layer normalizes over a local region: values with relatively large responses come out relatively larger, while smaller responses are suppressed. Let's first look at the Caffe prototxt.
Four parameters are involved (norm_region, local_size, alpha and beta):
layer {
name: "norm1"
type: "LRN"
bottom: "conv1"
top: "norm1"
lrn_param {
// norm_region: ACROSS_CHANNELS means the sum for normalization runs over adjacent channels,
// so the local region has shape local_size*1*1; WITHIN_CHANNEL means the sum runs inside a
// single channel, with a local region of shape 1*local_size*local_size
local_size: 5 // default 5; across channels it is the number of channels summed over, within a channel it is the side length of the square region summed over
alpha: 0.0001 // parameter in the formula, default 1
beta: 0.75 // parameter in the formula, default 0.75
}
}
During normalization, each input value is divided by (1 + (alpha/n) * sum_i(x_i^2))^beta, where n is local_size and the sum runs over the local region; the alpha and beta in the formula are the parameter values above. In later Caffe versions the 1 is generalized to k, which is also a parameter. A small standalone sketch of the formula follows, and after it the source in caffe/src/caffe/layers/lrn_layer.cpp, with comments added to the code.
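The sketch below is my own toy code, not part of Caffe; the function name lrn_forward, the single-pixel setup, and the deliberately large alpha are only there to make the effect visible. It applies the cross-channel formula to one spatial position with C channel responses:

// Toy cross-channel LRN for a single spatial position (illustration only, not Caffe code).
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// y[c] = x[c] / (k + alpha/n * sum_{j in window around c} x[j]^2)^beta
std::vector<float> lrn_forward(const std::vector<float>& x, int local_size,
                               float alpha, float beta, float k) {
  const int C = static_cast<int>(x.size());
  const int half = (local_size - 1) / 2;
  std::vector<float> y(C);
  for (int c = 0; c < C; ++c) {
    float sum_sq = 0.f;
    for (int j = std::max(0, c - half); j <= std::min(C - 1, c + half); ++j) {
      sum_sq += x[j] * x[j];  // channels outside [0, C) count as zero, like Caffe's padding
    }
    y[c] = x[c] / std::pow(k + alpha / local_size * sum_sq, beta);
  }
  return y;
}

int main() {
  std::vector<float> x = {1.f, 2.f, 8.f, 2.f, 1.f};
  // alpha is set far larger than the usual 0.0001 so the suppression is visible
  std::vector<float> y = lrn_forward(x, /*local_size=*/5, /*alpha=*/1.f,
                                     /*beta=*/0.75f, /*k=*/1.f);
  for (float v : y) std::printf("%.3f ", v);  // roughly 0.13 0.25 1.01 0.25 0.13
  std::printf("\n");
  return 0;
}

Every channel is divided by a similar denominator, so after normalization the strong middle response dominates its neighbours even more, which is exactly the behaviour described above.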
#include <vector>
#include "caffe/layers/lrn_layer.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
template <typename Dtype>
void LRNLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
size_ = this->layer_param_.lrn_param().local_size();// read local_size into size_
CHECK_EQ(size_ % 2, 1) << "LRN only supports odd values for local_size";// local_size must be odd, otherwise abort
pre_pad_ = (size_ - 1) / 2; // padding: half the window of zeros on each side
alpha_ = this->layer_param_.lrn_param().alpha();// read alpha
beta_ = this->layer_param_.lrn_param().beta();// read beta
k_ = this->layer_param_.lrn_param().k();// read k
if (this->layer_param_.lrn_param().norm_region() ==// the WITHIN_CHANNEL mode additionally sets up a chain of internal layers; it is not the focus here
LRNParameter_NormRegion_WITHIN_CHANNEL) {
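// The within-channel path is assembled from existing Caffe layers rather than
// written by hand: split the input, square it with a PowerLayer, average-pool
// the squares over a size_ x size_ window, raise (1 + alpha * pooled value) to
// the power -beta with another PowerLayer, and finally multiply element-wise
// with the original input via an EltwiseLayer. The setup below wires these up.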
// Set up split_layer_ to use inputs in the numerator and denominator.
split_top_vec_.clear();
split_top_vec_.push_back(&product_input_);
split_top_vec_.push_back(&square_input_);
LayerParameter split_param;
split_layer_.reset(new SplitLayer<Dtype>(split_param));
split_layer_->SetUp(bottom, split_top_vec_);
// Set up square_layer_ to square the inputs.
square_bottom_vec_.clear();
square_top_vec_.clear();
square_bottom_vec_.push_back(&square_input_);
square_top_vec_.push_back(&square_output_);
LayerParameter square_param;
square_param.mutable_power_param()->set_power(Dtype(2));
square_layer_.reset(new PowerLayer<Dtype>(square_param));
square_layer_->SetUp(square_bottom_vec_, square_top_vec_);
// Set up pool_layer_ to sum over square neighborhoods of the input.
pool_top_vec_.clear();
pool_top_vec_.push_back(&pool_output_);
LayerParameter pool_param;
pool_param.mutable_pooling_param()->set_pool(
PoolingParameter_PoolMethod_AVE);
pool_param.mutable_pooling_param()->set_pad(pre_pad_);
pool_param.mutable_pooling_param()->set_kernel_size(size_);
pool_layer_.reset(new PoolingLayer<Dtype>(pool_param));
pool_layer_->SetUp(square_top_vec_, pool_top_vec_);
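// Note: AVE pooling divides the summed squares by the kernel area (size_^2),
// which is why the comment below writes the scaling as alpha_/N^2 acting on the
// raw sum s rather than alpha_ acting on the mean.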
// Set up power_layer_ to compute (1 + alpha_/N^2 s)^-beta_, where s is
// the sum of a squared neighborhood (the output of pool_layer_).
power_top_vec_.clear();
power_top_vec_.push_back(&power_output_);
LayerParameter power_param;
power_param.mutable_power_param()->set_power(-beta_);
power_param.mutable_power_param()->set_scale(alpha_);
power_param.mutable_power_param()->set_shift(Dtype(1));
power_layer_.reset(new PowerLayer<Dtype>(power_param));
power_layer_->SetUp(pool_top_vec_, power_top_vec_);
// Set up a product_layer_ to compute outputs by multiplying inputs by the
// inverse denominator computed by the power layer.
product_bottom_vec_.clear();
product_bottom_vec_.push_back(&product_input_);
product_bottom_vec_.push_back(&power_output_);
LayerParameter product_param;
EltwiseParameter* eltwise_param = product_param.mutable_eltwise_param();
eltwise_param->set_operation(EltwiseParameter_EltwiseOp_PROD);
product_layer_.reset(new EltwiseLayer<Dtype>(product_param));
product_layer_->SetUp(product_bottom_vec_, top);
}
}
template <typename Dtype>
void LRNLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
<< "corresponding to (num, channels, height, width)";
num_ = bottom[0]->num();
channels_ = bottom[0]->channels();
height_ = bottom[0]->height();
width_ = bottom[0]->width();// cache num / channels / height / width
switch (this->layer_param_.lrn_param().norm_region()) {// dispatch on the norm_region parameter
case LRNParameter_NormRegion_ACROSS_CHANNELS:// cross-channel normalization
top[0]->Reshape(num_, channels_, height_, width_);// give top the same shape as bottom; during normalization,
// multiplying bottom by the scale_ factor then directly yields the corresponding top values
scale_.Reshape(num_, channels_, height_, width_);
break;
case LRNParameter_NormRegion_WITHIN_CHANNEL:// within-channel (spatial) normalization
split_layer_->Reshape(bottom, split_top_vec_);
square_layer_->Reshape(square_bottom_vec_, square_top_vec_);
pool_layer_->Reshape(square_top_vec_, pool_top_vec_);
power_layer_->Reshape(pool_top_vec_, power_top_vec_);
product_layer_->Reshape(product_bottom_vec_, top);
break;
}
}
template <typename Dtype>
void LRNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
switch (this->layer_param_.lrn_param().norm_region()) {
case LRNParameter_NormRegion_ACROSS_CHANNELS:
CrossChannelForward_cpu(bottom, top);
break;
case LRNParameter_NormRegion_WITHIN_CHANNEL:
WithinChannelForward(bottom, top);
break;
default:
LOG(FATAL) << "Unknown normalization region.";
}
}
template <typename Dtype>
void LRNLayer<Dtype>::CrossChannelForward_cpu(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
const Dtype* bottom_data = bottom[0]->cpu_data();
Dtype* top_data = top[0]->mutable_cpu_data();
Dtype* scale_data = scale_.mutable_cpu_data();// raw pointers into each Blob's memory for the operations below
// start with the constant value
for (int i = 0; i < scale_.count(); ++i) {// initialize every scale_ entry to 1 (the constant in the formula; k_ in later Caffe versions)
scale_data[i] = 1.;
}
Blob<Dtype> padded_square(1, channels_ + size_ - 1, height_, width_);// zero-padded blob; its channel dimension is size_ - 1 larger than bottom's
Dtype* padded_square_data = padded_square.mutable_cpu_data();
caffe_set(padded_square.count(), Dtype(0), padded_square_data);// clear it first
Dtype alpha_over_size = alpha_ / size_;// precompute the alpha/n from the formula
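// Plan for each image: fill scale_[n, c] with 1 + (alpha_/size_) * (sum of squares
// of the size_ channels centered on c, zero-padded at the channel borders), then
// compute top = bottom .* scale_^(-beta_) at the very end.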
// go through the images
for (int n = 0; n < num_; ++n) {// loop over the num_ images in the batch and normalize each one separately
// compute the padded square
caffe_sqr(channels_ * height_ * width_,
bottom_data + bottom[0]->offset(n),
padded_square_data + padded_square.offset(0, pre_pad_));// square bottom and write it into padded_square, leaving the first pre_pad_ channels as zeros
// Create the first channel scale
for (int c = 0; c < size_; ++c) {// accumulate the squares of the first size_ padded channels, each scaled by the precomputed alpha/n, into scale_ (computing 1 + (alpha/n) * sum_i(x_i^2) for the window centered on channel 0)
caffe_axpy(height_ * width_, alpha_over_size,
padded_square_data + padded_square.offset(0, c),
scale_data + scale_.offset(n, 0));
}
for (int c = 1; c < channels_; ++c) {// compute the remaining scale_ values with a FIFO-style sliding window: copy the previous sum, add the new head channel and subtract the old tail, avoiding a full re-summation
// copy previous scale
caffe_copy(height_ * width_,
scale_data + scale_.offset(n, c - 1),
scale_data + scale_.offset(n, c));
// add head
caffe_axpy(height_ * width_, alpha_over_size,
padded_square_data + padded_square.offset(0, c + size_ - 1),
scale_data + scale_.offset(n, c));
// subtract tail
caffe_axpy(height_ * width_, -alpha_over_size,
padded_square_data + padded_square.offset(0, c - 1),
scale_data + scale_.offset(n, c));
}
}
// In the end, compute output
caffe_powx(scale_.count(), scale_data, -beta_, top_data);// raise scale_ to the power -beta_; the division in the formula becomes a multiplication, hence the negative exponent
caffe_mul(scale_.count(), top_data, bottom_data, top_data);// top = bottom .* scale_^(-beta_)
}
template <typename Dtype>
void LRNLayer<Dtype>::WithinChannelForward(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
split_layer_->Forward(bottom, split_top_vec_);
square_layer_->Forward(square_bottom_vec_, square_top_vec_);
pool_layer_->Forward(square_top_vec_, pool_top_vec_);
power_layer_->Forward(pool_top_vec_, power_top_vec_);
product_layer_->Forward(product_bottom_vec_, top);
}
template <typename Dtype>
void LRNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
switch (this->layer_param_.lrn_param().norm_region()) {
case LRNParameter_NormRegion_ACROSS_CHANNELS:
CrossChannelBackward_cpu(top, propagate_down, bottom);
break;
case LRNParameter_NormRegion_WITHIN_CHANNEL:
WithinChannelBackward(top, propagate_down, bottom);
break;
default:
LOG(FATAL) << "Unknown normalization region.";
}
}
template <typename Dtype>
void LRNLayer<Dtype>::CrossChannelBackward_cpu(
const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
const Dtype* top_diff = top[0]->cpu_diff();
const Dtype* top_data = top[0]->cpu_data();
const Dtype* bottom_data = bottom[0]->cpu_data();
const Dtype* scale_data = scale_.cpu_data();
Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
Blob<Dtype> padded_ratio(1, channels_ + size_ - 1, height_, width_);
Blob<Dtype> accum_ratio(1, 1, height_, width_);
Dtype* padded_ratio_data = padded_ratio.mutable_cpu_data();
Dtype* accum_ratio_data = accum_ratio.mutable_cpu_data();
// We hack a little bit by using the diff() to store an additional result
Dtype* accum_ratio_times_bottom = accum_ratio.mutable_cpu_diff();
caffe_set(padded_ratio.count(), Dtype(0), padded_ratio_data);
Dtype cache_ratio_value = 2. * alpha_ * beta_ / size_;
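// The gradient being computed is
//   dE/dx_i = scale_i^(-beta) * dE/dy_i
//             - (2*alpha*beta/size_) * x_i * sum_j (dE/dy_j * y_j / scale_j),
// where the sum runs over the channels j whose local window contains i.
// The next two calls produce the first term in bottom_diff; the loop below
// accumulates the ratio dE/dy_j * y_j / scale_j over a sliding window and
// subtracts the second term, with cache_ratio_value = 2*alpha*beta/size_.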
caffe_powx(scale_.count(), scale_data, -beta_, bottom_diff);
caffe_mul(scale_.count(), top_diff, bottom_diff, bottom_diff);
// go through individual data
int inverse_pre_pad = size_ - (size_ + 1) / 2;
for (int n = 0; n < num_; ++n) {
int block_offset = scale_.offset(n);
// first, compute diff_i * y_i / s_i
caffe_mul(channels_ * height_ * width_,
top_diff + block_offset, top_data + block_offset,
padded_ratio_data + padded_ratio.offset(0, inverse_pre_pad));
caffe_div(channels_ * height_ * width_,
padded_ratio_data + padded_ratio.offset(0, inverse_pre_pad),
scale_data + block_offset,
padded_ratio_data + padded_ratio.offset(0, inverse_pre_pad));
// Now, compute the accumulated ratios and the bottom diff
caffe_set(accum_ratio.count(), Dtype(0), accum_ratio_data);
for (int c = 0; c < size_ - 1; ++c) {
caffe_axpy(height_ * width_, 1.,
padded_ratio_data + padded_ratio.offset(0, c), accum_ratio_data);
}
for (int c = 0; c < channels_; ++c) {
caffe_axpy(height_ * width_, 1.,
padded_ratio_data + padded_ratio.offset(0, c + size_ - 1),
accum_ratio_data);
// compute bottom diff
caffe_mul(height_ * width_,
bottom_data + top[0]->offset(n, c),
accum_ratio_data, accum_ratio_times_bottom);
caffe_axpy(height_ * width_, -cache_ratio_value,
accum_ratio_times_bottom, bottom_diff + top[0]->offset(n, c));
caffe_axpy(height_ * width_, -1.,
padded_ratio_data + padded_ratio.offset(0, c), accum_ratio_data);
}
}
}
template <typename Dtype>
void LRNLayer<Dtype>::WithinChannelBackward(
const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
if (propagate_down[0]) {
vector<bool> product_propagate_down(2, true);
product_layer_->Backward(top, product_propagate_down, product_bottom_vec_);
power_layer_->Backward(power_top_vec_, propagate_down, pool_top_vec_);
pool_layer_->Backward(pool_top_vec_, propagate_down, square_top_vec_);
square_layer_->Backward(square_top_vec_, propagate_down,
square_bottom_vec_);
split_layer_->Backward(split_top_vec_, propagate_down, bottom);
}
}
#ifdef CPU_ONLY
STUB_GPU(LRNLayer);
STUB_GPU_FORWARD(LRNLayer, CrossChannelForward);
STUB_GPU_BACKWARD(LRNLayer, CrossChannelBackward);
#endif
INSTANTIATE_CLASS(LRNLayer);
} // namespace caffe