Paper: "SegNet: A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation"
SegNet: Web Demo
github: alexgkendall/caffe-segnet
github: TimoSaemann/caffe-segnet-cudnn5 (recommended)
Two layers need to be added: UpsampleLayer and BNLayer.
Upsample enlarges a feature map, placing each input value back at the position recorded in the corresponding max-pooling mask; functionally it is comparable to a Deconvolution layer.
BN is BatchNorm. Official Caffe also ships an implementation, but it is best to use the author's version here, which saves you from having to rewrite the network definition.
The cpp, hpp, and cu files for both layers are given below.
Beyond that, caffe.proto must be modified to declare the two layers.
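For reference, here is a minimal sketch of the caffe.proto additions, reconstructed from the parameters the layer code below actually reads (bn_mode, scale_filler, shift_filler, scale, scale_h/scale_w, pad_out_h/pad_out_w, upsample_h/upsample_w). If you use the caffe-segnet fork linked above, these messages are already present; the field IDs inside LayerParameter are placeholders, so pick the next unused IDs in your own caffe.proto:
// inside message LayerParameter (IDs are placeholders; use free ones):
//   optional BNParameter bn_param = 151;
//   optional UpsampleParameter upsample_param = 152;
message BNParameter {
  enum BNMode {
    LEARN = 0;
    INFERENCE = 1;
  }
  optional BNMode bn_mode = 1 [default = LEARN];
  optional FillerParameter scale_filler = 2;
  optional FillerParameter shift_filler = 3;
}
message UpsampleParameter {
  // DEPRECATED: use upsample_h & upsample_w where possible
  optional uint32 scale = 1 [default = 2];
  optional uint32 scale_h = 2;
  optional uint32 scale_w = 3;
  optional bool pad_out_h = 4 [default = false];
  optional bool pad_out_w = 5 [default = false];
  optional uint32 upsample_h = 6;
  optional uint32 upsample_w = 7;
}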
//bn_layer.hpp
#ifndef CAFFE_BN_LAYER_HPP_
#define CAFFE_BN_LAYER_HPP_
#include <string>
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/common.hpp"
#include "caffe/syncedmem.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/filler.hpp"
/**
* @brief Batch Normalization per-channel with scale & shift linear transform.
*
*/
namespace caffe {
template <typename Dtype>
class BNLayer : public Layer<Dtype> {
 public:
  explicit BNLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual inline const char* type() const { return "BN"; }
  virtual inline int ExactNumBottomBlobs() const { return 1; }
  virtual inline int MinTopBlobs() const { return 1; }
  // if the BNMode is "LEARN", a maximum of 3 top blobs are available
  virtual inline int MaxTopBlobs() const {
    return (this->layer_param_.bn_param().bn_mode() ==
            BNParameter_BNMode_LEARN) ? 3 : 1;
  }
 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  // spatial mean & variance
  Blob<Dtype> spatial_mean_, spatial_variance_;
  // batch mean & variance
  Blob<Dtype> batch_mean_, batch_variance_;
  // buffer blob
  Blob<Dtype> buffer_blob_;
  // normalized input, cached for the backward pass
  Blob<Dtype> x_norm_;
  // sum multipliers (vectors of ones) used to carry out sums via BLAS
  Blob<Dtype> spatial_sum_multiplier_, batch_sum_multiplier_;
  // dimensions
  int N_;
  int C_;
  int H_;
  int W_;
  // eps
  Dtype var_eps_;
};
} // namespace caffe
#endif  // CAFFE_BN_LAYER_HPP_
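For reference: with per-channel statistics taken over the N, H, and W axes, LEARN mode implements the standard batch-norm transform y = scale * (x - mean) / sqrt(var + eps) + shift, where the variance is computed as var = E[x^2] - (E[x])^2 and eps = 1e-9. The gemv/gemm calls in the implementation below are just BLAS-based ways of summing over those axes (multiplying by a vector of ones) and broadcasting the per-channel results back to an N x C x H x W blob.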
//=========================================================================
//bn_layer.cpp
#include <algorithm>
#include <vector>
#include "caffe/layers/bn_layer.hpp"
namespace caffe {
template <typename Dtype>
void BNLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(),
bottom[0]->height(), bottom[0]->width());
if (top.size() > 1) {
// top blob for batch mean
top[1]->Reshape(1, C_, 1, 1);
}
if (top.size() > 2) {
// top blob for batch variance
top[2]->Reshape(1, C_, 1, 1);
}
x_norm_.Reshape(bottom[0]->num(), bottom[0]->channels(),
bottom[0]->height(), bottom[0]->width());
// mean
spatial_mean_.Reshape(N_, C_, 1, 1);
batch_mean_.Reshape(1, C_, 1, 1);
// variance
spatial_variance_.Reshape(N_, C_, 1, 1);
batch_variance_.Reshape(1, C_, 1, 1);
// buffer blob
buffer_blob_.Reshape(N_, C_, H_, W_);
// fill spatial multiplier
spatial_sum_multiplier_.Reshape(1, 1, H_, W_);
Dtype* spatial_multipl_data = spatial_sum_multiplier_.mutable_cpu_data();
caffe_set(spatial_sum_multiplier_.count(), Dtype(1),
spatial_multipl_data);
caffe_set(spatial_sum_multiplier_.count(), Dtype(0),
spatial_sum_multiplier_.mutable_cpu_diff());
// fill batch multiplier
batch_sum_multiplier_.Reshape(N_, 1, 1, 1);
Dtype* batch_multiplier_data = batch_sum_multiplier_.mutable_cpu_data();
caffe_set(batch_sum_multiplier_.count(), Dtype(1),
batch_multiplier_data);
caffe_set(batch_sum_multiplier_.count(), Dtype(0),
batch_sum_multiplier_.mutable_cpu_diff());
}
template <typename Dtype>
void BNLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
// Figure out the dimensions
N_ = bottom[0]->num();
C_ = bottom[0]->channels();
H_ = bottom[0]->height();
W_ = bottom[0]->width();
var_eps_ = 1e-9;
// Check if we need to set up the weights
if (this->blobs_.size() > 0) {
LOG(INFO) << "Skipping parameter initialization";
} else {
this->blobs_.resize(2);
// fill scale with scale_filler
this->blobs_[0].reset(new Blob<Dtype>(1, C_, 1, 1));
shared_ptr<Filler<Dtype> > scale_filler(GetFiller<Dtype>(
    this->layer_param_.bn_param().scale_filler()));
scale_filler->Fill(this->blobs_[0].get());
// fill shift with shift_filler
this->blobs_[1].reset(new Blob<Dtype>(1, C_, 1, 1));
shared_ptr<Filler<Dtype> > shift_filler(GetFiller<Dtype>(
    this->layer_param_.bn_param().shift_filler()));
shift_filler->Fill(this->blobs_[1].get());
} // parameter initialization
this->param_propagate_down_.resize(this->blobs_.size(), true);
}
template <typename Dtype>
void BNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
const Dtype* bottom_data = bottom[0]->cpu_data();
Dtype* top_data = top[0]->mutable_cpu_data();
const Dtype* const_top_data = top[0]->cpu_data();
const Dtype* scale_data = this->blobs_[0]->cpu_data();
const Dtype* shift_data = this->blobs_[1]->cpu_data();
switch (this->layer_param_.bn_param().bn_mode()) {
case BNParameter_BNMode_LEARN:
// put the squares of bottom into buffer_blob_
caffe_powx(bottom[0]->count(), bottom_data, Dtype(2),
buffer_blob_.mutable_cpu_data());
// computes variance using var(X) = E(X^2) - (EX)^2
// EX across spatial
caffe_cpu_gemv(CblasNoTrans, N_ * C_, H_ * W_,
Dtype(1. / (H_ * W_)), bottom_data,
spatial_sum_multiplier_.cpu_data(), Dtype(0),
spatial_mean_.mutable_cpu_data());
// EX across batch
caffe_cpu_gemv(CblasTrans, N_, C_, Dtype(1. / N_),
spatial_mean_.cpu_data(),
batch_sum_multiplier_.cpu_data(), Dtype(0),
batch_mean_.mutable_cpu_data());
// E(X^2) across spatial
caffe_cpu_gemv(CblasNoTrans, N_ * C_, H_ * W_,
Dtype(1. / (H_ * W_)), buffer_blob_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
spatial_variance_.mutable_cpu_data());
// E(X^2) across batch
caffe_cpu_gemv(CblasTrans, N_, C_, Dtype(1. / N_),
spatial_variance_.cpu_data(),
batch_sum_multiplier_.cpu_data(), Dtype(0),
batch_variance_.mutable_cpu_data());
caffe_powx(batch_mean_.count(), batch_mean_.cpu_data(), Dtype(2),
buffer_blob_.mutable_cpu_data()); // (EX)^2
caffe_sub(batch_mean_.count(), batch_variance_.cpu_data(),
buffer_blob_.cpu_data(),
batch_variance_.mutable_cpu_data()); // variance
// save top[1] (batch_mean) and top[2] (batch_variance)
if (top.size() > 1) {
caffe_copy(batch_mean_.count(), batch_mean_.cpu_data(),
top[1]->mutable_cpu_data());
}
if (top.size() > 2) {
caffe_copy(batch_variance_.count(), batch_variance_.cpu_data(),
top[2]->mutable_cpu_data());
}
// do mean and variance normalization
// subtract mean
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_,
C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(),
batch_mean_.cpu_data(), Dtype(0),
spatial_mean_.mutable_cpu_data());
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(-1),
spatial_mean_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_add(buffer_blob_.count(), bottom_data,
buffer_blob_.cpu_data(), top_data);
// normalize variance
caffe_add_scalar(batch_variance_.count(), var_eps_,
batch_variance_.mutable_cpu_data());
caffe_powx(batch_variance_.count(),
batch_variance_.cpu_data(), Dtype(0.5),
batch_variance_.mutable_cpu_data());
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_,
C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(),
batch_variance_.cpu_data(), Dtype(0),
spatial_variance_.mutable_cpu_data());
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans,
N_ * C_, H_ * W_, 1, Dtype(1),
spatial_variance_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_div(buffer_blob_.count(), const_top_data,
buffer_blob_.cpu_data(), top_data);
// Saving x_norm
caffe_copy(buffer_blob_.count(), const_top_data,
x_norm_.mutable_cpu_data());
// scale
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
spatial_variance_.mutable_cpu_data());
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_mul(buffer_blob_.count(), top_data,
buffer_blob_.cpu_data(), top_data);
// shift
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(), shift_data, Dtype(0),
spatial_mean_.mutable_cpu_data());
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans,
N_ * C_, H_ * W_, 1, Dtype(1),
spatial_mean_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_add(buffer_blob_.count(), const_top_data,
buffer_blob_.cpu_data(), top_data);
break;
case BNParameter_BNMode_INFERENCE:
// scale
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
spatial_variance_.mutable_cpu_data());
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_mul(buffer_blob_.count(), bottom_data,
buffer_blob_.cpu_data(), top_data);
// shift
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(), shift_data, Dtype(0),
spatial_mean_.mutable_cpu_data());
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans,
N_ * C_, H_ * W_, 1, Dtype(1),
spatial_mean_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_add(buffer_blob_.count(), const_top_data,
buffer_blob_.cpu_data(), top_data);
break;
default:
LOG(FATAL) << "Unknown BN mode.";
}
}
template <typename Dtype>
void BNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
const Dtype* top_diff = top[0]->cpu_diff();
const Dtype* bottom_data = bottom[0]->cpu_data();
Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
Dtype* scale_diff = this->blobs_[0]->mutable_cpu_diff();
Dtype* shift_diff = this->blobs_[1]->mutable_cpu_diff();
const Dtype* scale_data = this->blobs_[0]->cpu_data();
switch (this->layer_param_.bn_param().bn_mode()) {
case BNParameter_BNMode_LEARN:
// Propagate to layer parameters
// gradient w.r.t. scale
caffe_mul(buffer_blob_.count(), x_norm_.cpu_data(),
top_diff, buffer_blob_.mutable_cpu_data());
// EX across spatial
caffe_cpu_gemv(CblasNoTrans, N_ * C_,
H_ * W_, Dtype(1), buffer_blob_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
spatial_variance_.mutable_cpu_diff());
// EX across batch
caffe_cpu_gemv(CblasTrans, N_, C_, Dtype(1),
spatial_variance_.cpu_diff(),
batch_sum_multiplier_.cpu_data(), Dtype(0), scale_diff);
// gradient w.r.t. shift
// EX across spatial
caffe_cpu_gemv(CblasNoTrans, N_ * C_,
H_ * W_, Dtype(1), top_diff,
spatial_sum_multiplier_.cpu_data(),
Dtype(0), spatial_mean_.mutable_cpu_diff());
// EX across batch
caffe_cpu_gemv(CblasTrans, N_, C_,
Dtype(1), spatial_mean_.cpu_diff(),
batch_sum_multiplier_.cpu_data(),
Dtype(0), shift_diff);
// Propagate down
// put scale * top_diff to buffer_blob_
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
spatial_variance_.mutable_cpu_data());
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_mul(buffer_blob_.count(), top_diff, buffer_blob_.cpu_data(),
buffer_blob_.mutable_cpu_data());
// use new top diff for computation
caffe_mul(buffer_blob_.count(), x_norm_.cpu_data(),
buffer_blob_.cpu_data(), bottom_diff);
// EX across spatial
caffe_cpu_gemv(CblasNoTrans, N_ * C_, H_ * W_,
Dtype(1), bottom_diff,
spatial_sum_multiplier_.cpu_data(), Dtype(0),
spatial_mean_.mutable_cpu_data());
// EX across batch
caffe_cpu_gemv(CblasTrans, N_, C_, Dtype(1),
spatial_mean_.cpu_data(),
batch_sum_multiplier_.cpu_data(), Dtype(0),
batch_mean_.mutable_cpu_data());
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans,
N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(),
batch_mean_.cpu_data(), Dtype(0),
spatial_mean_.mutable_cpu_data());
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_mean_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
bottom_diff);
caffe_mul(buffer_blob_.count(),
x_norm_.cpu_data(), bottom_diff, bottom_diff);
// EX across spatial
caffe_cpu_gemv(CblasNoTrans, N_ * C_,
H_ * W_, Dtype(1), buffer_blob_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
spatial_mean_.mutable_cpu_data());
// EX across batch
caffe_cpu_gemv(CblasTrans, N_, C_, Dtype(1),
spatial_mean_.cpu_data(),
batch_sum_multiplier_.cpu_data(), Dtype(0),
batch_mean_.mutable_cpu_data());
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans,
N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(),
batch_mean_.cpu_data(), Dtype(0),
spatial_mean_.mutable_cpu_data());
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans,
N_ * C_, H_ * W_, 1, Dtype(1),
spatial_mean_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(1), bottom_diff);
caffe_cpu_axpby(buffer_blob_.count(), Dtype(1),
buffer_blob_.cpu_data(), Dtype(-1. / (N_ * H_ * W_)),
bottom_diff);
// put the squares of bottom into buffer_blob_
caffe_powx(buffer_blob_.count(), bottom_data, Dtype(2),
buffer_blob_.mutable_cpu_data());
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans,
N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(),
batch_variance_.cpu_data(), Dtype(0),
spatial_variance_.mutable_cpu_data());
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans,
N_ * C_, H_ * W_, 1, Dtype(1),
spatial_variance_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_div(buffer_blob_.count(), bottom_diff,
buffer_blob_.cpu_data(), bottom_diff);
break;
case BNParameter_BNMode_INFERENCE:
// Propagate to layer parameters
// gradient w.r.t. scale
caffe_mul(buffer_blob_.count(), bottom_data,
top_diff, buffer_blob_.mutable_cpu_data());
// EX across spatial
caffe_cpu_gemv(CblasNoTrans, N_ * C_,
H_ * W_, Dtype(1), buffer_blob_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
spatial_variance_.mutable_cpu_diff());
// EX across batch
caffe_cpu_gemv(CblasTrans, N_, C_, Dtype(1),
spatial_variance_.cpu_diff(),
batch_sum_multiplier_.cpu_data(), Dtype(0), scale_diff);
// gradient w.r.t. shift
// EX across spatial
caffe_cpu_gemv(CblasNoTrans, N_ * C_,
H_ * W_, Dtype(1), top_diff,
spatial_sum_multiplier_.cpu_data(),
Dtype(0), spatial_mean_.mutable_cpu_diff());
// EX across batch
caffe_cpu_gemv(CblasTrans, N_, C_,
Dtype(1), spatial_mean_.cpu_diff(),
batch_sum_multiplier_.cpu_data(),
Dtype(0), shift_diff);
// Propagate down
// put scale * top_diff to buffer_blob_
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.cpu_data(), scale_data, Dtype(0),
spatial_variance_.mutable_cpu_data());
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.cpu_data(),
spatial_sum_multiplier_.cpu_data(), Dtype(0),
buffer_blob_.mutable_cpu_data());
caffe_mul(buffer_blob_.count(), top_diff, buffer_blob_.cpu_data(),
bottom_diff);
break;
default:
LOG(FATAL) << "Unknown BN mode.";
}
}
#ifdef CPU_ONLY
STUB_GPU(BNLayer);
#endif
INSTANTIATE_CLASS(BNLayer);
REGISTER_LAYER_CLASS(BN);
} // namespace caffe
//=========================================================================
//bn_layer.cu
#include <algorithm>
#include <vector>
#include "caffe/layers/bn_layer.hpp"
namespace caffe {
template <typename Dtype>
void BNLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
const Dtype* bottom_data = bottom[0]->gpu_data();
const Dtype* const_top_data = top[0]->gpu_data();
Dtype* top_data = top[0]->mutable_gpu_data();
Dtype* spatial_mean_data = spatial_mean_.mutable_gpu_data();
Dtype* buffer_data = buffer_blob_.mutable_gpu_data();
const Dtype* const_buffer_data = buffer_blob_.gpu_data();
switch (this->layer_param_.bn_param().bn_mode()) {
case BNParameter_BNMode_LEARN:
// put the squares of bottom into buffer_blob_
caffe_gpu_powx(bottom[0]->count(), bottom_data, Dtype(2),
buffer_blob_.mutable_gpu_data());
// computes variance using var(X) = E(X^2) - (EX)^2
// EX across spatial
caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_,
Dtype(1. / (H_ * W_)),
bottom_data, spatial_sum_multiplier_.gpu_data(),
Dtype(0), spatial_mean_data);
// EX across batch
caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1. / N_),
spatial_mean_.gpu_data(),
batch_sum_multiplier_.gpu_data(), Dtype(0),
batch_mean_.mutable_gpu_data());
// E(X^2) across spatial
caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_,
Dtype(1. / (H_ * W_)), buffer_data,
spatial_sum_multiplier_.gpu_data(), Dtype(0),
spatial_variance_.mutable_gpu_data());
// E(X^2) across batch
caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1. / N_),
spatial_variance_.gpu_data(),
batch_sum_multiplier_.gpu_data(), Dtype(0),
batch_variance_.mutable_gpu_data());
caffe_gpu_powx(batch_mean_.count(), batch_mean_.gpu_data(),
Dtype(2), buffer_blob_.mutable_gpu_data()); // (EX)^2
caffe_gpu_sub(batch_mean_.count(), batch_variance_.gpu_data(),
buffer_data, batch_variance_.mutable_gpu_data()); // variance
// save top[1] (batch_mean) and top[2] (batch_variance)
if (top.size() > 1) {
caffe_copy(batch_mean_.count(), batch_mean_.gpu_data(),
top[1]->mutable_gpu_data());
}
if (top.size() > 2) {
caffe_copy(batch_variance_.count(), batch_variance_.gpu_data(),
top[2]->mutable_gpu_data());
}
// do mean and variance normalization
// subtract mean
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(), batch_mean_.gpu_data(), Dtype(0),
spatial_mean_data);
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_,
1, -Dtype(1),
spatial_mean_.gpu_data(), spatial_sum_multiplier_.gpu_data(), Dtype(0),
buffer_blob_.mutable_gpu_data());
caffe_gpu_add(buffer_blob_.count(), bottom_data, buffer_data, top_data);
// normalize variance
caffe_gpu_add_scalar(batch_variance_.count(), var_eps_,
batch_variance_.mutable_gpu_data());
caffe_gpu_powx(batch_variance_.count(), batch_variance_.gpu_data(),
Dtype(0.5), batch_variance_.mutable_gpu_data());
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(), batch_variance_.gpu_data(), Dtype(0),
spatial_variance_.mutable_gpu_data());
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(),
Dtype(0), buffer_blob_.mutable_gpu_data());
caffe_gpu_div(buffer_blob_.count(), top_data, buffer_data, top_data);
// Saving x_norm
caffe_copy(top[0]->count(), const_top_data, x_norm_.mutable_gpu_data());
// scale
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(), this->blobs_[0]->gpu_data(),
Dtype(0), spatial_variance_.mutable_gpu_data());
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(),
Dtype(0), buffer_blob_.mutable_gpu_data());
caffe_gpu_mul(buffer_blob_.count(), top_data, buffer_data, top_data);
// shift
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(),
this->blobs_[1]->gpu_data(), Dtype(0),
spatial_mean_data);
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, 1,
Dtype(1),
spatial_mean_.gpu_data(), spatial_sum_multiplier_.gpu_data(), Dtype(0),
buffer_blob_.mutable_gpu_data());
caffe_gpu_add(buffer_blob_.count(), top_data, buffer_data, top_data);
break;
case BNParameter_BNMode_INFERENCE:
// scale
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(), this->blobs_[0]->gpu_data(),
Dtype(0), spatial_variance_.mutable_gpu_data());
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(),
Dtype(0), buffer_blob_.mutable_gpu_data());
caffe_gpu_mul(buffer_blob_.count(), bottom_data, buffer_data, top_data);
// shift
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(),
this->blobs_[1]->gpu_data(), Dtype(0),
spatial_mean_data);
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_, H_ * W_, 1,
Dtype(1),
spatial_mean_.gpu_data(), spatial_sum_multiplier_.gpu_data(), Dtype(0),
buffer_blob_.mutable_gpu_data());
caffe_gpu_add(buffer_blob_.count(), top_data, buffer_data, top_data);
break;
default:
LOG(FATAL) << "Unknown BN mode.";
}
}
template <typename Dtype>
void BNLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
const Dtype* top_diff = top[0]->gpu_diff();
const Dtype* top_data = top[0]->gpu_data();
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
const Dtype* const_bottom_diff = bottom[0]->gpu_diff();
Dtype* spatial_mean_data = spatial_mean_.mutable_gpu_data();
Dtype* buffer_data = buffer_blob_.mutable_gpu_data();
const Dtype* const_buffer_data = buffer_blob_.gpu_data();
switch (this->layer_param_.bn_param().bn_mode()) {
case BNParameter_BNMode_LEARN:
// Propagate to layer parameters
// gradient w.r.t. scale
caffe_gpu_mul(buffer_blob_.count(), x_norm_.gpu_data(),
top_diff, buffer_blob_.mutable_gpu_data());
// EX across spatial
caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1),
buffer_data, spatial_sum_multiplier_.gpu_data(), Dtype(0),
spatial_variance_.mutable_gpu_data());
// EX across batch
caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1),
spatial_variance_.gpu_data(),
batch_sum_multiplier_.gpu_data(), Dtype(0),
this->blobs_[0]->mutable_gpu_diff());
// gradient w.r.t. shift
// EX across spatial
caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1),
top_diff, spatial_sum_multiplier_.gpu_data(),
Dtype(0), spatial_mean_data);
// EX across batch
caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1),
spatial_mean_.gpu_data(),
batch_sum_multiplier_.gpu_data(), Dtype(0),
this->blobs_[1]->mutable_gpu_diff());
// Propagate down
// scale top diff
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(), this->blobs_[0]->gpu_data(),
Dtype(0), spatial_variance_.mutable_gpu_data());
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(),
Dtype(0),
buffer_blob_.mutable_gpu_data());
caffe_gpu_mul(buffer_blob_.count(), top_diff, buffer_data,
buffer_blob_.mutable_gpu_data());
// use new top diff for computation
caffe_gpu_mul(buffer_blob_.count(), x_norm_.gpu_data(),
buffer_data, bottom_diff);
// EX across spatial
caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_,
Dtype(1), bottom_diff,
spatial_sum_multiplier_.gpu_data(), Dtype(0), spatial_mean_data);
// EX across batch
caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1),
spatial_mean_.gpu_data(),
batch_sum_multiplier_.gpu_data(), Dtype(0),
batch_mean_.mutable_gpu_data());
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(),
batch_mean_.gpu_data(), Dtype(0),
spatial_mean_data);
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1), spatial_mean_.gpu_data(),
spatial_sum_multiplier_.gpu_data(), Dtype(0),
bottom_diff);
caffe_gpu_mul(buffer_blob_.count(), x_norm_.gpu_data(),
bottom_diff, bottom_diff);
// EX across spatial
caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1),
buffer_data, spatial_sum_multiplier_.gpu_data(),
Dtype(0), spatial_mean_data);
// EX across batch
caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1),
spatial_mean_.gpu_data(),
batch_sum_multiplier_.gpu_data(), Dtype(0),
batch_mean_.mutable_gpu_data());
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_,
C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(),
batch_mean_.gpu_data(), Dtype(0),
spatial_mean_data);
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_mean_.gpu_data(), spatial_sum_multiplier_.gpu_data(),
Dtype(1),
bottom_diff);
caffe_gpu_axpby(buffer_blob_.count(), Dtype(1), buffer_data,
Dtype(-1. / (N_ * H_ * W_)),
bottom_diff);
// put the squares of bottom into buffer_blob_
caffe_gpu_powx(buffer_blob_.count(), bottom_data, Dtype(2),
buffer_blob_.mutable_gpu_data());
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(), batch_variance_.gpu_data(), Dtype(0),
spatial_variance_.mutable_gpu_data());
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(),
Dtype(0),
buffer_blob_.mutable_gpu_data());
caffe_gpu_div(buffer_blob_.count(), const_bottom_diff,
const_buffer_data, bottom_diff);
break;
case BNParameter_BNMode_INFERENCE:
// Propagate to layer parameters
// gradient w.r.t. scale
caffe_gpu_mul(buffer_blob_.count(), bottom_data,
top_diff, buffer_blob_.mutable_gpu_data());
// EX across spatial
caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1),
buffer_data, spatial_sum_multiplier_.gpu_data(), Dtype(0),
spatial_variance_.mutable_gpu_data());
// EX across batch
caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1),
spatial_variance_.gpu_data(),
batch_sum_multiplier_.gpu_data(), Dtype(0),
this->blobs_[0]->mutable_gpu_diff());
// gradient w.r.t. shift
// EX across spatial
caffe_gpu_gemv(CblasNoTrans, N_ * C_, H_ * W_, Dtype(1),
top_diff, spatial_sum_multiplier_.gpu_data(),
Dtype(0), spatial_mean_data);
// EX across batch
caffe_gpu_gemv(CblasTrans, N_, C_, Dtype(1),
spatial_mean_.gpu_data(),
batch_sum_multiplier_.gpu_data(), Dtype(0),
this->blobs_[1]->mutable_gpu_diff());
// Propagate down
// scale top diff
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_, C_, 1, Dtype(1),
batch_sum_multiplier_.gpu_data(), this->blobs_[0]->gpu_data(),
Dtype(0), spatial_variance_.mutable_gpu_data());
caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, N_ * C_,
H_ * W_, 1, Dtype(1),
spatial_variance_.gpu_data(), spatial_sum_multiplier_.gpu_data(),
Dtype(0),
buffer_blob_.mutable_gpu_data());
caffe_gpu_mul(buffer_blob_.count(), top_diff, buffer_data,
bottom_diff);
break;
default:
LOG(FATAL) << "Unknown BN mode.";
}
}
INSTANTIATE_LAYER_GPU_FUNCS(BNLayer);
} // namespace caffe
//=========================================================================
//upsample_layer.hpp
#ifndef CAFFE_UPSAMPLE_LAYER_HPP_
#define CAFFE_UPSAMPLE_LAYER_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/common.hpp"
#include "caffe/syncedmem.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
template <typename Dtype>
class UpsampleLayer : public Layer<Dtype> {
 public:
  explicit UpsampleLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual inline const char* type() const { return "Upsample"; }
  // two bottoms: the feature map to upsample and the max-pooling mask
  virtual inline int ExactNumBottomBlobs() const { return 2; }
  virtual inline int ExactNumTopBlobs() const { return 1; }
 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  int channels_;
  int height_;
  int width_;
  int scale_h_, scale_w_;
  bool pad_out_h_, pad_out_w_;
  int upsample_h_, upsample_w_;
};
} // namespace caffe
#endif  // CAFFE_UPSAMPLE_LAYER_HPP_
//=========================================================================
//upsample_layer.cpp
#include <algorithm>
#include <cfloat>
#include <vector>
#include "caffe/layers/upsample_layer.hpp"
namespace caffe {
template <typename Dtype>
void UpsampleLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
UpsampleParameter upsample_param = this->layer_param_.upsample_param();
CHECK((upsample_param.has_upsample_h() && upsample_param.has_upsample_w())
|| (!upsample_param.has_scale() && upsample_param.has_scale_h()
&& upsample_param.has_scale_w())
|| (!upsample_param.has_scale_h() && !upsample_param.has_scale_w()))
<< "upsample_h & upsample_w are required, else (DEPRECATED) "
<< "scale OR scale_h & scale_w are required.";
if (upsample_param.has_upsample_h() && upsample_param.has_upsample_w()) {
upsample_h_ = upsample_param.upsample_h();
upsample_w_ = upsample_param.upsample_w();
CHECK_GT(upsample_h_, 1);
CHECK_GT(upsample_w_, 1);
} else {
LOG(INFO) << "Params 'pad_out_{}_' are deprecated. Please declare upsample"
<< " height and width useing the upsample_h, upsample_w parameters.";
if (!upsample_param.has_scale_h()) {
scale_h_ = scale_w_ = upsample_param.scale();
CHECK_GT(scale_h_, 1);
} else {
scale_h_ = upsample_param.scale_h();
scale_w_ = upsample_param.scale_w();
CHECK_GT(scale_h_, 1);
CHECK_GT(scale_w_, 1);
}
pad_out_h_ = upsample_param.pad_out_h();
pad_out_w_ = upsample_param.pad_out_w();
CHECK(!pad_out_h_ || scale_h_ == 2)
<< "Output height padding compensation requires scale_h == 2, otherwise "
<< "the output size is ill-defined.";
CHECK(!pad_out_w_ || scale_w_ == 2)
<< "Output width padding compensation requires scale_w == 2, otherwise "
<< "the output size is ill-defined.";
upsample_h_ = upsample_w_ = -1; // flag to calculate in Reshape
}
}
template <typename Dtype>
void UpsampleLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
<< "corresponding to (num, channels, height, width)";
CHECK_EQ(4, bottom[1]->num_axes()) << "Input mask must have 4 axes, "
<< "corresponding to (num, channels, height, width)";
CHECK_EQ(bottom[0]->num(), bottom[1]->num());
CHECK_EQ(bottom[0]->channels(), bottom[1]->channels());
CHECK_EQ(bottom[0]->height(), bottom[1]->height());
CHECK_EQ(bottom[0]->width(), bottom[1]->width());
if (upsample_h_ <= 0 || upsample_w_ <= 0) {
upsample_h_ = bottom[0]->height() * scale_h_ - int(pad_out_h_);
upsample_w_ = bottom[0]->width() * scale_w_ - int(pad_out_w_);
}
top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), upsample_h_,
upsample_w_);
channels_ = bottom[0]->channels();
height_ = bottom[0]->height();
width_ = bottom[0]->width();
}
template <typename Dtype>
void UpsampleLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
const Dtype* bottom_data = bottom[0]->cpu_data();
const Dtype* bottom_mask_data = bottom[1]->cpu_data();
Dtype* top_data = top[0]->mutable_cpu_data();
// Initialize
const int top_count = top[0]->count();
caffe_set(top_count, Dtype(0), top_data);
// The main loop
for (int n = 0; n < bottom[0]->num(); ++n) {
for (int c = 0; c < channels_; ++c) {
for (int i = 0; i < height_ * width_; ++i) {
const int idx = static_cast<int>(bottom_mask_data[i]);
if (idx >= upsample_h_ * upsample_w_) {
// this can happen if the pooling layer that created the input mask
// had an input with different size to top[0]
LOG(FATAL) << "upsample top index " << idx << " out of range - "
<< "check scale settings match input pooling layer's "
<< "downsample setup";
}
top_data[idx] = bottom_data[i];
}
// compute offset
bottom_data += bottom[0]->offset(0, 1);
bottom_mask_data += bottom[1]->offset(0, 1);
top_data += top[0]->offset(0, 1);
}
}
}
template <typename Dtype>
void UpsampleLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
if (propagate_down[0]) {
const Dtype* top_diff = top[0]->cpu_diff();
const Dtype* bottom_mask_data = bottom[1]->cpu_data();
Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
const int bottom_count = bottom[0]->count();
caffe_set(bottom_count, Dtype(0), bottom_diff);
// The main loop
for (int n = 0; n < bottom[0]->num(); ++n) {
for (int c = 0; c < channels_; ++c) {
for (int i = 0; i < height_ * width_; ++i) {
const int idx = static_cast<int>(bottom_mask_data[i]);
if (idx >= height_ * width_ * scale_h_ * scale_w_) {
// this can happen if the pooling layer that created
// the input mask had an input with different size to top[0]
LOG(FATAL) << "upsample top index " << idx << " out of range - "
<< "check scale settings match input pooling layer's downsample setup";
}
bottom_diff[i] = top_diff[idx];
}
// compute offset
bottom_diff += bottom[0]->offset(0, 1);
bottom_mask_data += bottom[1]->offset(0, 1);
top_diff += top[0]->offset(0, 1);
}
}
}
}
#ifdef CPU_ONLY
STUB_GPU(UpsampleLayer);
#endif
INSTANTIATE_CLASS(UpsampleLayer);
REGISTER_LAYER_CLASS(Upsample);
} // namespace caffe
//=========================================================================
//upsample_layer.cu
#include <algorithm>
#include <cfloat>
#include <vector>
#include "caffe/layers/upsample_layer.hpp"
namespace caffe {
template <typename Dtype>
__global__ void UpsampleForward(const int nthreads, int in_w, int in_h,
int out_w, int out_h, const Dtype* bottom_data,
const Dtype* bottom_mask, Dtype* top_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int offset = index / (in_w * in_h) * out_w * out_h;
int upsample_idx = static_cast<int>(bottom_mask[index]);
top_data[offset + upsample_idx] = bottom_data[index];
}
}
template <typename Dtype>
void UpsampleLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
const Dtype* bottom_data = bottom[0]->gpu_data();
const Dtype* bottom_mask = bottom[1]->gpu_data();
Dtype* top_data = top[0]->mutable_gpu_data();
caffe_gpu_set(top[0]->count(), Dtype(0), top_data);
int bottom_count = bottom[0]->count();
UpsampleForward<Dtype><<<CAFFE_GET_BLOCKS(bottom_count),
    CAFFE_CUDA_NUM_THREADS>>>(
bottom_count, bottom[0]->width(), bottom[0]->height(),
top[0]->width(), top[0]->height(), bottom_data, bottom_mask, top_data);
CUDA_POST_KERNEL_CHECK;
}
template <typename Dtype>
__global__ void UpsampleBackward(const int nthreads, int in_w, int in_h,
int out_w, int out_h, const Dtype* top_diff,
const Dtype* bottom_mask, Dtype* bottom_diff) {
CUDA_KERNEL_LOOP(index, nthreads) {
int offset = index / (in_w * in_h) * out_w * out_h;
int upsample_idx = static_cast<int>(bottom_mask[index]);
bottom_diff[index] = top_diff[offset + upsample_idx];
}
}
template <typename Dtype>
void UpsampleLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
if (propagate_down[0]) {
const Dtype* top_diff = top[0]->gpu_diff();
const Dtype* bottom_mask = bottom[1]->gpu_data();
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
const int bottom_count = bottom[0]->count();
caffe_gpu_set(bottom_count, Dtype(0.), bottom_diff);
UpsampleBackward<Dtype><<<CAFFE_GET_BLOCKS(bottom_count),
    CAFFE_CUDA_NUM_THREADS>>>(
bottom_count, bottom[0]->width(), bottom[0]->height(),
top[0]->width(), top[0]->height(), top_diff, bottom_mask, bottom_diff);
CUDA_POST_KERNEL_CHECK;
}
}
INSTANTIATE_LAYER_GPU_FUNCS(UpsampleLayer);
} // namespace caffe
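With both layers compiled and registered, a single pooling/upsampling stage is wired up in the prototxt roughly as below (a sketch; layer and blob names are illustrative). The max-pooling layer must emit a second top holding the pooling indices, which Upsample consumes as its second bottom:
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  top: "pool1_mask"   # max-pooling indices, consumed by Upsample
  pooling_param { pool: MAX kernel_size: 2 stride: 2 }
}
layer {
  name: "upsample1"
  type: "Upsample"
  bottom: "pool1"       # in the full SegNet this is a decoder feature map
  bottom: "pool1_mask"
  top: "upsample1"
  upsample_param { scale: 2 }
}
layer {
  name: "bn1"
  type: "BN"
  bottom: "upsample1"
  top: "bn1"
  bn_param {
    bn_mode: LEARN      # switch to INFERENCE for deployment
    scale_filler { type: "constant" value: 1 }
    shift_filler { type: "constant" value: 0 }
  }
}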
For data input, the author additionally wrote a new DenseImageDataLayer; in fact, there is no need to use that layer. The original definition looks like this:
layer {
name: "data"
type: "DenseImageData"
top: "data"
top: "label"
dense_image_data_param {
source: "/SegNet/CamVid/train.txt" # Change this to the absolute path to your data file
batch_size: 4 # Change this number to a batch size that will fit on your GPU
shuffle: true
}
}
The data input layers can instead be written in the following style, feeding data and label through two separate Data layers:
layer {
name: "data"
type: "Data"
top:"data"
include {
phase: TRAIN
}
transform_param {
mean_file: "G:/interest_of_imags_for_recognation/VOC2012/Resize224/Img_train_mean.binaryproto"
}
data_param {
source: "G:/interest_of_imags_for_recognation/VOC2012/Resize224/Img_train"
batch_size: 1
backend: LMDB
}
}
layer {
name: "label"
type: "Data"
top:"label"
include {
phase: TRAIN
}
data_param {
source: "G:/interest_of_imags_for_recognation/VOC2012/Resize224/Label_train"
batch_size: 1
backend: LMDB
}
}
layer {
name: "data"
type: "Data"
top: "data"
include {
phase: TEST
}
transform_param {
mean_file: "G:/interest_of_imags_for_recognation/VOC2012/Resize224/Img_val_mean.binaryproto"
}
data_param {
source: "G:/interest_of_imags_for_recognation/VOC2012/Resize224/Img_val"
batch_size: 1
backend: LMDB
}
}
layer {
name: "label"
type: "Data"
top: "label"
include {
phase: TEST
}
data_param {
source: "G:/interest_of_imags_for_recognation/VOC2012/Resize224/Label_val"
batch_size: 1
backend: LMDB
}
}
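When building the two LMDBs, make sure the image and label databases are created from file lists in the same order; shuffling them independently would misalign images and labels. The label blob then feeds the loss as usual. A hedged sketch for VOC-style annotations ("score" stands for whatever your final per-class score layer is named, and 255 is the conventional VOC void label):
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "score"   # final per-class score map (illustrative name)
  bottom: "label"
  top: "loss"
  loss_param { ignore_label: 255 }  # assumption: 255 marks void pixels
}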