namespace caffe {
template <typename Dtype>
void EuclideanLossNormLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  int count = bottom[0]->count();  // count = num * channels * height * width, i.e. the
                                   // total number of elements in the Blob; in sacnn
                                   // num = 1, channels = 1, and height/width are the
                                   // dimensions of the output feature map.
  caffe_gpu_sub(
      count,
      bottom[0]->gpu_data(),      // network output (prediction)
      bottom[1]->gpu_data(),      // label (ground truth)
      diff_.mutable_gpu_data());  // stores bottom[0] - bottom[1]
  Dtype dot;
  // LOG(INFO) << "estimated count" << bottom[0]->cpu_data()[0];
  // LOG(INFO) << "groundtruth count" << bottom[1]->cpu_data()[0];
  caffe_gpu_set(count, Dtype(1), diffdiv_.mutable_gpu_data());  // see note 1
  caffe_gpu_axpy(count, Dtype(1), bottom[1]->gpu_data(),
      diffdiv_.mutable_gpu_data());  // see note 2: Y[i] += 1 * label[i], so
                                     // diffdiv_ now holds label + 1
  caffe_gpu_div(count, diff_.gpu_data(), diffdiv_.gpu_data(),
      diff_.mutable_gpu_data());
  // see note 3: diff_ = diff_ / diffdiv_, where diff_ holds
  // bottom[0] - bottom[1] and diffdiv_ holds label + 1
  caffe_gpu_dot(count, diff_.gpu_data(), diff_.gpu_data(), &dot);  // dot product
                                                                   // of the two
                                                                   // input vectors
  //dot = diff_.asum_data();
  Dtype loss = dot / bottom[0]->num() / Dtype(2);
  top[0]->mutable_cpu_data()[0] = loss;
}
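// In symbols (a sketch; x = prediction, y = label, N = bottom[0]->num()),
// Forward_gpu computes a (label + 1)-normalized Euclidean loss:
//   loss = (1 / (2 * N)) * sum_i ((x_i - y_i) / (y_i + 1))^2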
template <typename Dtype>
void EuclideanLossNormLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  for (int i = 0; i < 2; ++i) {
    if (propagate_down[i]) {
      const Dtype sign = (i == 0) ? 1 : -1;
      const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
      caffe_gpu_axpby(
          bottom[i]->count(),              // count
          alpha,                           // alpha
          diff_.gpu_data(),                // X
          Dtype(0),                        // beta
          bottom[i]->mutable_gpu_diff());  // Y
      // Divide the gradient by (label + 1) once more. The original code wrote
      // the quotient into bottom[i]->mutable_gpu_data(), which overwrites the
      // layer input instead of the gradient; writing to the diff is almost
      // certainly what was intended.
      caffe_gpu_div(bottom[i]->count(), bottom[i]->gpu_diff(),
          diffdiv_.gpu_data(), bottom[i]->mutable_gpu_diff());
    }
  }
}
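// Gradient sketch (treating the (y_i + 1) normalizer as constant w.r.t. y,
// as the code does): with r_i = (x_i - y_i) / (y_i + 1) held in diff_,
//   d(loss)/d(x_i) = r_i / ((y_i + 1) * N),
// and the gradient w.r.t. the label gets the opposite sign, hence `sign`.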
INSTANTIATE_LAYER_GPU_FUNCS(EuclideanLossNormLayer);
} // namespace caffe
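For a serial reference, here is a minimal CPU sketch of the whole Forward_gpu
computation above (not part of the layer; the function name
forward_cpu_reference and its raw-pointer interface are made up):

#include <cstddef>

float forward_cpu_reference(std::size_t count, std::size_t num,
    const float* pred, const float* label) {
  float dot = 0.0f;
  for (std::size_t i = 0; i < count; ++i) {
    float r = (pred[i] - label[i]) / (label[i] + 1.0f);  // diff_ / diffdiv_
    dot += r * r;                                        // caffe_gpu_dot
  }
  return dot / num / 2.0f;                               // loss
}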
/**
Notes:
1. The caffe_gpu_set function:
template <typename Dtype>
void caffe_gpu_set(const int N, const Dtype alpha, Dtype* Y) {
  if (alpha == 0) {
    CUDA_CHECK(cudaMemset(Y, 0, sizeof(Dtype) * N));  // NOLINT(caffe/alt_fn)
    return;
  }
  // NOLINT_NEXT_LINE(whitespace/operators)
  set_kernel<Dtype><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
      N, alpha, Y);
}
template <typename Dtype>
__global__ void set_kernel(const int n, const Dtype alpha, Dtype* y) {
  CUDA_KERNEL_LOOP(index, n) {
    y[index] = alpha;
  }
}
Purpose:
Initializes Y with the constant alpha:
here Y[i] = 1, where Y is diffdiv_.mutable_gpu_data().
2. The caffe_axpy function (the CPU counterpart of the caffe_gpu_axpy call in Forward_gpu):
template <>
void caffe_axpy<float>(const int N, const float alpha, const float* X,
    float* Y) { cblas_saxpy(N, alpha, X, 1, Y, 1); }
Purpose:
Y = alpha * X + Y; here alpha = 1 and Y[i] = 1, so the result is label + 1.
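A minimal CPU sketch (not Caffe code; the helper name label_plus_one is made
up) of what notes 1 and 2 compute together in Forward_gpu: start from
Y[i] = 1, then axpy the label in, giving Y[i] = label[i] + 1.

#include <cstddef>
#include <vector>

std::vector<float> label_plus_one(const std::vector<float>& label) {
  std::vector<float> y(label.size(), 1.0f);  // note 1: caffe_gpu_set, alpha = 1
  for (std::size_t i = 0; i < label.size(); ++i) {
    y[i] += 1.0f * label[i];                 // note 2: axpy with alpha = 1
  }
  return y;                                  // this is diffdiv_ in Forward_gpu
}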
3. The caffe_gpu_div function:
template <>
void caffe_gpu_div<double>(const int N, const double* a,
    const double* b, double* y) {
  // NOLINT_NEXT_LINE(whitespace/operators)
  div_kernel<double><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
      N, a, b, y);
}
Purpose:
y[i] = a[i] / b[i]
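For reference, the element-wise kernel behind caffe_gpu_div follows the same
CUDA_KERNEL_LOOP pattern as set_kernel in note 1 (this mirrors the kernel in
Caffe's math_functions.cu):

template <typename Dtype>
__global__ void div_kernel(const int n, const Dtype* a,
    const Dtype* b, Dtype* y) {
  CUDA_KERNEL_LOOP(index, n) {
    y[index] = a[index] / b[index];
  }
}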
4.
template <>
void caffe_cpu_axpby(const int N, const float alpha, const float* X,
const float beta, float* Y) {
cblas_saxpby(N, alpha, X, 1, beta, Y, 1);
}
功能:Y= alpha*X+beta*Y
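A minimal CPU sketch (not Caffe code) of the axpby operation, showing why
Backward_gpu passes beta = 0: the update then degenerates to a scaled copy
Y = alpha * X.

void axpby(const int N, const float alpha, const float* X,
    const float beta, float* Y) {
  for (int i = 0; i < N; ++i) {
    Y[i] = alpha * X[i] + beta * Y[i];  // beta = 0  ->  Y[i] = alpha * X[i]
  }
}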
**/
Keep on coding ↖(^ω^)↗