This article uses the MMDLoss layer from the paper "Learning Transferable Features with Deep Adaptation Networks" (DAN) as a worked example of how to add a new layer to Caffe.
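As background, the layer estimates the multi-kernel MMD from the DAN paper with a linear-time estimator over sampled quadruples of source samples x^s and target samples x^t. A sketch in my own notation (the code below draws the sample pairs randomly rather than walking the batch in order):

\[
k(x, y) = \sum_{m=1}^{M} \beta_m\, k_m(x, y), \qquad
k_m(x, y) = \exp\!\left(-\gamma_m \lVert x - y \rVert^2\right), \qquad
\beta_m \ge 0
\]
\[
\widehat{\mathrm{MMD}}_k^2 \approx \frac{2}{n} \sum_i
\Big[\, k(x^s_{2i-1}, x^s_{2i}) + k(x^t_{2i-1}, x^t_{2i})
      - k(x^s_{2i-1}, x^t_{2i}) - k(x^s_{2i}, x^t_{2i-1}) \,\Big]
\]

The four terms correspond to the four pair computations in Backward_gpu further down, and the mixture weights \(\beta_m\) are the beta_ array that the layer periodically re-solves for.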
1. Add the header declaration
Declare MMDLossLayer in /caffe-master/include/caffe/layers/neuron_layer.hpp:
#ifndef CAFFE_NEURON_LAYER_HPP_
#define CAFFE_NEURON_LAYER_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe {
/**
* @brief An interface for layers that take one blob as input (@f$ x @f$)
* and produce one equally-sized blob as output (@f$ y @f$), where
* each element of the output depends only on the corresponding input
* element.
*/
template <typename Dtype>
class NeuronLayer : public Layer<Dtype> {
public:
explicit NeuronLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }
};
template <typename Dtype>
class MMDLossLayer : public NeuronLayer<Dtype> {
public:
explicit MMDLossLayer(const LayerParameter& param)
: NeuronLayer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual inline const char* type() const { return "MMDLoss"; }
virtual inline int ExactNumBottomBlobs() const { return -1; }
protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
Dtype* beta_;             // mixture weights of the Gaussian kernels
Blob<Dtype> mmd_data_;    // scratch buffer for pairwise difference vectors
Dtype mmd_lambda_;        // overall weight of the MMD gradient
int input_num_;           // number of samples in the batch (bottom[0])
int data_dim_;            // feature dimension of each sample
int size_of_source_;      // number of source samples in the current batch
int size_of_target_;      // number of target samples in the current batch
Dtype gamma_;             // base kernel bandwidth, re-estimated from the data
int num_of_kernel_;       // number of Gaussian kernels in the mixture
int* source_index_;       // batch indices of source samples
int* target_index_;       // batch indices of target samples
int iter_of_epoch_;       // iterations per epoch before beta_ is updated
int now_iter_;            // iteration counter within the current epoch
bool fix_gamma_;          // keep gamma_ fixed once it has been estimated
Dtype** Q_;               // accumulated quadratic term for the beta_ QP
Dtype* sum_of_epoch_;     // per-kernel gradient statistics of the epoch
Dtype* variance_;         // per-kernel variance estimate of the epoch
Dtype I_lambda_;          // regularizer added to the diagonal of Q_
int all_sample_num_;      // number of samples accumulated in the epoch
int top_k_;               // kernels kept by the "max"/"L2"/"max_ratio" methods
Dtype* sum_of_pure_mmd_;  // accumulated unweighted per-kernel MMD terms
int method_number_;       // "none"=0, "max"=1, "max_ratio"=3, "L2"=4
Dtype kernel_mul_;        // bandwidth ratio between consecutive kernels
};
} // namespace caffe
#endif // CAFFE_NEURON_LAYER_HPP_
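The members above parameterize a mixture of num_of_kernel_ Gaussian kernels: gamma_ is the shared base bandwidth, kernel_mul_ spreads the individual bandwidths geometrically around it, and beta_ weights the mixture. A minimal illustrative helper (not part of the layer; the real computation lives in mmd_layer.cu below) showing how these members combine:

#include <cmath>

// Illustrative only: value of the multi-kernel Gaussian
// k(x, y) = sum_j beta[j] * exp(-gamma_j * ||x - y||^2), with the per-kernel
// bandwidths fanned out geometrically around the base bandwidth, exactly as
// the loops in mmd_layer.cu do.
template <typename Dtype>
Dtype multi_kernel_value(Dtype square_distance, Dtype gamma, Dtype kernel_mul,
                         int num_of_kernel, const Dtype* beta) {
  // start at gamma / kernel_mul^(M/2), then multiply by kernel_mul per kernel
  Dtype temp_gamma = gamma / std::pow(kernel_mul, Dtype(num_of_kernel / 2));
  Dtype value = 0;
  for (int j = 0; j < num_of_kernel; ++j) {
    value += beta[j] * std::exp(-temp_gamma * square_distance);
    temp_gamma *= kernel_mul;
  }
  return value;
}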
2. Add the mmd_layer.cpp file under /caffe-master/src/caffe/layers:
#include <algorithm>
#include <cmath>
#include <vector>
#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/layers/neuron_layer.hpp"
namespace caffe {
template <typename Dtype>
void MMDLossLayer<Dtype>::LayerSetUp(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
NeuronLayer<Dtype>::LayerSetUp(bottom, top);
input_num_ = bottom[0]->count(0, 1);
data_dim_ = bottom[0]->count(1);
num_of_kernel_ = this->layer_param_.mmd_param().num_of_kernel();
mmd_lambda_ = this->layer_param_.mmd_param().mmd_lambda();
iter_of_epoch_ = this->layer_param_.mmd_param().iter_of_epoch();
fix_gamma_ = this->layer_param_.mmd_param().fix_gamma();
beta_ = new Dtype[num_of_kernel_];
caffe_set(num_of_kernel_, Dtype(1.0) / num_of_kernel_, beta_);
now_iter_ = 0;
sum_of_epoch_ = new Dtype[num_of_kernel_];
caffe_set(num_of_kernel_, Dtype(0), sum_of_epoch_);
gamma_ = Dtype(-1);
Q_ = new Dtype* [num_of_kernel_];
for(int i = 0; i < num_of_kernel_; i++){
Q_[i] = new Dtype[num_of_kernel_];
caffe_set(num_of_kernel_, Dtype(0), Q_[i]);
}
variance_ = new Dtype[num_of_kernel_];
caffe_set(num_of_kernel_, Dtype(0), variance_);
sum_of_pure_mmd_ = new Dtype[num_of_kernel_];
caffe_set(num_of_kernel_, Dtype(0), sum_of_pure_mmd_);
all_sample_num_ = 0;
kernel_mul_ = this->layer_param_.mmd_param().kernel_mul();
if(this->layer_param_.mmd_param().method() == "max"){
method_number_ = 1;
top_k_ = this->layer_param_.mmd_param().method_param().top_num();
}
else if(this->layer_param_.mmd_param().method() == "none"){
method_number_ = 0;
}
else if(this->layer_param_.mmd_param().method() == "L2"){
method_number_ = 4;
top_k_ = this->layer_param_.mmd_param().method_param().top_num();
I_lambda_ = this->layer_param_.mmd_param().method_param().i_lambda();
}
else if(this->layer_param_.mmd_param().method() == "max_ratio"){
top_k_ = this->layer_param_.mmd_param().method_param().top_num();
method_number_ = 3;
}
LOG(INFO) << this->layer_param_.mmd_param().method() << " num: " << method_number_;
source_index_ = new int[input_num_];
target_index_ = new int[input_num_];
mmd_data_.Reshape(1, 1, 1, data_dim_);
}
template <typename Dtype>
void MMDLossLayer<Dtype>::Reshape(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
NeuronLayer<Dtype>::Reshape(bottom, top);
}
template <typename Dtype>
void MMDLossLayer<Dtype>::Forward_cpu(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
// The layer is typically used in-place (top and bottom are the same blob, see
// the usage example in section 6), so the forward pass is a no-op; the MMD
// term only contributes gradients in the backward pass.
}
template <typename Dtype>
void MMDLossLayer<Dtype>::Backward_cpu(
const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
// The CPU backward pass is not implemented; use the GPU version in mmd_layer.cu.
}
#ifdef CPU_ONLY
STUB_GPU(MMDLossLayer);
#endif
INSTANTIATE_CLASS(MMDLossLayer);
REGISTER_LAYER_CLASS(MMDLoss);
}
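Once this file is compiled in, REGISTER_LAYER_CLASS(MMDLoss) exposes the layer through Caffe's layer factory under the type string "MMDLoss", so it can be instantiated from a prototxt like any built-in layer. A minimal sketch (assuming the rebuilt Caffe from step 5; the mmd_param setter only exists after the proto change in step 3):

#include <boost/shared_ptr.hpp>
#include "caffe/layer.hpp"
#include "caffe/layer_factory.hpp"
#include "caffe/proto/caffe.pb.h"

// Create the layer through the registry, as Net<Dtype> does when it parses a
// prototxt entry with type: "MMDLoss".
void create_mmd_layer() {
  caffe::LayerParameter param;
  param.set_type("MMDLoss");
  param.mutable_mmd_param()->set_num_of_kernel(5);
  boost::shared_ptr<caffe::Layer<float> > layer =
      caffe::LayerRegistry<float>::CreateLayer(param);
}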
Then add the mmd_layer.cu file under /caffe-master/src/caffe/layers:
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <utility>
#include <vector>
#include <CGAL/basic.h>
#include <CGAL/QP_models.h>
#include <CGAL/QP_functions.h>
#include <CGAL/MP_Float.h>
typedef CGAL::MP_Float ET;
#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/layers/neuron_layer.hpp"
#include "caffe/util/output_matrix.hpp"
typedef CGAL::Quadratic_program_from_iterators
<float**, float*, CGAL::Const_oneset_iterator<CGAL::Comparison_result>,
bool*, float*, bool*, float*, float**, float*> Program;
typedef CGAL::Quadratic_program_solution<ET> Solution;
namespace caffe {
template <typename Dtype>
void MMDLossLayer<Dtype>::Forward_gpu(
const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
// forward is a no-op; the layer only contributes gradients in Backward_gpu
}
template <typename Dtype>
void perm_source_and_target(int num, int* source_index, int* target_index,
int& size_of_source, int& size_of_target, const Dtype* label){
int source_pos = 0;
int target_pos = 0;
for(int i = 0;i < num;++i){
if(label[i * 2] < 0){
//source data
source_index[source_pos++] = i;
}
else{
//target data
target_index[target_pos++] = i;
}
}
size_of_source = source_pos;
size_of_target = target_pos;
}
template <typename Dtype>
std::vector<std::pair<Dtype, int> > maxn(int num_of_max, Dtype* mmd, int num_of_kernel){
std::vector<std::pair<Dtype, int> > temp;
for(int i = 0; i < num_of_kernel; i++){
temp.push_back(std::make_pair(mmd[i], i));
}
std::partial_sort(
temp.begin(), temp.begin() + num_of_max, temp.end(), std::greater<std::pair<Dtype, int> >());
return temp;
}
template <typename Dtype>
void MMDLossLayer<Dtype>::Backward_gpu(
const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
if(mmd_lambda_ == 0){
return;
}
now_iter_++;
Dtype sum;
caffe_gpu_asum(input_num_ * data_dim_, bottom[0]->gpu_diff(), &sum);
LOG(INFO) << "before mmd diff " << sum;
perm_source_and_target(input_num_, source_index_, target_index_,
size_of_source_, size_of_target_, bottom[1]->cpu_data());
int sample_num;
if (size_of_source_ <= 1 || size_of_target_ <= 1){
return;
}
if(size_of_source_ > size_of_target_){
sample_num = size_of_source_;
}
else{
sample_num = size_of_target_;
}
int s1,s2,t1,t2;
srand((unsigned int)time(0));
Dtype* bottom_data = bottom[0]->mutable_gpu_data();
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
Dtype* tempX1 = mmd_data_.mutable_gpu_data();
Dtype* tempX2 = mmd_data_.mutable_gpu_diff();
Dtype square_distance;
Dtype bandwidth = 0;
for(int i = 0; i < input_num_; i++){
s1 = rand() % input_num_;
s2 = rand() % input_num_;
s2 = (s1 != s2) ? s2 : (s2 + 1) % input_num_;
caffe_gpu_memcpy(sizeof(Dtype) * data_dim_, bottom_data + s1 * data_dim_, tempX1);
caffe_gpu_memcpy(sizeof(Dtype) * data_dim_, bottom_data + s2 * data_dim_, tempX2);
caffe_gpu_sub(data_dim_, tempX1, tempX2, tempX2);
caffe_gpu_dot(data_dim_, tempX2, tempX2, &square_distance);
bandwidth += square_distance;
}
if(fix_gamma_){
gamma_ = gamma_ < 0 ? (Dtype)input_num_ / bandwidth : gamma_;
}
else{
gamma_ = (Dtype)input_num_ / bandwidth;
}
LOG(INFO) << "bandwidth " << gamma_;
Dtype loss = 0;
Dtype* temp_loss1 = new Dtype[num_of_kernel_];
Dtype* temp_loss2 = new Dtype[num_of_kernel_];
Dtype* temp_loss3 = new Dtype[num_of_kernel_];
Dtype* temp_loss4 = new Dtype[num_of_kernel_];
all_sample_num_ += sample_num;
for(int i = 0; i < sample_num; i++){
//random get sample, insert code
s1 = rand() % size_of_source_;
s2 = rand() % size_of_source_;
s2 = (s1 != s2) ? s2 : (s2 + 1) % size_of_source_;
t1 = rand() % size_of_target_;
t2 = rand() % size_of_target_;
t2 = (t1 != t2) ? t2 : (t2 + 1) % size_of_target_;
s1 = source_index_[s1];
s2 = source_index_[s2];
t1 = target_index_[t1];
t2 = target_index_[t2];
//
Dtype square_sum = 0;
Dtype factor_for_diff = 0;
const Dtype* x_s1 = bottom_data + s1 * data_dim_;
const Dtype* x_s2 = bottom_data + s2 * data_dim_;
const Dtype* x_t1 = bottom_data + t1 * data_dim_;
const Dtype* x_t2 = bottom_data + t2 * data_dim_;
caffe_gpu_sub(data_dim_, x_s1, x_s2, tempX1);
caffe_gpu_sub(data_dim_, x_s2, x_s1, tempX2);
caffe_gpu_dot(data_dim_, tempX1, tempX1, &square_sum);
Dtype times = pow(kernel_mul_, (Dtype)(num_of_kernel_ / 2));
Dtype temp_gamma = gamma_ / times;
for(int j = 0; j < num_of_kernel_; j++){
Dtype temp_n = (0.0 - temp_gamma) * square_sum;
temp_n = exp(temp_n);
sum_of_pure_mmd_[j] += temp_n;
temp_n = temp_n * beta_[j];
if(i % 2 == 0){
temp_loss1[j] = temp_n;
}
else{
temp_loss2[j] = temp_n;
}
if(i % 2 == 0){
temp_loss3[j] = temp_n;
}
else{
temp_loss4[j] = temp_n;
}
loss += temp_n;
temp_n = (-2) * temp_gamma * temp_n;
sum_of_epoch_[j] += temp_n;
factor_for_diff += temp_n;
temp_gamma = temp_gamma * kernel_mul_;
}
caffe_gpu_scal(data_dim_, mmd_lambda_ * factor_for_diff / sample_num * Dtype(32), tempX1);
caffe_gpu_scal(data_dim_, mmd_lambda_ * factor_for_diff / sample_num * Dtype(32), tempX2);
caffe_gpu_add(data_dim_, tempX1, bottom_diff + s1 * data_dim_, bottom_diff + s1 * data_dim_);
caffe_gpu_add(data_dim_, tempX2, bottom_diff + s2 * data_dim_, bottom_diff + s2 * data_dim_);
factor_for_diff = 0;
caffe_gpu_sub(data_dim_, x_s1, x_t2, tempX1);
caffe_gpu_sub(data_dim_, x_t2, x_s1, tempX2);
caffe_gpu_dot(data_dim_, tempX1, tempX1, &square_sum);
temp_gamma = gamma_ / times;
for(int j = 0; j < num_of_kernel_; j++){
Dtype temp_n = (0.0 - temp_gamma) * square_sum;
temp_n = exp(temp_n) * Dtype(-1);
sum_of_pure_mmd_[j] += temp_n;
if(i % 2 == 0){
temp_loss1[j] += temp_n;
}
else{
temp_loss2[j] += temp_n;
}
temp_n = temp_n * beta_[j];
if(i % 2 == 0){
temp_loss3[j] = temp_n;
}
else{
temp_loss4[j] = temp_n;
}
loss += temp_n;
temp_n = (-2) * temp_gamma * temp_n;
sum_of_epoch_[j] += temp_n;
factor_for_diff += temp_n;
temp_gamma = temp_gamma * kernel_mul_;
}
caffe_gpu_scal(data_dim_, mmd_lambda_ * factor_for_diff / sample_num * Dtype(32), tempX1);
caffe_gpu_scal(data_dim_, mmd_lambda_ * factor_for_diff / sample_num * Dtype(32), tempX2);
caffe_gpu_add(data_dim_, tempX1, bottom_diff + s1 * data_dim_, bottom_diff + s1 * data_dim_);
caffe_gpu_add(data_dim_, tempX2, bottom_diff + t2 * data_dim_, bottom_diff + t2 * data_dim_);
factor_for_diff = 0;
caffe_gpu_sub(data_dim_, x_t1, x_s2, tempX1);
caffe_gpu_sub(data_dim_, x_s2, x_t1, tempX2);
caffe_gpu_dot(data_dim_, tempX1, tempX1, &square_sum);
temp_gamma = gamma_ / times;
for(int j = 0; j < num_of_kernel_; j++){
Dtype temp_n = (0.0 - temp_gamma) * square_sum;
temp_n = exp(temp_n) * Dtype(-1);
sum_of_pure_mmd_[j] += temp_n;
if(i % 2 == 0){
temp_loss1[j] += temp_n;
}
else{
temp_loss2[j] += temp_n;
}
temp_n = temp_n * beta_[j];
if(i % 2 == 0){
temp_loss3[j] = temp_n;
}
else{
temp_loss4[j] = temp_n;
}
loss += temp_n;
temp_n = (-2) * temp_gamma * temp_n;
sum_of_epoch_[j] += temp_n;
factor_for_diff += temp_n;
temp_gamma = temp_gamma * kernel_mul_;
}
caffe_gpu_scal(data_dim_, mmd_lambda_ * factor_for_diff / sample_num * Dtype(32), tempX1);
caffe_gpu_scal(data_dim_, mmd_lambda_ * factor_for_diff / sample_num * Dtype(32), tempX2);
caffe_gpu_add(data_dim_, tempX1, bottom_diff + t1 * data_dim_, bottom_diff + t1 * data_dim_);
caffe_gpu_add(data_dim_, tempX2, bottom_diff + s2 * data_dim_, bottom_diff + s2 * data_dim_);
factor_for_diff = 0;
caffe_gpu_sub(data_dim_, x_t1, x_t2, tempX1);
caffe_gpu_sub(data_dim_, x_t2, x_t1, tempX2);
caffe_gpu_dot(data_dim_, tempX1, tempX1, &square_sum);
temp_gamma = gamma_ / times;
for(int j = 0; j < num_of_kernel_; j++){
Dtype temp_n = (0.0 - temp_gamma) * square_sum;
temp_n = exp(temp_n);
sum_of_pure_mmd_[j] += temp_n;
if(i % 2 == 0){
temp_loss1[j] += temp_n;
}
else{
temp_loss2[j] += temp_n;
}
temp_n = temp_n * beta_[j];
if(i % 2 == 0){
temp_loss3[j] = temp_n;
}
else{
temp_loss4[j] = temp_n;
}
loss += temp_n;
temp_n = (-2) * temp_gamma * temp_n;
sum_of_epoch_[j] += temp_n;
factor_for_diff += temp_n;
temp_gamma = temp_gamma * kernel_mul_;
}
caffe_gpu_scal(data_dim_, mmd_lambda_ * factor_for_diff / sample_num * Dtype(32), tempX1);
caffe_gpu_scal(data_dim_, mmd_lambda_ * factor_for_diff / sample_num * Dtype(32), tempX2);
caffe_gpu_add(data_dim_, tempX1, bottom_diff + t1 * data_dim_, bottom_diff + t1 * data_dim_);
caffe_gpu_add(data_dim_, tempX2, bottom_diff + t2 * data_dim_, bottom_diff + t2 * data_dim_);
if(i % 2 == 1){
caffe_sub(num_of_kernel_, temp_loss1, temp_loss2, temp_loss1);
caffe_mul(num_of_kernel_, temp_loss1, temp_loss1, temp_loss1);
caffe_add(num_of_kernel_, temp_loss1, variance_, variance_);
caffe_sub(num_of_kernel_, temp_loss3, temp_loss4, temp_loss3);
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num_of_kernel_, num_of_kernel_, 1, Dtype(1),temp_loss3, temp_loss3, Dtype(1), Q_[0]);
}
}
delete [] temp_loss1;
delete [] temp_loss2;
delete [] temp_loss3;
delete [] temp_loss4;
if(now_iter_ >= iter_of_epoch_){
gamma_ = Dtype(-1);
now_iter_ = 0;
//update beta
//normalize Q and sum_of_epoch_
caffe_scal(num_of_kernel_ * num_of_kernel_, Dtype(2) / all_sample_num_, Q_[0]);
caffe_scal(num_of_kernel_, Dtype(1) / all_sample_num_, sum_of_epoch_);
for(int i = 0; i < num_of_kernel_; i++){
for(int j = 0; j < num_of_kernel_; j++){
if(i != j){
Q_[0][i * num_of_kernel_ + j] = Dtype(0);
}
else{
Q_[0][i * num_of_kernel_ + j] += I_lambda_;
}
}
}
//Q <- Q + \lambda I
if(method_number_ == 4){
caffe_set(num_of_kernel_ * num_of_kernel_, Dtype(0), Q_[0]);
for(int i = 0;i < num_of_kernel_;++i){
Q_[0][(num_of_kernel_ + 1) * i] += Dtype(1);
}
}
//print_gpu_matrix(Q_[0], num_of_kernel_, num_of_kernel_, num_of_kernel_, num_of_kernel_);
//for(int i = 0; i < num_of_kernel_; i++){
// LOG(INFO) << sum_of_pure_mmd_[i];
//}
bool temp_bool = true;
for(int i = 0; i < num_of_kernel_; i++){
if(sum_of_pure_mmd_[i] > 0){
temp_bool = false;
}
}
bool has_negative = false;
for(int i = 0; i < num_of_kernel_; i++){
if(sum_of_pure_mmd_[i] < 0){
has_negative = true;
}
}
if(temp_bool){
caffe_scal(num_of_kernel_ * num_of_kernel_, Dtype(-1), Q_[0]);
}
switch(method_number_){
case 1:
{
if(has_negative){
break;
}
//sort by total kernel value
std::vector > sorted_kernels = maxn(top_k_, sum_of_pure_mmd_, num_of_kernel_);
caffe_set(num_of_kernel_, Dtype(0), beta_);
Dtype top_sum = 0;
for(int i = 0;i < top_k_;++i){
if(sorted_kernels[i].first > 0){
top_sum += sorted_kernels[i].first;
}
}
LOG(INFO) << "top_sum " << top_sum;
for(int i = 0;i < top_k_;++i){
LOG(INFO) << "mmd " << sorted_kernels[i].first;
LOG(INFO) << "id " << sorted_kernels[i].second;
if(sorted_kernels[i].first > 0){
beta_[sorted_kernels[i].second] = sorted_kernels[i].first / top_sum;
LOG(INFO) << "beta " << beta_[sorted_kernels[i].second];
}
}
break;
}
case 4:
{
float *equal_cons[num_of_kernel_];
bool lw_cons[num_of_kernel_];
bool up_cons[num_of_kernel_];
float lw_mul[num_of_kernel_];
float up_mul[num_of_kernel_];
float obj_first[num_of_kernel_];
for(int i = 0; i < num_of_kernel_; i++){
equal_cons[i] = new float[1];
equal_cons[i][0] = sum_of_pure_mmd_[i];
lw_cons[i] = true;
up_cons[i] = false;
lw_mul[i] = 0.0;
up_mul[i] = 0.0;
obj_first[i] = Dtype(0);
}
float b[1];
if(temp_bool){
b[0] = Dtype(-1);
}
else{
b[0] = Dtype(1);
}
CGAL::Const_oneset_iterator<CGAL::Comparison_result> r(CGAL::EQUAL);
Program qp(num_of_kernel_, 1, equal_cons, b, r, lw_cons, lw_mul, up_cons, up_mul, (float**)Q_, obj_first, 0);
Solution s = CGAL::solve_quadratic_program(qp, ET());
LOG(INFO) << "before s";
LOG(INFO) << s;
int j = 0;
if(!has_negative){
for(CGAL::Quadratic_program_solution<ET>::Variable_value_iterator
it = s.variable_values_begin();
it < s.variable_values_end();++it, ++j){
beta_[j] = (Dtype)to_double(*it);
}
Dtype beta_sum = caffe_cpu_asum(num_of_kernel_, beta_);
caffe_scal(num_of_kernel_, 1 / beta_sum, beta_);
std::vector > sorted_betas = maxn(top_k_, beta_, num_of_kernel_);
caffe_set(num_of_kernel_, Dtype(0), beta_);
Dtype top_sum = 0;
for(int i = 0;i < top_k_;++i){
if(sorted_betas[i].first > 0){
top_sum += sorted_betas[i].first;
}
}
for(int i = 0;i < top_k_;++i){
LOG(INFO) << "mmd " << sorted_betas[i].first;
LOG(INFO) << "id " << sorted_betas[i].second;
if(sorted_betas[i].first > 0){
beta_[sorted_betas[i].second] = sorted_betas[i].first / top_sum;
LOG(INFO) << "beta " << beta_[sorted_betas[i].second];
}
}
}
else{
LOG(INFO) << "has negative value, do not change beta";
}
break;
}
case 3:
{
for(int i = 0; i < num_of_kernel_; i++){
if(sum_of_pure_mmd_[i] != 0){
sum_of_pure_mmd_[i] = sum_of_pure_mmd_[i] / (sqrt(variance_[i] + I_lambda_));
}
}
std::vector > sorted_kernels = maxn(top_k_, sum_of_pure_mmd_, num_of_kernel_);
caffe_set(num_of_kernel_, Dtype(0), beta_);
Dtype top_sum = 0;
for(int i = 0;i < top_k_;++i){
if(sorted_kernels[i].first > 0){
top_sum += sorted_kernels[i].first;
}
}
LOG(INFO) << "top_sum " << top_sum;
for(int i = 0;i < top_k_;++i){
LOG(INFO) << "mmd " << sorted_kernels[i].first;
LOG(INFO) << "id " << sorted_kernels[i].second;
if(sorted_kernels[i].first > 0){
beta_[sorted_kernels[i].second] = sorted_kernels[i].first / top_sum;
LOG(INFO) << "beta " << beta_[sorted_kernels[i].second];
}
}
break;
}
default:
break;
}
//use Q and sum_of_epoch_ to solve convex problem
caffe_set(num_of_kernel_ * num_of_kernel_, Dtype(0), Q_[0]);
caffe_set(num_of_kernel_, Dtype(0), variance_);
all_sample_num_ = 0;
caffe_set(num_of_kernel_, Dtype(0), sum_of_pure_mmd_);
}
/*LOG(INFO) << num_of_kernel_;*/
/*for(int i = 0; i < num_of_kernel_; i++){*/
/*LOG(INFO) << "kernel" << i << ": " << sum_of_epoch_[i];*/
/*}*/
caffe_set(num_of_kernel_, Dtype(0), sum_of_epoch_);
caffe_gpu_asum(input_num_ * data_dim_, bottom[0]->gpu_diff(), &sum);
LOG(INFO) << "after mmd diff sum " << sum;
LOG(INFO) << "------";
}
INSTANTIATE_LAYER_GPU_FUNCS(MMDLossLayer);
}
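To see where the factor_for_diff terms come from, differentiate each Gaussian kernel with respect to one of its arguments (a sketch in my notation, matching the loops above):

\[
\frac{\partial}{\partial x}\, k_m(x, y) = -2\,\gamma_m\, k_m(x, y)\,(x - y),
\qquad
\gamma_m = \gamma \cdot \text{kernel\_mul}^{\,m - \lfloor M/2 \rfloor}
\]

For each sampled pair the code sums \(\beta_m \cdot (-2\gamma_m)\, k_m(x, y)\) over the kernels into factor_for_diff, then adds factor_for_diff times the difference vector (scaled by mmd_lambda_, the constant factor 32 used in the code, and 1/sample_num) to the diffs of the two samples involved. The base bandwidth is estimated from the batch itself as \(\gamma = n \,/\, \sum_{i=1}^{n} \lVert x_{a_i} - x_{b_i} \rVert^2\) over n random pairs; with fix_gamma_ it is estimated once per epoch, otherwise it is re-estimated at every backward pass.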
3. Declare the parameters in /caffe-master/src/caffe/proto/caffe.proto
Only two changes are needed:
1) Add a field for the new parameter message inside message LayerParameter:
optional MMDParameter mmd_param = 147;
The ID to use is the next available layer-specific ID, given by the comment just above message LayerParameter (remember to bump it to 148 after adding this field):
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 147 (last added: recurrent_param)
2) Still in caffe.proto, add the message that holds the layer's parameters:
// Message that stores parameters used by MMD layer
message MMDParameter {
optional uint32 num_of_kernel = 1 [default = 5];
optional float mmd_lambda = 2 [default = 1.0];
optional uint32 iter_of_epoch = 3 [default = 0];
optional float kernel_mul = 6 [default = 2];
optional MMDMethodParameter method_param = 4;
optional string method = 5 [default = "none"];
optional bool fix_gamma = 7 [default = false];
}
message MMDMethodParameter {
optional uint32 top_num = 1 [default = 1];
optional float i_lambda = 2 [default = 0.0001];
}
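As a quick sanity check of the proto change (a hypothetical snippet, assuming protoc has regenerated caffe.pb.h during the rebuild in step 5), the fields above turn into exactly the accessors that LayerSetUp reads:

#include <iostream>
#include "caffe/proto/caffe.pb.h"

int main() {
  caffe::MMDParameter p;
  // Defaults come from the [default = ...] annotations above.
  std::cout << p.num_of_kernel() << std::endl;           // 5
  std::cout << p.mmd_lambda() << std::endl;              // 1
  std::cout << p.method() << std::endl;                  // "none"
  std::cout << p.method_param().top_num() << std::endl;  // 1
  return 0;
}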
4. Because the layer uses the CGAL library, CGAL also has to be added to the LIBRARIES line of the Caffe Makefile (here /home/zhuangni/code/TransferMMD/caffe-mmd/Makefile):
LIBRARIES += glog gflags protobuf boost_system boost_filesystem m hdf5_hl hdf5 CGAL
5. Recompile Caffe.
6. Using the layer
layer {
name: "mmd3/4d"
type: "MMDLoss"
bottom: "fc7"
bottom: "label"
top: "fc7"
mmd_param {
num_of_kernel: 5
mmd_lambda: 0.3      # usually the only parameter that needs tuning
iter_of_epoch: 22    # iter_of_epoch = (source_num + target_num) / batch_size
method_param {
top_num: 5
i_lambda: 0.0
}
method: "none"
kernel_mul: 2.0
fix_gamma: false
}
}
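One practical note on the second bottom ("label"): the layer has to tell source samples from target samples, and perm_source_and_target() in mmd_layer.cu does this by looking at the first of two label values per sample, treating a negative value as a source-domain sample. A hypothetical helper illustrating that layout (the function name and the meaning of the second value are my assumptions, not part of the original code):

#include "caffe/blob.hpp"

// Fill a 2-channel label blob the way MMDLoss expects it: label[i * 2] < 0
// marks sample i as coming from the source domain.
void fill_domain_labels(caffe::Blob<float>& label, int num,
                        const bool* is_source, const float* class_label) {
  label.Reshape(num, 2, 1, 1);
  float* data = label.mutable_cpu_data();
  for (int i = 0; i < num; ++i) {
    data[i * 2] = is_source[i] ? -1.0f : 1.0f;  // domain flag read by MMDLoss
    data[i * 2 + 1] = class_label[i];           // e.g. the class label (assumption)
  }
}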