(1) Port the helper files
Copy coords.hpp and modified_permutohedral.hpp from include/caffe/util/ to the corresponding directory in caffe-windows.
Copy src/caffe/util/modified_permutohedral.cpp to the corresponding directory as well.
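For reference, the ported CRF code relies on only two member functions of ModifiedPermutohedral: init(), which builds the lattice from per-pixel features, and compute(), which performs the (optionally transposed, optionally accumulating) high-dimensional filtering. The sketch below is only an illustration of that interface as it can be inferred from the calls made in multi_stage_meanfield.cpp and meanfield_iteration.cpp; do not recreate the header from it, copy the real files instead:
// Illustrative sketch only -- inferred from how the ported code calls the class.
// Use the real include/caffe/util/modified_permutohedral.hpp as-is.
class ModifiedPermutohedral {
 public:
  // Build the permutohedral lattice from per-pixel features
  // (num_dimensions values per pixel, num_points pixels).
  void init(const float* features, int num_dimensions, int num_points);
  // Filter `in` (value_size channels of num_points values each) into `out`.
  // `reverse` selects the transposed (backward) pass; `add` accumulates into `out`.
  void compute(float* out, const float* in, int value_size,
      bool reverse = false, bool add = false);
  void compute(double* out, const double* in, int value_size,
      bool reverse = false, bool add = false);
};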
(2) Port the Layer changes
Add the following include to include/caffe/layer.hpp:
#include "caffe/util/coords.hpp"
and add the following virtual member function to the Layer class:
virtual DiagonalAffineMap<Dtype> coord_map() {
NOT_IMPLEMENTED;
// suppress warnings
return DiagonalAffineMap<Dtype>(vector<pair<Dtype, Dtype> >());
}
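The default implementation above only aborts with NOT_IMPLEMENTED; layers that participate in the coordinate mapping override it. As a purely illustrative example (not one of the files being ported, and assuming each pair holds a per-axis (scale, offset) as in the coords.hpp copied in step (1)), a layer whose output pixels correspond one-to-one to its input pixels could override it inside its class declaration like this:
// Hypothetical override for a coordinate-preserving layer:
// one (scale = 1, offset = 0) pair per spatial axis (height, width).
virtual DiagonalAffineMap<Dtype> coord_map() {
  return DiagonalAffineMap<Dtype>(
      vector<pair<Dtype, Dtype> >(2, make_pair(Dtype(1), Dtype(0))));
}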
The modified layer.hpp is as follows:
#ifndef CAFFE_LAYER_H_
#define CAFFE_LAYER_H_
#include <algorithm>
#include <string>
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer_factory.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/coords.hpp"
#include "caffe/util/math_functions.hpp"
/**
Forward declare boost::thread instead of including boost/thread.hpp
to avoid boost/NVCC issues (#1009, #1010) on OSX.
*/
namespace boost { class mutex; }
namespace caffe {
/**
* @brief An interface for the units of computation which can be composed into a
* Net.
*
* Layer%s must implement a Forward function, in which they take their input
* (bottom) Blob%s (if any) and compute their output Blob%s (if any).
* They may also implement a Backward function, in which they compute the error
* gradients with respect to their input Blob%s, given the error gradients with
respect to their output Blob%s.
*/
template <typename Dtype>
class Layer {
public:
/**
* You should not implement your own constructor. Any set up code should go
* to SetUp(), where the dimensions of the bottom blobs are provided to the
* layer.
*/
explicit Layer(const LayerParameter& param)
: layer_param_(param), is_shared_(false) {
// Set phase and copy blobs (if there are any).
phase_ = param.phase();
if (layer_param_.blobs_size() > 0) {
blobs_.resize(layer_param_.blobs_size());
for (int i = 0; i < layer_param_.blobs_size(); ++i) {
blobs_[i].reset(new Blob<Dtype>());
blobs_[i]->FromProto(layer_param_.blobs(i));
}
}
}
virtual ~Layer() {}
/**
* @brief Implements common layer setup functionality.
*
* @param bottom the preshaped input blobs
* @param top
* the allocated but unshaped output blobs, to be shaped by Reshape
*
* Checks that the number of bottom and top blobs is correct.
* Calls LayerSetUp to do special layer setup for individual layer types,
* followed by Reshape to set up sizes of top blobs and internal buffers.
* Sets up the loss weight multiplier blobs for any non-zero loss weights.
* This method may not be overridden.
*/
void SetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
InitMutex();
CheckBlobCounts(bottom, top);
LayerSetUp(bottom, top);
Reshape(bottom, top);
SetLossWeights(top);
}
/**
* @brief Does layer-specific setup: your layer should implement this function
* as well as Reshape.
*
* @param bottom
* the preshaped input blobs, whose data fields store the input data for
* this layer
* @param top
* the allocated but unshaped output blobs
*
* This method should do one-time layer specific setup. This includes reading
* and processing relevant parameters from the <code>layer_param_</code>.
* Setting up the shapes of top blobs and internal buffers should be done in
* <code>Reshape</code>, which will be called before the forward pass to
* adjust the top blob sizes.
*/
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {}
/**
* @brief Whether a layer should be shared by multiple nets during data
* parallelism. By default, all layers except for data layers should
* not be shared. data layers should be shared to ensure each worker
* solver access data sequentially during data parallelism.
*/
virtual inline bool ShareInParallel() const { return false; }
/** @brief Return whether this layer is actually shared by other nets.
* If ShareInParallel() is true and using more than one GPU and the
* net has TRAIN phase, then this function is expected to return true.
*/
inline bool IsShared() const { return is_shared_; }
/** @brief Set whether this layer is actually shared by other nets
* If ShareInParallel() is true and using more than one GPU and the
* net has TRAIN phase, then is_shared should be set true.
*/
inline void SetShared(bool is_shared) {
CHECK(ShareInParallel() || !is_shared)
<< type() << "Layer does not support sharing.";
is_shared_ = is_shared;
}
/**
* @brief Adjust the shapes of top blobs and internal buffers to accommodate
* the shapes of the bottom blobs.
*
* @param bottom the input blobs, with the requested input shapes
* @param top the top blobs, which should be reshaped as needed
*
* This method should reshape top blobs as needed according to the shapes
* of the bottom (input) blobs, as well as reshaping any internal buffers
* and making any other necessary adjustments so that the layer can
* accommodate the bottom blobs.
*/
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) = 0;
/**
* @brief Given the bottom blobs, compute the top blobs and the loss.
*
* @param bottom
* the input blobs, whose data fields store the input data for this layer
* @param top
* the preshaped output blobs, whose data fields will store this layer's
* outputs
* \return The total loss from the layer.
*
* The Forward wrapper calls the relevant device wrapper function
* (Forward_cpu or Forward_gpu) to compute the top blob values given the
* bottom blobs. If the layer has any non-zero loss_weights, the wrapper
* then computes and returns the loss.
*
* Your layer should implement Forward_cpu and (optionally) Forward_gpu.
*/
inline Dtype Forward(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
/**
* @brief Given the top blob error gradients, compute the bottom blob error
* gradients.
*
* @param top
* the output blobs, whose diff fields store the gradient of the error
* with respect to themselves
* @param propagate_down
* a vector with equal length to bottom, with each index indicating
* whether to propagate the error gradients down to the bottom blob at
* the corresponding index
* @param bottom
* the input blobs, whose diff fields will store the gradient of the error
* with respect to themselves after Backward is run
*
* The Backward wrapper calls the relevant device wrapper function
* (Backward_cpu or Backward_gpu) to compute the bottom blob diffs given the
* top blob diffs.
*
* Your layer should implement Backward_cpu and (optionally) Backward_gpu.
*/
inline void Backward(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom);
/**
* @brief Returns the vector of learnable parameter blobs.
*/
vector<shared_ptr<Blob<Dtype> > >& blobs() {
return blobs_;
}
/**
* @brief Returns the layer parameter.
*/
const LayerParameter& layer_param() const { return layer_param_; }
/**
* @brief Writes the layer parameter to a protocol buffer
*/
virtual void ToProto(LayerParameter* param, bool write_diff = false);
/**
* @brief Returns the scalar loss associated with a top blob at a given index.
*/
inline Dtype loss(const int top_index) const {
return (loss_.size() > top_index) ? loss_[top_index] : Dtype(0);
}
/**
* @brief Sets the loss associated with a top blob at a given index.
*/
inline void set_loss(const int top_index, const Dtype value) {
if (loss_.size() <= top_index) {
loss_.resize(top_index + 1, Dtype(0));
}
loss_[top_index] = value;
}
/**
* @brief Returns the layer type.
*/
virtual inline const char* type() const { return ""; }
/**
* @brief Returns the exact number of bottom blobs required by the layer,
* or -1 if no exact number is required.
*
* This method should be overridden to return a non-negative value if your
* layer expects some exact number of bottom blobs.
*/
virtual inline int ExactNumBottomBlobs() const { return -1; }
/**
* @brief Returns the minimum number of bottom blobs required by the layer,
* or -1 if no minimum number is required.
*
* This method should be overridden to return a non-negative value if your
* layer expects some minimum number of bottom blobs.
*/
virtual inline int MinBottomBlobs() const { return -1; }
/**
* @brief Returns the maximum number of bottom blobs required by the layer,
* or -1 if no maximum number is required.
*
* This method should be overridden to return a non-negative value if your
* layer expects some maximum number of bottom blobs.
*/
virtual inline int MaxBottomBlobs() const { return -1; }
/**
* @brief Returns the exact number of top blobs required by the layer,
* or -1 if no exact number is required.
*
* This method should be overridden to return a non-negative value if your
* layer expects some exact number of top blobs.
*/
virtual inline int ExactNumTopBlobs() const { return -1; }
/**
* @brief Returns the minimum number of top blobs required by the layer,
* or -1 if no minimum number is required.
*
* This method should be overridden to return a non-negative value if your
* layer expects some minimum number of top blobs.
*/
virtual inline int MinTopBlobs() const { return -1; }
/**
* @brief Returns the maximum number of top blobs required by the layer,
* or -1 if no maximum number is required.
*
* This method should be overridden to return a non-negative value if your
* layer expects some maximum number of top blobs.
*/
virtual inline int MaxTopBlobs() const { return -1; }
/**
* @brief Returns true if the layer requires an equal number of bottom and
* top blobs.
*
* This method should be overridden to return true if your layer expects an
* equal number of bottom and top blobs.
*/
virtual inline bool EqualNumBottomTopBlobs() const { return false; }
/**
* @brief Return whether "anonymous" top blobs are created automatically
* by the layer.
*
* If this method returns true, Net::Init will create enough "anonymous" top
* blobs to fulfill the requirement specified by ExactNumTopBlobs() or
* MinTopBlobs().
*/
virtual inline bool AutoTopBlobs() const { return false; }
/**
* @brief Return whether to allow force_backward for a given bottom blob
* index.
*
* If AllowForceBackward(i) == false, we will ignore the force_backward
* setting and backpropagate to blob i only if it needs gradient information
* (as is done when force_backward == false).
*/
virtual inline bool AllowForceBackward(const int bottom_index) const {
return true;
}
/**
* @brief Specifies whether the layer should compute gradients w.r.t. a
* parameter at a particular index given by param_id.
*
* You can safely ignore false values and always compute gradients
* for all parameters, but possibly with wasteful computation.
*/
inline bool param_propagate_down(const int param_id) {
return (param_propagate_down_.size() > param_id) ?
param_propagate_down_[param_id] : false;
}
/**
* @brief Sets whether the layer should compute gradients w.r.t. a
* parameter at a particular index given by param_id.
*/
inline void set_param_propagate_down(const int param_id, const bool value) {
if (param_propagate_down_.size() <= param_id) {
param_propagate_down_.resize(param_id + 1, true);
}
param_propagate_down_[param_id] = value;
}
virtual DiagonalAffineMap<Dtype> coord_map() {
NOT_IMPLEMENTED;
// suppress warnings
return DiagonalAffineMap<Dtype>(vector<pair<Dtype, Dtype> >());
}
protected:
/** The protobuf that stores the layer parameters */
LayerParameter layer_param_;
/** The phase: TRAIN or TEST */
Phase phase_;
/** The vector that stores the learnable parameters as a set of blobs. */
vector<shared_ptr<Blob<Dtype> > > blobs_;
/** Vector indicating whether to compute the diff of each param blob. */
vector<bool> param_propagate_down_;
/** The vector that indicates whether each top blob has a non-zero weight in
* the objective function. */
vector<Dtype> loss_;
/** @brief Using the CPU device, compute the layer output. */
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) = 0;
/**
* @brief Using the GPU device, compute the layer output.
* Fall back to Forward_cpu() if unavailable.
*/
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
// LOG(WARNING) << "Using CPU code as backup.";
return Forward_cpu(bottom, top);
}
/**
* @brief Using the CPU device, compute the gradients for any parameters and
* for the bottom blobs if propagate_down is true.
*/
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) = 0;
/**
* @brief Using the GPU device, compute the gradients for any parameters and
* for the bottom blobs if propagate_down is true.
* Fall back to Backward_cpu() if unavailable.
*/
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
// LOG(WARNING) << "Using CPU code as backup.";
Backward_cpu(top, propagate_down, bottom);
}
/**
* Called by the parent Layer's SetUp to check that the number of bottom
* and top Blobs provided as input match the expected numbers specified by
* the {ExactNum,Min,Max}{Bottom,Top}Blobs() functions.
*/
virtual void CheckBlobCounts(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
if (ExactNumBottomBlobs() >= 0) {
CHECK_EQ(ExactNumBottomBlobs(), bottom.size())
<< type() << " Layer takes " << ExactNumBottomBlobs()
<< " bottom blob(s) as input.";
}
if (MinBottomBlobs() >= 0) {
CHECK_LE(MinBottomBlobs(), bottom.size())
<< type() << " Layer takes at least " << MinBottomBlobs()
<< " bottom blob(s) as input.";
}
if (MaxBottomBlobs() >= 0) {
CHECK_GE(MaxBottomBlobs(), bottom.size())
<< type() << " Layer takes at most " << MaxBottomBlobs()
<< " bottom blob(s) as input.";
}
if (ExactNumTopBlobs() >= 0) {
CHECK_EQ(ExactNumTopBlobs(), top.size())
<< type() << " Layer produces " << ExactNumTopBlobs()
<< " top blob(s) as output.";
}
if (MinTopBlobs() >= 0) {
CHECK_LE(MinTopBlobs(), top.size())
<< type() << " Layer produces at least " << MinTopBlobs()
<< " top blob(s) as output.";
}
if (MaxTopBlobs() >= 0) {
CHECK_GE(MaxTopBlobs(), top.size())
<< type() << " Layer produces at most " << MaxTopBlobs()
<< " top blob(s) as output.";
}
if (EqualNumBottomTopBlobs()) {
CHECK_EQ(bottom.size(), top.size())
<< type() << " Layer produces one top blob as output for each "
<< "bottom blob input.";
}
}
/**
* Called by SetUp to initialize the weights associated with any top blobs in
* the loss function. Store non-zero loss weights in the diff blob.
*/
inline void SetLossWeights(const vector<Blob<Dtype>*>& top) {
const int num_loss_weights = layer_param_.loss_weight_size();
if (num_loss_weights) {
CHECK_EQ(top.size(), num_loss_weights) << "loss_weight must be "
"unspecified or specified once per top blob.";
for (int top_id = 0; top_id < top.size(); ++top_id) {
const Dtype loss_weight = layer_param_.loss_weight(top_id);
if (loss_weight == Dtype(0)) { continue; }
this->set_loss(top_id, loss_weight);
const int count = top[top_id]->count();
Dtype* loss_multiplier = top[top_id]->mutable_cpu_diff();
caffe_set(count, loss_weight, loss_multiplier);
}
}
}
private:
/** Whether this layer is actually shared by other nets*/
bool is_shared_;
/** The mutex for sequential forward if this layer is shared */
shared_ptr<boost::mutex> forward_mutex_;
/** Initialize forward_mutex_ */
void InitMutex();
/** Lock forward_mutex_ if this layer is shared */
void Lock();
/** Unlock forward_mutex_ if this layer is shared */
void Unlock();
DISABLE_COPY_AND_ASSIGN(Layer);
}; // class Layer
// Forward and backward wrappers. You should implement the cpu and
// gpu specific implementations instead, and should not change these
// functions.
template <typename Dtype>
inline Dtype Layer<Dtype>::Forward(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
// Lock during forward to ensure sequential forward
Lock();
Dtype loss = 0;
Reshape(bottom, top);
switch (Caffe::mode()) {
case Caffe::CPU:
Forward_cpu(bottom, top);
for (int top_id = 0; top_id < top.size(); ++top_id) {
if (!this->loss(top_id)) { continue; }
const int count = top[top_id]->count();
const Dtype* data = top[top_id]->cpu_data();
const Dtype* loss_weights = top[top_id]->cpu_diff();
loss += caffe_cpu_dot(count, data, loss_weights);
}
break;
case Caffe::GPU:
Forward_gpu(bottom, top);
#ifndef CPU_ONLY
for (int top_id = 0; top_id < top.size(); ++top_id) {
if (!this->loss(top_id)) { continue; }
const int count = top[top_id]->count();
const Dtype* data = top[top_id]->gpu_data();
const Dtype* loss_weights = top[top_id]->gpu_diff();
Dtype blob_loss = 0;
caffe_gpu_dot(count, data, loss_weights, &blob_loss);
loss += blob_loss;
}
#endif
break;
default:
LOG(FATAL) << "Unknown caffe mode.";
}
Unlock();
return loss;
}
template <typename Dtype>
inline void Layer<Dtype>::Backward(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down,
const vector<Blob<Dtype>*>& bottom) {
switch (Caffe::mode()) {
case Caffe::CPU:
Backward_cpu(top, propagate_down, bottom);
break;
case Caffe::GPU:
Backward_gpu(top, propagate_down, bottom);
break;
default:
LOG(FATAL) << "Unknown caffe mode.";
}
}
// Serialize LayerParameter to protocol buffer
template <typename Dtype>
void Layer<Dtype>::ToProto(LayerParameter* param, bool write_diff) {
param->Clear();
param->CopyFrom(layer_param_);
param->clear_blobs();
for (int i = 0; i < blobs_.size(); ++i) {
blobs_[i]->ToProto(param->add_blobs(), write_diff);
}
}
} // namespace caffe
#endif // CAFFE_LAYER_H_
(3) Port the CRF layers
Copy the declaration of MultiStageMeanfieldLayer from caffe/vision_layers.hpp into the caffe/layers/ directory of caffe-windows by creating multi_stage_meanfield.hpp.
The content of the file is as follows:
/*!
* \brief A helper class for {@link MultiStageMeanfieldLayer} class, which is the Caffe layer that implements the
* CRF-RNN described in the paper: Conditional Random Fields as Recurrent Neural Networks. IEEE ICCV 2015.
*
* This class itself is not a proper Caffe layer although it behaves like one to some degree.
*
* \authors Sadeep Jayasumana, Bernardino Romera-Paredes, Shuai Zheng, Zhizhong Su.
* \version 1.0
* \date 2015
* \copyright Torr Vision Group, University of Oxford.
* \details If you use this code, please consider citing the paper:
* Shuai Zheng, Sadeep Jayasumana, Bernardino Romera-Paredes, Vibhav Vineet, Zhizhong Su, Dalong Du,
* Chang Huang, Philip H. S. Torr. Conditional Random Fields as Recurrent Neural Networks. IEEE ICCV 2015.
*
* For more information about CRF-RNN, please visit the project website http://crfasrnn.torr.vision.
*/
#ifndef CAFFE_MULTI_STAGE_MEANFIELD_LAYER_HPP_
#define CAFFE_MULTI_STAGE_MEANFIELD_LAYER_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/modified_permutohedral.hpp"
#include "caffe/layers/softmax_layer.hpp"
#include "caffe/layers/eltwise_layer.hpp"
#include "caffe/layers/split_layer.hpp"
#include "caffe/layers/meanfield_iteration.hpp"  // MeanfieldIteration is used by this layer
#include <boost/shared_array.hpp>
namespace caffe {
/*!
* \brief The Caffe layer that implements the CRF-RNN described in the paper:
* Conditional Random Fields as Recurrent Neural Networks. IEEE ICCV 2015.
*
* \authors Sadeep Jayasumana, Bernardino Romera-Paredes, Shuai Zheng, Zhizhong Su.
* \version 1.0
* \date 2015
* \copyright Torr Vision Group, University of Oxford.
* \details If you use this code, please consider citing the paper:
* Shuai Zheng, Sadeep Jayasumana, Bernardino Romera-Paredes, Vibhav Vineet, Zhizhong Su, Dalong Du,
* Chang Huang, Philip H. S. Torr. Conditional Random Fields as Recurrent Neural Networks. IEEE ICCV 2015.
*
* For more information about CRF-RNN, please visit the project website http://crfasrnn.torr.vision.
*/
template <typename Dtype>
class MultiStageMeanfieldLayer : public Layer<Dtype> {

 public:
  explicit MultiStageMeanfieldLayer(const LayerParameter& param) : Layer<Dtype>(param) {}

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const {
    return "MultiStageMeanfield";
  }
  virtual inline int ExactNumBottomBlobs() const { return 3; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  virtual void compute_spatial_kernel(float* const output_kernel);
  virtual void compute_bilateral_kernel(const Blob<Dtype>* const rgb_blob, const int n, float* const output_kernel);

  int count_;
  int num_;
  int channels_;
  int height_;
  int width_;
  int num_pixels_;

  Dtype theta_alpha_;
  Dtype theta_beta_;
  Dtype theta_gamma_;
  int num_iterations_;

  boost::shared_array<Dtype> norm_feed_;
  Blob<Dtype> spatial_norm_;
  Blob<Dtype> bilateral_norms_;

  vector<Blob<Dtype>*> split_layer_bottom_vec_;
  vector<Blob<Dtype>*> split_layer_top_vec_;
  vector<shared_ptr<Blob<Dtype> > > split_layer_out_blobs_;
  vector<shared_ptr<Blob<Dtype> > > iteration_output_blobs_;
  vector<shared_ptr<MeanfieldIteration<Dtype> > > meanfield_iterations_;

  shared_ptr<SplitLayer<Dtype> > split_layer_;

  shared_ptr<ModifiedPermutohedral> spatial_lattice_;
  boost::shared_array<float> bilateral_kernel_buffer_;
  vector<shared_ptr<ModifiedPermutohedral> > bilateral_lattices_;
};

}  // namespace caffe

#endif  // CAFFE_MULTI_STAGE_MEANFIELD_LAYER_HPP_
Copy caffe/layers/multi_stage_meanfield.cpp to the corresponding directory in caffe-windows,
then change its #include of vision_layers.hpp to multi_stage_meanfield.hpp,
and at the end of multi_stage_meanfield.cpp register the class as follows:
INSTANTIATE_CLASS(MultiStageMeanfieldLayer);
REGISTER_LAYER_CLASS(MultiStageMeanfield);
The modified multi_stage_meanfield.cpp is shown below:
/*!
* \brief The Caffe layer that implements the CRF-RNN described in the paper:
* Conditional Random Fields as Recurrent Neural Networks. IEEE ICCV 2015.
*
* \authors Sadeep Jayasumana, Bernardino Romera-Paredes, Shuai Zheng, Zhizhong Su.
* \version 1.0
* \date 2015
* \copyright Torr Vision Group, University of Oxford.
* \details If you use this code, please consider citing the paper:
* Shuai Zheng, Sadeep Jayasumana, Bernardino Romera-Paredes, Vibhav Vineet, Zhizhong Su, Dalong Du,
* Chang Huang, Philip H. S. Torr. Conditional Random Fields as Recurrent Neural Networks. IEEE ICCV 2015.
*
* For more information about CRF-RNN, please visit the project website http://crfasrnn.torr.vision.
*/
#include <vector>
#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/im2col.hpp"
#include "caffe/layers/multi_stage_meanfield.hpp"
namespace caffe {
template <typename Dtype>
void MultiStageMeanfieldLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {

  const caffe::MultiStageMeanfieldParameter meanfield_param = this->layer_param_.multi_stage_meanfield_param();

  num_iterations_ = meanfield_param.num_iterations();
  CHECK_GT(num_iterations_, 1) << "Number of iterations must be greater than 1.";

  theta_alpha_ = meanfield_param.theta_alpha();
  theta_beta_ = meanfield_param.theta_beta();
  theta_gamma_ = meanfield_param.theta_gamma();

  count_ = bottom[0]->count();
  num_ = bottom[0]->num();
  channels_ = bottom[0]->channels();
  height_ = bottom[0]->height();
  width_ = bottom[0]->width();
  num_pixels_ = height_ * width_;

  LOG(INFO) << "This implementation has not been tested with batch size > 1.";

  top[0]->Reshape(num_, channels_, height_, width_);

  // Initialize the parameters that will be updated by backpropagation.
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Multimeanfield layer skipping parameter initialization.";
  } else {
    // blobs_[0] - spatial kernel weights, blobs_[1] - bilateral kernel weights, blobs_[2] - compatibility matrix
    this->blobs_.resize(3);

    // Allocate space for kernel weights.
    this->blobs_[0].reset(new Blob<Dtype>(1, 1, channels_, channels_));
    this->blobs_[1].reset(new Blob<Dtype>(1, 1, channels_, channels_));

    caffe_set(channels_ * channels_, Dtype(0.), this->blobs_[0]->mutable_cpu_data());
    caffe_set(channels_ * channels_, Dtype(0.), this->blobs_[1]->mutable_cpu_data());

    // Initialize the kernel weights. The two files spatial.par and bilateral.par should be available.
    FILE* pFile;
    pFile = fopen("spatial.par", "r");
    CHECK(pFile) << "The file 'spatial.par' is not found. Please create it with initial spatial kernel weights.";
    for (int i = 0; i < channels_; i++) {
      fscanf(pFile, "%lf", &this->blobs_[0]->mutable_cpu_data()[i * channels_ + i]);
    }
    fclose(pFile);

    pFile = fopen("bilateral.par", "r");
    CHECK(pFile) << "The file 'bilateral.par' is not found. Please create it with initial bilateral kernel weights.";
    for (int i = 0; i < channels_; i++) {
      fscanf(pFile, "%lf", &this->blobs_[1]->mutable_cpu_data()[i * channels_ + i]);
    }
    fclose(pFile);

    // Initialize the compatibility matrix.
    this->blobs_[2].reset(new Blob<Dtype>(1, 1, channels_, channels_));
    caffe_set(channels_ * channels_, Dtype(0.), this->blobs_[2]->mutable_cpu_data());

    // Initialize it to have the Potts model.
    for (int c = 0; c < channels_; ++c) {
      (this->blobs_[2]->mutable_cpu_data())[c * channels_ + c] = Dtype(-1.);
    }
  }

  // Initialize the spatial lattice. This does not need to be computed for every image because we use a fixed size.
  float* spatial_kernel = new float[2 * num_pixels_];  // allocated here; must be deleted after use
  compute_spatial_kernel(spatial_kernel);
  spatial_lattice_.reset(new ModifiedPermutohedral());
  spatial_lattice_->init(spatial_kernel, 2, num_pixels_);
  delete[] spatial_kernel;

  // Calculate spatial filter normalization factors.
  norm_feed_.reset(new Dtype[num_pixels_]);
  caffe_set(num_pixels_, Dtype(1.0), norm_feed_.get());
  spatial_norm_.Reshape(1, 1, height_, width_);
  Dtype* norm_data = spatial_norm_.mutable_cpu_data();
  spatial_lattice_->compute(norm_data, norm_feed_.get(), 1);
  for (int i = 0; i < num_pixels_; ++i) {
    norm_data[i] = 1.0f / (norm_data[i] + 1e-20f);
  }

  // Allocate space for bilateral kernels. This is a temporary buffer used to compute bilateral lattices later.
  // Also allocate space for holding bilateral filter normalization values.
  bilateral_kernel_buffer_.reset(new float[5 * num_pixels_]);
  bilateral_norms_.Reshape(num_, 1, height_, width_);

  // Configure the split layer that is used to make copies of the unary term. One copy for each iteration.
  // It may be possible to optimize this calculation later.
  split_layer_bottom_vec_.clear();
  split_layer_bottom_vec_.push_back(bottom[0]);

  split_layer_top_vec_.clear();
  split_layer_out_blobs_.resize(num_iterations_);
  for (int i = 0; i < num_iterations_; i++) {
    split_layer_out_blobs_[i].reset(new Blob<Dtype>());
    split_layer_top_vec_.push_back(split_layer_out_blobs_[i].get());
  }

  LayerParameter split_layer_param;
  split_layer_.reset(new SplitLayer<Dtype>(split_layer_param));
  split_layer_->SetUp(split_layer_bottom_vec_, split_layer_top_vec_);

  // Make blobs to store outputs of each meanfield iteration. Output of the last iteration is stored in top[0].
  // So we need only (num_iterations_ - 1) blobs.
  iteration_output_blobs_.resize(num_iterations_ - 1);
  for (int i = 0; i < num_iterations_ - 1; ++i) {
    iteration_output_blobs_[i].reset(new Blob<Dtype>(num_, channels_, height_, width_));
  }

  // Make instances of MeanfieldIteration and initialize them.
  meanfield_iterations_.resize(num_iterations_);
  for (int i = 0; i < num_iterations_; ++i) {
    meanfield_iterations_[i].reset(new MeanfieldIteration<Dtype>());
    meanfield_iterations_[i]->OneTimeSetUp(
        split_layer_out_blobs_[i].get(),  // unary terms
        (i == 0) ? bottom[1] : iteration_output_blobs_[i - 1].get(),  // softmax input
        (i == num_iterations_ - 1) ? top[0] : iteration_output_blobs_[i].get(),  // output blob
        spatial_lattice_,  // spatial lattice
        &spatial_norm_);  // spatial normalization factors.
  }

  this->param_propagate_down_.resize(this->blobs_.size(), true);

  LOG(INFO) << "MultiStageMeanfieldLayer initialized.";
}

template <typename Dtype>
void MultiStageMeanfieldLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Do nothing.
}
/**
* Performs filter-based mean field inference given the image and unaries.
*
* bottom[0] - Unary terms
* bottom[1] - Softmax input/Output from the previous iteration (a copy of the unary terms if this is the first stage).
* bottom[2] - RGB images
*
* top[0] - Output of the mean field inference (not normalized).
*/
template <typename Dtype>
void MultiStageMeanfieldLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {

  split_layer_bottom_vec_[0] = bottom[0];
  split_layer_->Forward(split_layer_bottom_vec_, split_layer_top_vec_);

  // Initialize the bilateral lattices.
  bilateral_lattices_.resize(num_);
  for (int n = 0; n < num_; ++n) {
    compute_bilateral_kernel(bottom[2], n, bilateral_kernel_buffer_.get());
    bilateral_lattices_[n].reset(new ModifiedPermutohedral());
    bilateral_lattices_[n]->init(bilateral_kernel_buffer_.get(), 5, num_pixels_);

    // Calculate bilateral filter normalization factors.
    Dtype* norm_output_data = bilateral_norms_.mutable_cpu_data() + bilateral_norms_.offset(n);
    bilateral_lattices_[n]->compute(norm_output_data, norm_feed_.get(), 1);
    for (int i = 0; i < num_pixels_; ++i) {
      norm_output_data[i] = 1.f / (norm_output_data[i] + 1e-20f);
    }
  }

  for (int i = 0; i < num_iterations_; ++i) {
    meanfield_iterations_[i]->PrePass(this->blobs_, &bilateral_lattices_, &bilateral_norms_);
    meanfield_iterations_[i]->Forward_cpu();
  }
}
/**
* Backprop through filter-based mean field inference.
*/
template <typename Dtype>
void MultiStageMeanfieldLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {

  for (int i = (num_iterations_ - 1); i >= 0; --i) {
    meanfield_iterations_[i]->Backward_cpu();
  }

  vector<bool> split_layer_propagate_down(1, true);
  split_layer_->Backward(split_layer_top_vec_, split_layer_propagate_down, split_layer_bottom_vec_);

  // Accumulate diffs from mean field iterations.
  for (int blob_id = 0; blob_id < this->blobs_.size(); ++blob_id) {
    Blob<Dtype>* cur_blob = this->blobs_[blob_id].get();
    if (this->param_propagate_down_[blob_id]) {
      caffe_set(cur_blob->count(), Dtype(0), cur_blob->mutable_cpu_diff());
      for (int i = 0; i < num_iterations_; ++i) {
        const Dtype* diffs_to_add = meanfield_iterations_[i]->blobs()[blob_id]->cpu_diff();
        caffe_axpy(cur_blob->count(), Dtype(1.), diffs_to_add, cur_blob->mutable_cpu_diff());
      }
    }
  }
}

template <typename Dtype>
void MultiStageMeanfieldLayer<Dtype>::compute_bilateral_kernel(const Blob<Dtype>* const rgb_blob, const int n,
    float* const output_kernel) {

  for (int p = 0; p < num_pixels_; ++p) {
    output_kernel[5 * p] = static_cast<float>(p % width_) / theta_alpha_;
    output_kernel[5 * p + 1] = static_cast<float>(p / width_) / theta_alpha_;

    const Dtype* const rgb_data_start = rgb_blob->cpu_data() + rgb_blob->offset(n);
    output_kernel[5 * p + 2] = static_cast<float>(rgb_data_start[p] / theta_beta_);
    output_kernel[5 * p + 3] = static_cast<float>((rgb_data_start + num_pixels_)[p] / theta_beta_);
    output_kernel[5 * p + 4] = static_cast<float>((rgb_data_start + num_pixels_ * 2)[p] / theta_beta_);
  }
}

template <typename Dtype>
void MultiStageMeanfieldLayer<Dtype>::compute_spatial_kernel(float* const output_kernel) {

  for (int p = 0; p < num_pixels_; ++p) {
    output_kernel[2 * p] = static_cast<float>(p % width_) / theta_gamma_;
    output_kernel[2 * p + 1] = static_cast<float>(p / width_) / theta_gamma_;
  }
}

INSTANTIATE_CLASS(MultiStageMeanfieldLayer);
REGISTER_LAYER_CLASS(MultiStageMeanfield);
} // namespace caffe
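After rebuilding caffe-windows, REGISTER_LAYER_CLASS makes the new layer available through the layer factory, so a quick sanity check is to create it by its type name. The snippet below is only a hypothetical test program for this purpose, not part of the ported files:
// Hypothetical sanity check: create the layer by type name through the factory.
// LayerRegistry aborts with "Unknown layer type" if the registration failed.
#include <boost/shared_ptr.hpp>
#include "caffe/layer.hpp"
#include "caffe/layer_factory.hpp"
#include "caffe/proto/caffe.pb.h"

int main() {
  caffe::LayerParameter param;
  param.set_name("crf_check");
  param.set_type("MultiStageMeanfield");
  boost::shared_ptr<caffe::Layer<float> > layer =
      caffe::LayerRegistry<float>::CreateLayer(param);
  return layer ? 0 : 1;
}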
Copy the declaration of MeanfieldIteration from caffe/vision_layers.hpp into the caffe/layers/ directory of caffe-windows by creating meanfield_iteration.hpp.
The meanfield_iteration.hpp file is as follows:
/*!
* \brief A helper class for {@link MultiStageMeanfieldLayer} class, which is the Caffe layer that implements the
* CRF-RNN described in the paper: Conditional Random Fields as Recurrent Neural Networks. IEEE ICCV 2015.
*
* This class itself is not a proper Caffe layer although it behaves like one to some degree.
*
* \authors Sadeep Jayasumana, Bernardino Romera-Paredes, Shuai Zheng, Zhizhong Su.
* \version 1.0
* \date 2015
* \copyright Torr Vision Group, University of Oxford.
* \details If you use this code, please consider citing the paper:
* Shuai Zheng, Sadeep Jayasumana, Bernardino Romera-Paredes, Vibhav Vineet, Zhizhong Su, Dalong Du,
* Chang Huang, Philip H. S. Torr. Conditional Random Fields as Recurrent Neural Networks. IEEE ICCV 2015.
*
* For more information about CRF-RNN, please visit the project website http://crfasrnn.torr.vision.
*/
#ifndef CAFFE_MEANFIELD_ITERATION_HPP_
#define CAFFE_MEANFIELD_ITERATION_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/modified_permutohedral.hpp"
#include "caffe/layers/softmax_layer.hpp"
#include "caffe/layers/eltwise_layer.hpp"
#include "caffe/layers/split_layer.hpp"
#include <boost/shared_array.hpp>
namespace caffe {
template <typename Dtype>
class MeanfieldIteration {

 public:
  /**
   * Must be invoked only once after the construction of the layer.
   */
  void OneTimeSetUp(
      Blob<Dtype>* const unary_terms,
      Blob<Dtype>* const softmax_input,
      Blob<Dtype>* const output_blob,
      const shared_ptr<ModifiedPermutohedral> spatial_lattice,
      const Blob<Dtype>* const spatial_norm);

  /**
   * Must be invoked before invoking {@link Forward_cpu()}
   */
  virtual void PrePass(
      const vector<shared_ptr<Blob<Dtype> > >& parameters_to_copy_from,
      const vector<shared_ptr<ModifiedPermutohedral> >* const bilateral_lattices,
      const Blob<Dtype>* const bilateral_norms);

  /**
   * Forward pass - to be called during inference.
   */
  virtual void Forward_cpu();

  /**
   * Backward pass - to be called during training.
   */
  virtual void Backward_cpu();

  // A quick hack. This should be properly encapsulated.
  vector<shared_ptr<Blob<Dtype> > >& blobs() {
    return blobs_;
  }

 protected:
  vector<shared_ptr<Blob<Dtype> > > blobs_;

  int count_;
  int num_;
  int channels_;
  int height_;
  int width_;
  int num_pixels_;

  Blob<Dtype> spatial_out_blob_;
  Blob<Dtype> bilateral_out_blob_;
  Blob<Dtype> pairwise_;
  Blob<Dtype> softmax_input_;
  Blob<Dtype> prob_;
  Blob<Dtype> message_passing_;

  vector<Blob<Dtype>*> softmax_top_vec_;
  vector<Blob<Dtype>*> softmax_bottom_vec_;
  vector<Blob<Dtype>*> sum_top_vec_;
  vector<Blob<Dtype>*> sum_bottom_vec_;

  shared_ptr<SoftmaxLayer<Dtype> > softmax_layer_;
  shared_ptr<EltwiseLayer<Dtype> > sum_layer_;

  shared_ptr<ModifiedPermutohedral> spatial_lattice_;
  const vector<shared_ptr<ModifiedPermutohedral> >* bilateral_lattices_;

  const Blob<Dtype>* spatial_norm_;
  const Blob<Dtype>* bilateral_norms_;
};
} // namespace caffe
#endif //CAFFE_MEANFIELD_ITERATION_HPP_
Copy caffe/layers/meanfield_iteration.cpp to the corresponding directory in caffe-windows,
then change its #include of vision_layers.hpp to meanfield_iteration.hpp.
The modified meanfield_iteration.cpp is shown below:
/*!
* \brief A helper class for {@link MultiStageMeanfieldLayer} class, which is the Caffe layer that implements the
* CRF-RNN described in the paper: Conditional Random Fields as Recurrent Neural Networks. IEEE ICCV 2015.
*
* This class itself is not a proper Caffe layer although it behaves like one to some degree.
*
* \authors Sadeep Jayasumana, Bernardino Romera-Paredes, Shuai Zheng, Zhizhong Su.
* \version 1.0
* \date 2015
* \copyright Torr Vision Group, University of Oxford.
* \details If you use this code, please consider citing the paper:
* Shuai Zheng, Sadeep Jayasumana, Bernardino Romera-Paredes, Vibhav Vineet, Zhizhong Su, Dalong Du,
* Chang Huang, Philip H. S. Torr. Conditional Random Fields as Recurrent Neural Networks. IEEE ICCV 2015.
*
* For more information about CRF-RNN, please visit the project website http://crfasrnn.torr.vision.
*/
#include <vector>
#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/layers/meanfield_iteration.hpp"
namespace caffe {
/**
* To be invoked once only immediately after construction.
*/
template <typename Dtype>
void MeanfieldIteration<Dtype>::OneTimeSetUp(
    Blob<Dtype>* const unary_terms,
    Blob<Dtype>* const softmax_input,
    Blob<Dtype>* const output_blob,
    const shared_ptr<ModifiedPermutohedral> spatial_lattice,
    const Blob<Dtype>* const spatial_norm) {

  spatial_lattice_ = spatial_lattice;
  spatial_norm_ = spatial_norm;

  count_ = unary_terms->count();
  num_ = unary_terms->num();
  channels_ = unary_terms->channels();
  height_ = unary_terms->height();
  width_ = unary_terms->width();
  num_pixels_ = height_ * width_;

  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Meanfield iteration skipping parameter initialization.";
  } else {
    blobs_.resize(3);
    blobs_[0].reset(new Blob<Dtype>(1, 1, channels_, channels_));  // spatial kernel weight
    blobs_[1].reset(new Blob<Dtype>(1, 1, channels_, channels_));  // bilateral kernel weight
    blobs_[2].reset(new Blob<Dtype>(1, 1, channels_, channels_));  // compatibility transform matrix
  }

  pairwise_.Reshape(num_, channels_, height_, width_);
  spatial_out_blob_.Reshape(num_, channels_, height_, width_);
  bilateral_out_blob_.Reshape(num_, channels_, height_, width_);
  message_passing_.Reshape(num_, channels_, height_, width_);

  // Softmax layer configuration
  softmax_bottom_vec_.clear();
  softmax_bottom_vec_.push_back(softmax_input);

  softmax_top_vec_.clear();
  softmax_top_vec_.push_back(&prob_);

  LayerParameter softmax_param;
  softmax_layer_.reset(new SoftmaxLayer<Dtype>(softmax_param));
  softmax_layer_->SetUp(softmax_bottom_vec_, softmax_top_vec_);

  // Sum layer configuration
  sum_bottom_vec_.clear();
  sum_bottom_vec_.push_back(unary_terms);
  sum_bottom_vec_.push_back(&pairwise_);

  sum_top_vec_.clear();
  sum_top_vec_.push_back(output_blob);

  LayerParameter sum_param;
  sum_param.mutable_eltwise_param()->add_coeff(Dtype(1.));
  sum_param.mutable_eltwise_param()->add_coeff(Dtype(-1.));
  sum_param.mutable_eltwise_param()->set_operation(EltwiseParameter_EltwiseOp_SUM);
  sum_layer_.reset(new EltwiseLayer<Dtype>(sum_param));
  sum_layer_->SetUp(sum_bottom_vec_, sum_top_vec_);
}
/**
* To be invoked before every call to the Forward_cpu() method.
*/
template <typename Dtype>
void MeanfieldIteration<Dtype>::PrePass(
    const vector<shared_ptr<Blob<Dtype> > >& parameters_to_copy_from,
    const vector<shared_ptr<ModifiedPermutohedral> >* const bilateral_lattices,
    const Blob<Dtype>* const bilateral_norms) {

  bilateral_lattices_ = bilateral_lattices;
  bilateral_norms_ = bilateral_norms;

  // Get copies of the up-to-date parameters.
  for (int i = 0; i < parameters_to_copy_from.size(); ++i) {
    blobs_[i]->CopyFrom(*(parameters_to_copy_from[i].get()));
  }
}
/**
* Forward pass during the inference.
*/
template <typename Dtype>
void MeanfieldIteration<Dtype>::Forward_cpu() {

  //------------------------------- Softmax normalization--------------------
  softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);

  //-----------------------------------Message passing-----------------------
  for (int n = 0; n < num_; ++n) {

    Dtype* spatial_out_data = spatial_out_blob_.mutable_cpu_data() + spatial_out_blob_.offset(n);
    const Dtype* prob_input_data = prob_.cpu_data() + prob_.offset(n);

    spatial_lattice_->compute(spatial_out_data, prob_input_data, channels_, false);

    // Pixel-wise normalization.
    for (int channel_id = 0; channel_id < channels_; ++channel_id) {
      caffe_mul(num_pixels_, spatial_norm_->cpu_data(),
          spatial_out_data + channel_id * num_pixels_,
          spatial_out_data + channel_id * num_pixels_);
    }

    Dtype* bilateral_out_data = bilateral_out_blob_.mutable_cpu_data() + bilateral_out_blob_.offset(n);

    (*bilateral_lattices_)[n]->compute(bilateral_out_data, prob_input_data, channels_, false);

    // Pixel-wise normalization.
    for (int channel_id = 0; channel_id < channels_; ++channel_id) {
      caffe_mul(num_pixels_, bilateral_norms_->cpu_data() + bilateral_norms_->offset(n),
          bilateral_out_data + channel_id * num_pixels_,
          bilateral_out_data + channel_id * num_pixels_);
    }
  }

  caffe_set(count_, Dtype(0.), message_passing_.mutable_cpu_data());

  for (int n = 0; n < num_; ++n) {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels_, num_pixels_, channels_, (Dtype) 1.,
        this->blobs_[0]->cpu_data(), spatial_out_blob_.cpu_data() + spatial_out_blob_.offset(n), (Dtype) 0.,
        message_passing_.mutable_cpu_data() + message_passing_.offset(n));
  }

  for (int n = 0; n < num_; ++n) {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels_, num_pixels_, channels_, (Dtype) 1.,
        this->blobs_[1]->cpu_data(), bilateral_out_blob_.cpu_data() + bilateral_out_blob_.offset(n), (Dtype) 1.,
        message_passing_.mutable_cpu_data() + message_passing_.offset(n));
  }

  //--------------------------- Compatibility multiplication ----------------
  // Result from message passing needs to be multiplied with compatibility values.
  for (int n = 0; n < num_; ++n) {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, channels_, num_pixels_,
        channels_, (Dtype) 1., this->blobs_[2]->cpu_data(),
        message_passing_.cpu_data() + message_passing_.offset(n), (Dtype) 0.,
        pairwise_.mutable_cpu_data() + pairwise_.offset(n));
  }

  //------------------------- Adding unaries, normalization is left to the next iteration --------------
  // Add unary
  sum_layer_->Forward(sum_bottom_vec_, sum_top_vec_);
}
template <typename Dtype>
void MeanfieldIteration<Dtype>::Backward_cpu() {

  //---------------------------- Add unary gradient --------------------------
  vector<bool> eltwise_propagate_down(2, true);
  sum_layer_->Backward(sum_top_vec_, eltwise_propagate_down, sum_bottom_vec_);

  //---------------------------- Update compatibility diffs ------------------
  caffe_set(this->blobs_[2]->count(), Dtype(0.), this->blobs_[2]->mutable_cpu_diff());

  for (int n = 0; n < num_; ++n) {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, channels_, channels_,
        num_pixels_, (Dtype) 1., pairwise_.cpu_diff() + pairwise_.offset(n),
        message_passing_.cpu_data() + message_passing_.offset(n), (Dtype) 1.,
        this->blobs_[2]->mutable_cpu_diff());
  }

  //-------------------------- Gradient after compatibility transform --------
  for (int n = 0; n < num_; ++n) {
    caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, channels_, num_pixels_,
        channels_, (Dtype) 1., this->blobs_[2]->cpu_data(),
        pairwise_.cpu_diff() + pairwise_.offset(n), (Dtype) 0.,
        message_passing_.mutable_cpu_diff() + message_passing_.offset(n));
  }

  // ------------------------- Gradient w.r.t. kernels weights ------------
  caffe_set(this->blobs_[0]->count(), Dtype(0.), this->blobs_[0]->mutable_cpu_diff());
  caffe_set(this->blobs_[1]->count(), Dtype(0.), this->blobs_[1]->mutable_cpu_diff());

  for (int n = 0; n < num_; ++n) {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, channels_, channels_,
        num_pixels_, (Dtype) 1., message_passing_.cpu_diff() + message_passing_.offset(n),
        spatial_out_blob_.cpu_data() + spatial_out_blob_.offset(n), (Dtype) 1.,
        this->blobs_[0]->mutable_cpu_diff());
  }

  for (int n = 0; n < num_; ++n) {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, channels_, channels_,
        num_pixels_, (Dtype) 1., message_passing_.cpu_diff() + message_passing_.offset(n),
        bilateral_out_blob_.cpu_data() + bilateral_out_blob_.offset(n), (Dtype) 1.,
        this->blobs_[1]->mutable_cpu_diff());
  }

  /*Dtype* tmp = new Dtype[count_];
  caffe_mul<Dtype>(count_, message_passing_.cpu_diff(), spatial_out_blob_.cpu_data(), tmp);
  for (int c = 0; c < count_; ++c) {
    (this->blobs_[0]->mutable_cpu_diff())[0] += tmp[c];
  }
  caffe_mul<Dtype>(count_, message_passing_.cpu_diff(), bilateral_out_blob_.cpu_data(), tmp);
  for (int c = 0; c < count_; ++c) {
    (this->blobs_[1]->mutable_cpu_diff())[0] += tmp[c];
  }
  delete[] tmp;*/

  // TODO: Check whether there's a way to improve the accuracy of this calculation.
  for (int n = 0; n < num_; ++n) {
    caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, channels_, num_pixels_, channels_, (Dtype) 1.,
        this->blobs_[0]->cpu_data(), message_passing_.cpu_diff() + message_passing_.offset(n),
        (Dtype) 0.,
        spatial_out_blob_.mutable_cpu_diff() + spatial_out_blob_.offset(n));
  }
  //caffe_cpu_scale<Dtype>(count_, (this->blobs_[0]->cpu_data())[0],
  //    message_passing_.cpu_diff(), spatial_out_blob_.mutable_cpu_diff());

  for (int n = 0; n < num_; ++n) {
    caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, channels_, num_pixels_, channels_, (Dtype) 1.,
        this->blobs_[1]->cpu_data(), message_passing_.cpu_diff() + message_passing_.offset(n),
        (Dtype) 0.,
        bilateral_out_blob_.mutable_cpu_diff() + bilateral_out_blob_.offset(n));
  }
  //caffe_cpu_scale<Dtype>(count_, (this->blobs_[1]->cpu_data())[0],
  //    message_passing_.cpu_diff(), bilateral_out_blob_.mutable_cpu_diff());

  //---------------------------- BP thru normalization --------------------------
  for (int n = 0; n < num_; ++n) {

    Dtype* spatial_out_diff = spatial_out_blob_.mutable_cpu_diff() + spatial_out_blob_.offset(n);
    for (int channel_id = 0; channel_id < channels_; ++channel_id) {
      caffe_mul(num_pixels_, spatial_norm_->cpu_data(),
          spatial_out_diff + channel_id * num_pixels_,
          spatial_out_diff + channel_id * num_pixels_);
    }

    Dtype* bilateral_out_diff = bilateral_out_blob_.mutable_cpu_diff() + bilateral_out_blob_.offset(n);
    for (int channel_id = 0; channel_id < channels_; ++channel_id) {
      caffe_mul(num_pixels_, bilateral_norms_->cpu_data() + bilateral_norms_->offset(n),
          bilateral_out_diff + channel_id * num_pixels_,
          bilateral_out_diff + channel_id * num_pixels_);
    }
  }

  //--------------------------- Gradient for message passing ---------------
  for (int n = 0; n < num_; ++n) {

    spatial_lattice_->compute(prob_.mutable_cpu_diff() + prob_.offset(n),
        spatial_out_blob_.cpu_diff() + spatial_out_blob_.offset(n), channels_,
        true, false);

    (*bilateral_lattices_)[n]->compute(prob_.mutable_cpu_diff() + prob_.offset(n),
        bilateral_out_blob_.cpu_diff() + bilateral_out_blob_.offset(n),
        channels_, true, true);
  }

  //--------------------------------------------------------------------------------
  vector<bool> propagate_down(2, true);
  softmax_layer_->Backward(softmax_top_vec_, propagate_down, softmax_bottom_vec_);
}

INSTANTIATE_CLASS(MeanfieldIteration);
} // namespace caffe
(4) Port the CRF layer parameters into caffe.proto
Modify src/caffe/proto/caffe.proto to add the CRF layer's parameter message, and register the new layer's parameters in both LayerParameter and V1LayerParameter. Since the newest Caffe also has a CropLayer, that new layer has to be added as well.
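Only a small parameter message is required for the fields that the ported LayerSetUp actually reads (num_iterations, theta_alpha, theta_beta, theta_gamma). The sketch below is illustrative only: the field numbers are placeholders, the authoritative definition should be taken from the CRF-RNN repository's caffe.proto, and the LayerParameter field ID must be one that is not already in use (the NOTE above LayerParameter in this file records 143 as the next available layer-specific ID).
// Illustrative sketch only -- take the real MultiStageMeanfieldParameter from
// the CRF-RNN caffe.proto; the field numbers here are placeholders.
message MultiStageMeanfieldParameter {
  optional float theta_alpha = 1 [default = 10.0];
  optional float theta_beta = 2 [default = 10.0];
  optional float theta_gamma = 3 [default = 10.0];
  optional uint32 num_iterations = 4 [default = 1];
}
// Inside message LayerParameter (and similarly in V1LayerParameter), register it
// with the next available layer-specific ID, for example:
//   optional MultiStageMeanfieldParameter multi_stage_meanfield_param = 143;
The modified caffe.proto is listed below: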
syntax = "proto2";
package caffe;
// Specifies the shape (dimensions) of a Blob.
message BlobShape {
repeated int64 dim = 1 [packed = true];
}
message BlobProto {
optional BlobShape shape = 7;
repeated float data = 5 [packed = true];
repeated float diff = 6 [packed = true];
repeated double double_data = 8 [packed = true];
repeated double double_diff = 9 [packed = true];
// 4D dimensions -- deprecated. Use "shape" instead.
optional int32 num = 1 [default = 0];
optional int32 channels = 2 [default = 0];
optional int32 height = 3 [default = 0];
optional int32 width = 4 [default = 0];
}
// The BlobProtoVector is simply a way to pass multiple blobproto instances
// around.
message BlobProtoVector {
repeated BlobProto blobs = 1;
}
message Datum {
optional int32 channels = 1;
optional int32 height = 2;
optional int32 width = 3;
// the actual image data, in bytes
optional bytes data = 4;
optional int32 label = 5;
// Optionally, the datum could also hold float data.
repeated float float_data = 6;
// If true data contains an encoded image that need to be decoded
optional bool encoded = 7 [default = false];
}
message FillerParameter {
// The filler type.
optional string type = 1 [default = 'constant'];
optional float value = 2 [default = 0]; // the value in constant filler
optional float min = 3 [default = 0]; // the min value in uniform filler
optional float max = 4 [default = 1]; // the max value in uniform filler
optional float mean = 5 [default = 0]; // the mean value in Gaussian filler
optional float std = 6 [default = 1]; // the std value in Gaussian filler
// The expected number of non-zero output weights for a given input in
// Gaussian filler -- the default -1 means don't perform sparsification.
optional int32 sparse = 7 [default = -1];
// Normalize the filler variance by fan_in, fan_out, or their average.
// Applies to 'xavier' and 'msra' fillers.
enum VarianceNorm {
FAN_IN = 0;
FAN_OUT = 1;
AVERAGE = 2;
}
optional VarianceNorm variance_norm = 8 [default = FAN_IN];
}
message NetParameter {
optional string name = 1; // consider giving the network a name
// The input blobs to the network.
repeated string input = 3;
// The shape of the input blobs.
repeated BlobShape input_shape = 8;
// 4D input dimensions -- deprecated. Use "shape" instead.
// If specified, for each input blob there should be four
// values specifying the num, channels, height and width of the input blob.
// Thus, there should be a total of (4 * #input) numbers.
repeated int32 input_dim = 4;
// Whether the network will force every layer to carry out backward operation.
// If set False, then whether to carry out backward is determined
// automatically according to the net structure and learning rates.
optional bool force_backward = 5 [default = false];
// The current "state" of the network, including the phase, level, and stage.
// Some layers may be included/excluded depending on this state and the states
// specified in the layers' include and exclude fields.
optional NetState state = 6;
// Print debugging information about results while running Net::Forward,
// Net::Backward, and Net::Update.
optional bool debug_info = 7 [default = false];
// The layers that make up the net. Each of their configurations, including
// connectivity and behavior, is specified as a LayerParameter.
repeated LayerParameter layer = 100; // ID 100 so layers are printed last.
// DEPRECATED: use 'layer' instead.
repeated V1LayerParameter layers = 2;
}
// NOTE
// Update the next available ID when you add a new SolverParameter field.
//
// SolverParameter next available ID: 41 (last added: type)
message SolverParameter {
//////////////////////////////////////////////////////////////////////////////
// Specifying the train and test networks
//
// Exactly one train net must be specified using one of the following fields:
// train_net_param, train_net, net_param, net
// One or more test nets may be specified using any of the following fields:
// test_net_param, test_net, net_param, net
// If more than one test net field is specified (e.g., both net and
// test_net are specified), they will be evaluated in the field order given
// above: (1) test_net_param, (2) test_net, (3) net_param/net.
// A test_iter must be specified for each test_net.
// A test_level and/or a test_stage may also be specified for each test_net.
//////////////////////////////////////////////////////////////////////////////
// Proto filename for the train net, possibly combined with one or more
// test nets.
optional string net = 24;
// Inline train net param, possibly combined with one or more test nets.
optional NetParameter net_param = 25;
optional string train_net = 1; // Proto filename for the train net.
repeated string test_net = 2; // Proto filenames for the test nets.
optional NetParameter train_net_param = 21; // Inline train net params.
repeated NetParameter test_net_param = 22; // Inline test net params.
// The states for the train/test nets. Must be unspecified or
// specified once per net.
//
// By default, all states will have solver = true;
// train_state will have phase = TRAIN,
// and all test_state's will have phase = TEST.
// Other defaults are set according to the NetState defaults.
optional NetState train_state = 26;
repeated NetState test_state = 27;
// The number of iterations for each test net.
repeated int32 test_iter = 3;
// The number of iterations between two testing phases.
optional int32 test_interval = 4 [default = 0];
optional bool test_compute_loss = 19 [default = false];
// If true, run an initial test pass before the first iteration,
// ensuring memory availability and printing the starting value of the loss.
optional bool test_initialization = 32 [default = true];
optional float base_lr = 5; // The base learning rate
// the number of iterations between displaying info. If display = 0, no info
// will be displayed.
optional int32 display = 6;
// Display the loss averaged over the last average_loss iterations
optional int32 average_loss = 33 [default = 1];
optional int32 max_iter = 7; // the maximum number of iterations
// accumulate gradients over `iter_size` x `batch_size` instances
optional int32 iter_size = 36 [default = 1];
// The learning rate decay policy. The currently implemented learning rate
// policies are as follows:
// - fixed: always return base_lr.
// - step: return base_lr * gamma ^ (floor(iter / step))
// - exp: return base_lr * gamma ^ iter
// - inv: return base_lr * (1 + gamma * iter) ^ (- power)
// - multistep: similar to step but it allows non uniform steps defined by
// stepvalue
// - poly: the effective learning rate follows a polynomial decay, to be
// zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power)
// - sigmoid: the effective learning rate follows a sigmod decay
// return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize))))
//
// where base_lr, max_iter, gamma, step, stepvalue and power are defined
// in the solver parameter protocol buffer, and iter is the current iteration.
optional string lr_policy = 8;
optional float gamma = 9; // The parameter to compute the learning rate.
optional float power = 10; // The parameter to compute the learning rate.
optional float momentum = 11; // The momentum value.
optional float weight_decay = 12; // The weight decay.
// regularization types supported: L1 and L2
// controlled by weight_decay
optional string regularization_type = 29 [default = "L2"];
// the stepsize for learning rate policy "step"
optional int32 stepsize = 13;
// the stepsize for learning rate policy "multistep"
repeated int32 stepvalue = 34;
// Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm,
// whenever their actual L2 norm is larger.
optional float clip_gradients = 35 [default = -1];
optional int32 snapshot = 14 [default = 0]; // The snapshot interval
optional string snapshot_prefix = 15; // The prefix for the snapshot.
// whether to snapshot diff in the results or not. Snapshotting diff will help
// debugging but the final protocol buffer size will be much larger.
optional bool snapshot_diff = 16 [default = false];
enum SnapshotFormat {
HDF5 = 0;
BINARYPROTO = 1;
}
optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO];
// the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default.
enum SolverMode {
CPU = 0;
GPU = 1;
}
optional SolverMode solver_mode = 17 [default = GPU];
// the device_id will that be used in GPU mode. Use device_id = 0 in default.
optional int32 device_id = 18 [default = 0];
// If non-negative, the seed with which the Solver will initialize the Caffe
// random number generator -- useful for reproducible results. Otherwise,
// (and by default) initialize using a seed derived from the system clock.
optional int64 random_seed = 20 [default = -1];
// type of the solver
optional string type = 40 [default = "SGD"];
// numerical stability for RMSProp, AdaGrad and AdaDelta and Adam
optional float delta = 31 [default = 1e-8];
// parameters for the Adam solver
optional float momentum2 = 39 [default = 0.999];
// RMSProp decay value
// MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
optional float rms_decay = 38;
// If true, print information about the state of the net that may help with
// debugging learning problems.
optional bool debug_info = 23 [default = false];
// If false, don't save a snapshot after training finishes.
optional bool snapshot_after_train = 28 [default = true];
// DEPRECATED: old solver enum types, use string instead
enum SolverType {
SGD = 0;
NESTEROV = 1;
ADAGRAD = 2;
RMSPROP = 3;
ADADELTA = 4;
ADAM = 5;
}
// DEPRECATED: use type instead of solver_type
optional SolverType solver_type = 30 [default = SGD];
}
// A message that stores the solver snapshots
message SolverState {
optional int32 iter = 1; // The current iteration
optional string learned_net = 2; // The file that stores the learned net.
repeated BlobProto history = 3; // The history for sgd solvers
optional int32 current_step = 4 [default = 0]; // The current step for learning rate
}
enum Phase {
TRAIN = 0;
TEST = 1;
}
message NetState {
optional Phase phase = 1 [default = TEST];
optional int32 level = 2 [default = 0];
repeated string stage = 3;
}
message NetStateRule {
// Set phase to require the NetState have a particular phase (TRAIN or TEST)
// to meet this rule.
optional Phase phase = 1;
// Set the minimum and/or maximum levels in which the layer should be used.
// Leave undefined to meet the rule regardless of level.
optional int32 min_level = 2;
optional int32 max_level = 3;
// Customizable sets of stages to include or exclude.
// The net must have ALL of the specified stages and NONE of the specified
// "not_stage"s to meet the rule.
// (Use multiple NetStateRules to specify conjunctions of stages.)
repeated string stage = 4;
repeated string not_stage = 5;
}
// Specifies training parameters (multipliers on global learning constants,
// and the name and other settings used for weight sharing).
message ParamSpec {
// The names of the parameter blobs -- useful for sharing parameters among
// layers, but never required otherwise. To share a parameter between two
// layers, give it a (non-empty) name.
optional string name = 1;
// Whether to require shared weights to have the same shape, or just the same
// count -- defaults to STRICT if unspecified.
optional DimCheckMode share_mode = 2;
enum DimCheckMode {
// STRICT (default) requires that num, channels, height, width each match.
STRICT = 0;
// PERMISSIVE requires only the count (num*channels*height*width) to match.
PERMISSIVE = 1;
}
// The multiplier on the global learning rate for this parameter.
optional float lr_mult = 3 [default = 1.0];
// The multiplier on the global weight decay for this parameter.
optional float decay_mult = 4 [default = 1.0];
}
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 143 (last added: scale_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
repeated string bottom = 3; // the name of each bottom blob
repeated string top = 4; // the name of each top blob
// The train / test phase for computation.
optional Phase phase = 10;
// The amount of weight to assign each top blob in the objective.
// Each layer assigns a default value, usually of either 0 or 1,
// to each top blob.
repeated float loss_weight = 5;
// Specifies training parameters (multipliers on global learning constants,
// and the name and other settings used for weight sharing).
repeated ParamSpec param = 6;
// The blobs containing the numeric parameters of the layer.
repeated BlobProto blobs = 7;
// Specifies on which bottoms the backpropagation should be skipped.
// The size must be either 0 or equal to the number of bottoms.
repeated bool propagate_down = 11;
// Rules controlling whether and when a layer is included in the network,
// based on the current NetState. You may specify a non-zero number of rules
// to include OR exclude, but not both. If no include or exclude rules are
// specified, the layer is always included. If the current NetState meets
// ANY (i.e., one or more) of the specified rules, the layer is
// included/excluded.
repeated NetStateRule include = 8;
repeated NetStateRule exclude = 9;
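// For example (illustrative only), a data layer that should run only during
// training could be declared with
//   layer { name: "data" type: "Data" ... include { phase: TRAIN } }
// and its test-time counterpart with include { phase: TEST }.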
// Parameters for data pre-processing.
optional TransformationParameter transform_param = 100;
// Parameters shared by loss layers.
optional LossParameter loss_param = 101;
// Layer type-specific parameters.
//
// Note: certain layers may have more than one computational engine
// for their implementation. These layers include an Engine type and
// engine parameter for selecting the implementation.
// The default for the engine is set by the ENGINE switch at compile-time.
optional AccuracyParameter accuracy_param = 102;
optional ArgMaxParameter argmax_param = 103;
optional BatchNormParameter batch_norm_param = 139;
optional BiasParameter bias_param = 141;
optional ConcatParameter concat_param = 104;
optional ContrastiveLossParameter contrastive_loss_param = 105;
optional ConvolutionParameter convolution_param = 106;
optional CropParameter crop_param = 203;
optional DataParameter data_param = 107;
optional DropoutParameter dropout_param = 108;
optional DummyDataParameter dummy_data_param = 109;
optional EltwiseParameter eltwise_param = 110;
optional ELUParameter elu_param = 140;
optional EmbedParameter embed_param = 137;
optional ExpParameter exp_param = 111;
optional FlattenParameter flatten_param = 135;
optional HDF5DataParameter hdf5_data_param = 112;
optional HDF5OutputParameter hdf5_output_param = 113;
optional HingeLossParameter hinge_loss_param = 114;
optional ImageDataParameter image_data_param = 115;
optional InfogainLossParameter infogain_loss_param = 116;
optional InnerProductParameter inner_product_param = 117;
optional LogParameter log_param = 134;
optional LRNParameter lrn_param = 118;
optional LSTMParameter lstm_param = 201;
optional MultiStageMeanfieldParameter multi_stage_meanfield_param = 202;
optional MemoryDataParameter memory_data_param = 119;
optional MVNParameter mvn_param = 120;
optional PoolingParameter pooling_param = 121;
optional PowerParameter power_param = 122;
optional PReLUParameter prelu_param = 131;
optional PythonParameter python_param = 130;
optional ReductionParameter reduction_param = 136;
optional ReLUParameter relu_param = 123;
optional ReshapeParameter reshape_param = 133;
optional ScaleParameter scale_param = 142;
optional JointImageParameter joint_image_param = 900;
optional SlideWindowParameter slide_window_param = 901;
optional SigmoidParameter sigmoid_param = 124;
optional SoftmaxParameter softmax_param = 125;
optional SPPParameter spp_param = 132;
optional SliceParameter slice_param = 126;
optional TanHParameter tanh_param = 127;
optional ThresholdParameter threshold_param = 128;
optional TileParameter tile_param = 138;
optional WindowDataParameter window_data_param = 129;
optional MILDataParameter mil_data_param = 0x004d4944; //"MID"
optional MILParameter mil_param = 0x004d494c; //"MIL"
}
message SlideWindowParameter{
optional uint32 slidetype = 1 [default=1];
}
// Message that stores parameters used by LSTMParameter
message LSTMParameter {
optional uint32 num_output = 1; // The number of outputs for the layer
optional float clipping_threshold = 2 [default = 0.0];
optional FillerParameter weight_filler = 3; // The filler for weight
optional FillerParameter bias_filler = 4; // The filler for the bias
optional uint32 batch_size = 5 [default = 1];
}
// Message that stores parameters used to apply transformation
// to the data layer's data
message TransformationParameter {
// For data pre-processing, we can do simple scaling and subtracting the
// data mean, if provided. Note that the mean subtraction is always carried
// out before scaling.
optional float scale = 1 [default = 1];
// Specify if we want to randomly mirror data.
optional bool mirror = 2 [default = false];
// Specify if we would like to randomly crop an image.
optional uint32 crop_size = 3 [default = 0];
// mean_file and mean_value cannot be specified at the same time
optional string mean_file = 4;
// if specified can be repeated once (would subtract it from all the channels)
// or can be repeated the same number of times as channels
// (would subtract them from the corresponding channel)
repeated float mean_value = 5;
// Force the decoded image to have 3 color channels.
optional bool force_color = 6 [default = false];
// Force the decoded image to have 1 color channel.
optional bool force_gray = 7 [default = false];
}
// Message that stores parameters shared by loss layers
message LossParameter {
// If specified, ignore instances with the given label.
optional int32 ignore_label = 1;
// How to normalize the loss for loss layers that aggregate across batches,
// spatial dimensions, or other dimensions. Currently only implemented in
// SoftmaxWithLoss layer.
enum NormalizationMode {
// Divide by the number of examples in the batch times spatial dimensions.
// Outputs that receive the ignore label will NOT be ignored in computing
// the normalization factor.
FULL = 0;
// Divide by the total number of output locations that do not take the
// ignore_label. If ignore_label is not set, this behaves like FULL.
VALID = 1;
// Divide by the batch size.
BATCH_SIZE = 2;
// Do not normalize the loss.
NONE = 3;
}
optional NormalizationMode normalization = 3 [default = VALID];
// Deprecated. Ignored if normalization is specified. If normalization
// is not specified, then setting this to false will be equivalent to
// normalization = BATCH_SIZE to be consistent with previous behavior.
optional bool normalize = 2;
}
// Messages that store parameters used by individual layer types follow, in
// alphabetical order.
message AccuracyParameter {
// When computing accuracy, count as correct by comparing the true label to
// the top k scoring classes. By default, only compare to the top scoring
// class (i.e. argmax).
optional uint32 top_k = 1 [default = 1];
// The "label" axis of the prediction blob, whose argmax corresponds to the
// predicted label -- may be negative to index from the end (e.g., -1 for the
// last axis). For example, if axis == 1 and the predictions are
// (N x C x H x W), the label blob is expected to contain N*H*W ground truth
// labels with integer values in {0, 1, ..., C-1}.
optional int32 axis = 2 [default = 1];
// If specified, ignore instances with the given label.
optional int32 ignore_label = 3;
}
message ArgMaxParameter {
// If true produce pairs (argmax, maxval)
optional bool out_max_val = 1 [default = false];
optional uint32 top_k = 2 [default = 1];
// The axis along which to maximise -- may be negative to index from the
// end (e.g., -1 for the last axis).
// By default ArgMaxLayer maximizes over the flattened trailing dimensions
// for each index of the first / num dimension.
optional int32 axis = 3;
}
message ConcatParameter {
// The axis along which to concatenate -- may be negative to index from the
// end (e.g., -1 for the last axis). Other axes must have the
// same dimension for all the bottom blobs.
// By default, ConcatLayer concatenates blobs along the "channels" axis (1).
optional int32 axis = 2 [default = 1];
// DEPRECATED: alias for "axis" -- does not support negative indexing.
optional uint32 concat_dim = 1 [default = 1];
}
message BatchNormParameter {
// If false, accumulate global mean/variance values via a moving average. If
// true, use those accumulated values instead of computing mean/variance
// across the batch.
optional bool use_global_stats = 1;
// How much does the moving average decay each iteration?
optional float moving_average_fraction = 2 [default = .999];
// Small value to add to the variance estimate so that we don't divide by
// zero.
optional float eps = 3 [default = 1e-5];
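// For example (illustrative only), a deploy-time prototxt would typically set
//   batch_norm_param { use_global_stats: true }
// so the accumulated mean/variance are used instead of per-batch statistics.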
}
message BiasParameter {
// The first axis of bottom[0] (the first input Blob) along which to apply
// bottom[1] (the second input Blob). May be negative to index from the end
// (e.g., -1 for the last axis).
//
// For example, if bottom[0] is 4D with shape 100x3x40x60, the output
// top[0] will have the same shape, and bottom[1] may have any of the
// following shapes (for the given value of axis):
// (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
// (axis == 1 == -3) 3; 3x40; 3x40x60
// (axis == 2 == -2) 40; 40x60
// (axis == 3 == -1) 60
// Furthermore, bottom[1] may have the empty shape (regardless of the value of
// "axis") -- a scalar bias.
optional int32 axis = 1 [default = 1];
// (num_axes is ignored unless just one bottom is given and the bias is
// a learned parameter of the layer. Otherwise, num_axes is determined by the
// number of axes of the second bottom.)
// The number of axes of the input (bottom[0]) covered by the bias
// parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
// Set num_axes := 0, to add a zero-axis Blob: a scalar.
optional int32 num_axes = 2 [default = 1];
// (filler is ignored unless just one bottom is given and the bias is
// a learned parameter of the layer.)
// The initialization for the learned bias parameter.
// Default is the zero (0) initialization, resulting in the BiasLayer
// initially performing the identity operation.
optional FillerParameter filler = 3;
}
message ContrastiveLossParameter {
// margin for dissimilar pair
optional float margin = 1 [default = 1.0];
// The first implementation of this cost did not exactly match the cost of
// Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2.
// legacy_version = false (the default) uses (margin - d)^2 as proposed in the
// Hadsell paper. New models should probably use this version.
// legacy_version = true uses (margin - d^2). This is kept to support /
// reproduce existing models and results
optional bool legacy_version = 2 [default = false];
}
message ConvolutionParameter {
optional uint32 num_output = 1; // The number of outputs for the layer
optional bool bias_term = 2 [default = true]; // whether to have bias terms
// Pad, kernel size, and stride are all given as a single value for equal
// dimensions in all spatial dimensions, or once per spatial dimension.
repeated uint32 pad = 3; // The padding size; defaults to 0
repeated uint32 kernel_size = 4; // The kernel size
repeated uint32 stride = 6; // The stride; defaults to 1
// Factor used to dilate the kernel, (implicitly) zero-filling the resulting
// holes. (Kernel dilation is sometimes referred to by its use in the
// algorithme à trous from Holschneider et al. 1987.)
repeated uint32 dilation = 18; // The dilation; defaults to 1
// For 2D convolution only, the *_h and *_w versions may also be used to
// specify both spatial dimensions.
optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only)
optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only)
optional uint32 kernel_h = 11; // The kernel height (2D only)
optional uint32 kernel_w = 12; // The kernel width (2D only)
optional uint32 stride_h = 13; // The stride height (2D only)
optional uint32 stride_w = 14; // The stride width (2D only)
optional uint32 group = 5 [default = 1]; // The group size for group conv
optional FillerParameter weight_filler = 7; // The filler for the weight
optional FillerParameter bias_filler = 8; // The filler for the bias
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 15 [default = DEFAULT];
// The axis to interpret as "channels" when performing convolution.
// Preceding dimensions are treated as independent inputs;
// succeeding dimensions are treated as "spatial".
// With (N, C, H, W) inputs, and axis == 1 (the default), we perform
// N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for
// groups g>1) filters across the spatial axes (H, W) of the input.
// With (N, C, D, H, W) inputs, and axis == 1, we perform
// N independent 3D convolutions, sliding (C/g)-channels
// filters across the spatial axes (D, H, W) of the input.
optional int32 axis = 16 [default = 1];
// Whether to force use of the general ND convolution, even if a specific
// implementation for blobs of the appropriate number of spatial dimensions
// is available. (Currently, there is only a 2D-specific convolution
// implementation; for input blobs with num_axes != 2, this option is
// ignored and the ND implementation will be used.)
optional bool force_nd_im2col = 17 [default = false];
}
message CropParameter {
// To crop, elements of the first bottom are selected to fit the dimensions
// of the second, reference bottom. The crop is configured by
// - the crop `axis` to pick the dimensions for cropping
// - the crop `offset` to set the shift for all/each dimension
// to align the cropped bottom with the reference bottom.
// All dimensions up to but excluding `axis` are preserved, while
// the dimensions including and trailing `axis` are cropped.
// If only one `offset` is set, then all dimensions are offset by this amount.
// Otherwise, the number of offsets must equal the number of cropped axes to
// shift the crop in each dimension accordingly.
// Note: standard dimensions are N,C,H,W so the default is a spatial crop,
// and `axis` may be negative to index from the end (e.g., -1 for the last
// axis).
optional int32 axis = 1 [default = 2];
repeated uint32 offset = 2;
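// For example (illustrative only; the offset depends on the network), cropping
// a score map to the spatial size of the input data could be written as
//   layer {
//     name: "crop" type: "Crop"
//     bottom: "score" bottom: "data" top: "score-crop"
//     crop_param { axis: 2 offset: 18 }
//   }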
}
message DataParameter {
enum DB {
LEVELDB = 0;
LMDB = 1;
}
// Specify the data source.
optional string source = 1;
// Specify the batch size.
optional uint32 batch_size = 4;
// The rand_skip variable is for the data layer to skip a few data points
// to avoid all asynchronous sgd clients to start at the same point. The skip
// point would be set as rand_skip * rand(0,1). Note that rand_skip should not
// be larger than the number of keys in the database.
// DEPRECATED. Each solver accesses a different subset of the database.
optional uint32 rand_skip = 7 [default = 0];
optional DB backend = 8 [default = LEVELDB];
// DEPRECATED. See TransformationParameter. For data pre-processing, we can do
// simple scaling and subtracting the data mean, if provided. Note that the
// mean subtraction is always carried out before scaling.
optional float scale = 2 [default = 1];
optional string mean_file = 3;
// DEPRECATED. See TransformationParameter. Specify if we would like to randomly
// crop an image.
optional uint32 crop_size = 5 [default = 0];
// DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
// data.
optional bool mirror = 6 [default = false];
// Force the encoded image to have 3 color channels
optional bool force_encoded_color = 9 [default = false];
// Prefetch queue (Number of batches to prefetch to host memory, increase if
// data access bandwidth varies).
optional uint32 prefetch = 10 [default = 4];
}
message DropoutParameter {
optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
}
// DummyDataLayer fills any number of arbitrarily shaped blobs with random
// (or constant) data generated by "Fillers" (see "message FillerParameter").
message DummyDataParameter {
// This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or N
// shape fields, and 0, 1 or N data_fillers.
//
// If 0 data_fillers are specified, ConstantFiller with a value of 0 is used.
// If 1 data_filler is specified, it is applied to all top blobs. If N are
// specified, the ith is applied to the ith top blob.
repeated FillerParameter data_filler = 1;
repeated BlobShape shape = 6;
// 4D dimensions -- deprecated. Use "shape" instead.
repeated uint32 num = 2;
repeated uint32 channels = 3;
repeated uint32 height = 4;
repeated uint32 width = 5;
}
message EltwiseParameter {
enum EltwiseOp {
PROD = 0;
SUM = 1;
MAX = 2;
}
optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation
repeated float coeff = 2; // blob-wise coefficient for SUM operation
// Whether to use an asymptotically slower (for >2 inputs) but stabler method
// of computing the gradient for the PROD operation. (No effect for SUM op.)
optional bool stable_prod_grad = 3 [default = true];
}
// Message that stores parameters used by ELULayer
message ELUParameter {
// Described in:
// Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate
// Deep Network Learning by Exponential Linear Units (ELUs). arXiv
optional float alpha = 1 [default = 1];
}
// Message that stores parameters used by EmbedLayer
message EmbedParameter {
optional uint32 num_output = 1; // The number of outputs for the layer
// The input is given as integers to be interpreted as one-hot
// vector indices with dimension num_input. Hence num_input should be
// 1 greater than the maximum possible input value.
optional uint32 input_dim = 2;
optional bool bias_term = 3 [default = true]; // Whether to use a bias term
optional FillerParameter weight_filler = 4; // The filler for the weight
optional FillerParameter bias_filler = 5; // The filler for the bias
}
// Message that stores parameters used by ExpLayer
message ExpParameter {
// ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0.
// Or if base is set to the default (-1), base is set to e,
// so y = exp(shift + scale * x).
optional float base = 1 [default = -1.0];
optional float scale = 2 [default = 1.0];
optional float shift = 3 [default = 0.0];
}
/// Message that stores parameters used by FlattenLayer
message FlattenParameter {
// The first axis to flatten: all preceding axes are retained in the output.
// May be negative to index from the end (e.g., -1 for the last axis).
optional int32 axis = 1 [default = 1];
// The last axis to flatten: all following axes are retained in the output.
// May be negative to index from the end (e.g., the default -1 for the last
// axis).
optional int32 end_axis = 2 [default = -1];
}
// Message that stores parameters used by HDF5DataLayer
message HDF5DataParameter {
// Specify the data source.
optional string source = 1;
// Specify the batch size.
optional uint32 batch_size = 2;
// Specify whether to shuffle the data.
// If shuffle == true, the ordering of the HDF5 files is shuffled,
// and the ordering of data within any given HDF5 file is shuffled,
// but data between different files are not interleaved; all of a file's
// data are output (in a random order) before moving onto another file.
optional bool shuffle = 3 [default = false];
}
message HDF5OutputParameter {
optional string file_name = 1;
}
message HingeLossParameter {
enum Norm {
L1 = 1;
L2 = 2;
}
// Specify the Norm to use L1 or L2
optional Norm norm = 1 [default = L1];
}
message ImageDataParameter {
// Specify the data source.
optional string source = 1;
// Specify the batch size.
optional uint32 batch_size = 4 [default = 1];
// The rand_skip variable is for the data layer to skip a few data points
// to avoid all asynchronous sgd clients to start at the same point. The skip
// point would be set as rand_skip * rand(0,1). Note that rand_skip should not
// be larger than the number of keys in the database.
optional uint32 rand_skip = 7 [default = 0];
// Whether or not ImageLayer should shuffle the list of files at every epoch.
optional bool shuffle = 8 [default = false];
// It will also resize images if new_height or new_width are not zero.
optional uint32 new_height = 9 [default = 0];
optional uint32 new_width = 10 [default = 0];
// Specify if the images are color or gray
optional bool is_color = 11 [default = true];
// DEPRECATED. See TransformationParameter. For data pre-processing, we can do
// simple scaling and subtracting the data mean, if provided. Note that the
// mean subtraction is always carried out before scaling.
optional float scale = 2 [default = 1];
optional string mean_file = 3;
// DEPRECATED. See TransformationParameter. Specify if we would like to randomly
// crop an image.
optional uint32 crop_size = 5 [default = 0];
// DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
// data.
optional bool mirror = 6 [default = false];
optional string root_folder = 12 [default = ""];
}
message InfogainLossParameter {
// Specify the infogain matrix source.
optional string source = 1;
}
message InnerProductParameter {
optional uint32 num_output = 1; // The number of outputs for the layer
optional bool bias_term = 2 [default = true]; // whether to have bias terms
optional FillerParameter weight_filler = 3; // The filler for the weight
optional FillerParameter bias_filler = 4; // The filler for the bias
// The first axis to be lumped into a single inner product computation;
// all preceding axes are retained in the output.
// May be negative to index from the end (e.g., -1 for the last axis).
optional int32 axis = 5 [default = 1];
}
// Message that stores parameters used by LogLayer
message LogParameter {
// LogLayer computes outputs y = log_base(shift + scale * x), for base > 0.
// Or if base is set to the default (-1), base is set to e,
// so y = ln(shift + scale * x) = log_e(shift + scale * x)
optional float base = 1 [default = -1.0];
optional float scale = 2 [default = 1.0];
optional float shift = 3 [default = 0.0];
}
// Message that stores parameters used by LRNLayer
message LRNParameter {
optional uint32 local_size = 1 [default = 5];
optional float alpha = 2 [default = 1.];
optional float beta = 3 [default = 0.75];
enum NormRegion {
ACROSS_CHANNELS = 0;
WITHIN_CHANNEL = 1;
}
optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
optional float k = 5 [default = 1.];
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 6 [default = DEFAULT];
}
message MemoryDataParameter {
optional uint32 batch_size = 1;
optional uint32 channels = 2;
optional uint32 height = 3;
optional uint32 width = 4;
}
// Message that stores parameters used by MultiStageMeanfieldLayer
message MultiStageMeanfieldParameter {
enum Mode {
POTTS = 0;
}
optional Mode compatibility_mode = 1 [default = POTTS];
optional float threshold = 2;
required float theta_alpha = 3 [default = 10.];
required float theta_beta = 4 [default = 10.];
required float theta_gamma = 5 [default = 10.];
required uint32 num_iterations = 6 [default = 1];
optional float spatial_filter_weight = 7 [default = 1];
optional float bilateral_filter_weight = 8 [default = 1];
optional float forced_spatial_filter_weight = 9;
optional float forced_bilateral_filter_weight = 10;
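// For example (illustrative only; the exact bottoms and values depend on the
// CRF-as-RNN model definition), a meanfield inference layer could look like
//   layer {
//     name: "inference1" type: "MultiStageMeanfield"
//     bottom: "unary" bottom: "unary" bottom: "data" top: "pred"
//     multi_stage_meanfield_param {
//       num_iterations: 10
//       theta_alpha: 160 theta_beta: 3 theta_gamma: 3
//     }
//   }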
}
message MVNParameter {
// This parameter can be set to false to normalize mean only
optional bool normalize_variance = 1 [default = true];
// This parameter can be set to true to perform DNN-like MVN
optional bool across_channels = 2 [default = false];
// Epsilon for not dividing by zero while normalizing variance
optional float eps = 3 [default = 1e-9];
}
message PoolingParameter {
enum PoolMethod {
MAX = 0;
AVE = 1;
STOCHASTIC = 2;
}
optional PoolMethod pool = 1 [default = MAX]; // The pooling method
// Pad, kernel size, and stride are all given as a single value for equal
// dimensions in height and width or as Y, X pairs.
optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X)
optional uint32 pad_h = 9 [default = 0]; // The padding height
optional uint32 pad_w = 10 [default = 0]; // The padding width
optional uint32 kernel_size = 2; // The kernel size (square)
optional uint32 kernel_h = 5; // The kernel height
optional uint32 kernel_w = 6; // The kernel width
optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X)
optional uint32 stride_h = 7; // The stride height
optional uint32 stride_w = 8; // The stride width
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 11 [default = DEFAULT];
// If global_pooling then it will pool over the size of the bottom by doing
// kernel_h = bottom->height and kernel_w = bottom->width
optional bool global_pooling = 12 [default = false];
}
message PowerParameter {
// PowerLayer computes outputs y = (shift + scale * x) ^ power.
optional float power = 1 [default = 1.0];
optional float scale = 2 [default = 1.0];
optional float shift = 3 [default = 0.0];
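// For example, squaring the input corresponds to
//   power_param { power: 2 }
// since scale and shift default to 1 and 0 respectively.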
}
message PythonParameter {
optional string module = 1;
optional string layer = 2;
// This value is set to the attribute `param_str` of the `PythonLayer` object
// in Python before calling the `setup()` method. This could be a number,
// string, dictionary in Python dict format, JSON, etc. You may parse this
// string in `setup` method and use it in `forward` and `backward`.
optional string param_str = 3 [default = ''];
// Whether this PythonLayer is shared among worker solvers during data parallelism.
// If true, each worker solver sequentially runs forward from this layer.
// This value should be set true if you are using it as a data layer.
optional bool share_in_parallel = 4 [default = false];
}
// Message that stores parameters used by ReductionLayer
message ReductionParameter {
enum ReductionOp {
SUM = 1;
ASUM = 2;
SUMSQ = 3;
MEAN = 4;
}
optional ReductionOp operation = 1 [default = SUM]; // reduction operation
// The first axis to reduce to a scalar -- may be negative to index from the
// end (e.g., -1 for the last axis).
// (Currently, only reduction along ALL "tail" axes is supported; reduction
// of axis M through N, where N < num_axes - 1, is unsupported.)
// Suppose we have an n-axis bottom Blob with shape:
// (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)).
// If axis == m, the output Blob will have shape
// (d0, d1, d2, ..., d(m-1)),
// and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1))
// times, each including (dm * d(m+1) * ... * d(n-1)) individual data.
// If axis == 0 (the default), the output Blob always has the empty shape
// (count 1), performing reduction across the entire input --
// often useful for creating new loss functions.
optional int32 axis = 2 [default = 0];
optional float coeff = 3 [default = 1.0]; // coefficient for output
}
// Message that stores parameters used by ReLULayer
message ReLUParameter {
// Allow non-zero slope for negative inputs to speed up optimization
// Described in:
// Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities
// improve neural network acoustic models. In ICML Workshop on Deep Learning
// for Audio, Speech, and Language Processing.
optional float negative_slope = 1 [default = 0];
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 2 [default = DEFAULT];
}
message ReshapeParameter {
// Specify the output dimensions. If some of the dimensions are set to 0,
// the corresponding dimension from the bottom layer is used (unchanged).
// Exactly one dimension may be set to -1, in which case its value is
// inferred from the count of the bottom blob and the remaining dimensions.
// For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8:
//
// layer {
// type: "Reshape" bottom: "input" top: "output"
// reshape_param { ... }
// }
//
// If "input" is 2D with shape 2 x 8, then the following reshape_param
// specifications are all equivalent, producing a 3D blob "output" with shape
// 2 x 2 x 4:
//
// reshape_param { shape { dim: 2 dim: 2 dim: 4 } }
// reshape_param { shape { dim: 0 dim: 2 dim: 4 } }
// reshape_param { shape { dim: 0 dim: 2 dim: -1 } }
// reshape_param { shape { dim: -1 dim: 0 dim: 2 } }
//
optional BlobShape shape = 1;
// axis and num_axes control the portion of the bottom blob's shape that are
// replaced by (included in) the reshape. By default (axis == 0 and
// num_axes == -1), the entire bottom blob shape is included in the reshape,
// and hence the shape field must specify the entire output shape.
//
// axis may be non-zero to retain some portion of the beginning of the input
// shape (and may be negative to index from the end; e.g., -1 to begin the
// reshape after the last axis, including nothing in the reshape,
// -2 to include only the last axis, etc.).
//
// For example, suppose "input" is a 2D blob with shape 2 x 8.
// Then the following ReshapeLayer specifications are all equivalent,
// producing a blob "output" with shape 2 x 2 x 4:
//
// reshape_param { shape { dim: 2 dim: 2 dim: 4 } }
// reshape_param { shape { dim: 2 dim: 4 } axis: 1 }
// reshape_param { shape { dim: 2 dim: 4 } axis: -3 }
//
// num_axes specifies the extent of the reshape.
// If num_axes >= 0 (and axis >= 0), the reshape will be performed only on
// input axes in the range [axis, axis+num_axes].
// num_axes may also be -1, the default, to include all remaining axes
// (starting from axis).
//
// For example, suppose "input" is a 2D blob with shape 2 x 8.
// Then the following ReshapeLayer specifications are equivalent,
// producing a blob "output" with shape 1 x 2 x 8.
//
// reshape_param { shape { dim: 1 dim: 2 dim: 8 } }
// reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 }
// reshape_param { shape { dim: 1 } num_axes: 0 }
//
// On the other hand, these would produce output blob shape 2 x 1 x 8:
//
// reshape_param { shape { dim: 2 dim: 1 dim: 8 } }
// reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 }
//
optional int32 axis = 2 [default = 0];
optional int32 num_axes = 3 [default = -1];
}
message ScaleParameter {
// The first axis of bottom[0] (the first input Blob) along which to apply
// bottom[1] (the second input Blob). May be negative to index from the end
// (e.g., -1 for the last axis).
//
// For example, if bottom[0] is 4D with shape 100x3x40x60, the output
// top[0] will have the same shape, and bottom[1] may have any of the
// following shapes (for the given value of axis):
// (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
// (axis == 1 == -3) 3; 3x40; 3x40x60
// (axis == 2 == -2) 40; 40x60
// (axis == 3 == -1) 60
// Furthermore, bottom[1] may have the empty shape (regardless of the value of
// "axis") -- a scalar multiplier.
optional int32 axis = 1 [default = 1];
// (num_axes is ignored unless just one bottom is given and the scale is
// a learned parameter of the layer. Otherwise, num_axes is determined by the
// number of axes of the second bottom.)
// The number of axes of the input (bottom[0]) covered by the scale
// parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
// Set num_axes := 0, to multiply with a zero-axis Blob: a scalar.
optional int32 num_axes = 2 [default = 1];
// (filler is ignored unless just one bottom is given and the scale is
// a learned parameter of the layer.)
// The initialization for the learned scale parameter.
// Default is the unit (1) initialization, resulting in the ScaleLayer
// initially performing the identity operation.
optional FillerParameter filler = 3;
// Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but
// may be more efficient). Initialized with bias_filler (defaults to 0).
optional bool bias_term = 4 [default = false];
optional FillerParameter bias_filler = 5;
}
message SigmoidParameter {
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 1 [default = DEFAULT];
}
message SliceParameter {
// The axis along which to slice -- may be negative to index from the end
// (e.g., -1 for the last axis).
// By default, SliceLayer slices blobs along the "channels" axis (1).
optional int32 axis = 3 [default = 1];
repeated uint32 slice_point = 2;
// DEPRECATED: alias for "axis" -- does not support negative indexing.
optional uint32 slice_dim = 1 [default = 1];
}
message JointImageParameter {
optional string root_folder = 1 [default = ""];
optional string filelistpath = 2 [default = ""];
optional uint32 batchsize = 3 [default = 300];
optional uint32 outsize = 4 [default = 227];
optional uint32 channels = 5 [default = 3];
}
// Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer
message SoftmaxParameter {
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 1 [default = DEFAULT];
// The axis along which to perform the softmax -- may be negative to index
// from the end (e.g., -1 for the last axis).
// Any other axes will be evaluated as independent softmaxes.
optional int32 axis = 2 [default = 1];
}
message TanHParameter {
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 1 [default = DEFAULT];
}
// Message that stores parameters used by TileLayer
message TileParameter {
// The index of the axis to tile.
optional int32 axis = 1 [default = 1];
// The number of copies (tiles) of the blob to output.
optional int32 tiles = 2;
}
// Message that stores parameters used by ThresholdLayer
message ThresholdParameter {
optional float threshold = 1 [default = 0]; // Strictly positive values
}
// Message that stores parameters used by MILLayer
message MILParameter {
enum MILType {
MAX = 0;
NOR = 1;
}
optional MILType type = 1 [default = MAX]; // The MIL method
}
message WindowDataParameter {
// Specify the data source.
optional string source = 1;
// For data pre-processing, we can do simple scaling and subtracting the
// data mean, if provided. Note that the mean subtraction is always carried
// out before scaling.
optional float scale = 2 [default = 1];
optional string mean_file = 3;
// Specify the batch size.
optional uint32 batch_size = 4;
// Specify if we would like to randomly crop an image.
optional uint32 crop_size = 5 [default = 0];
// Specify if we want to randomly mirror data.
optional bool mirror = 6 [default = false];
// Foreground (object) overlap threshold
optional float fg_threshold = 7 [default = 0.5];
// Background (non-object) overlap threshold
optional float bg_threshold = 8 [default = 0.5];
// Fraction of batch that should be foreground objects
optional float fg_fraction = 9 [default = 0.25];
// Amount of contextual padding to add around a window
// (used only by the window_data_layer)
optional uint32 context_pad = 10 [default = 0];
// Mode for cropping out a detection window
// warp: cropped window is warped to a fixed size and aspect ratio
// square: the tightest square around the window is cropped
optional string crop_mode = 11 [default = "warp"];
// cache_images: will load all images in memory for faster access
optional bool cache_images = 12 [default = false];
// append root_folder to locate images
optional string root_folder = 13 [default = ""];
}
message MILDataParameter {
// Specify the data source.
optional string source = 1;
// Number of scales for each image
optional uint32 num_scales = 2 [default = 1];
// Side length ratio between neighbouring scales
optional float scale_factor = 6 [default = 1];
// Number of channels in the image
optional uint32 channels = 4 [default = 3];
// Specify the number of images per batch
optional uint32 images_per_batch = 3;
// Specify the number of classes
optional uint32 n_classes = 5;
// specify the box_dir and label_dir
optional string label_file = 7;
// Root directory which contains all the images
optional string root_dir = 11;
// Extension for the file
optional string ext = 12;
// To randomize or not
optional bool randomize = 13 [default = true];
}
message SPPParameter {
enum PoolMethod {
MAX = 0;
AVE = 1;
STOCHASTIC = 2;
}
optional uint32 pyramid_height = 1;
optional PoolMethod pool = 2 [default = MAX]; // The pooling method
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 6 [default = DEFAULT];
}
// DEPRECATED: use LayerParameter.
message V1LayerParameter {
repeated string bottom = 2;
repeated string top = 3;
optional string name = 4;
repeated NetStateRule include = 32;
repeated NetStateRule exclude = 33;
enum LayerType {
NONE = 0;
ABSVAL = 35;
ACCURACY = 1;
ARGMAX = 30;
BNLL = 2;
CONCAT = 3;
CONTRASTIVE_LOSS = 37;
CONVOLUTION = 4;
CROP = 40;
DATA = 5;
DECONVOLUTION = 39;
DROPOUT = 6;
DUMMY_DATA = 32;
EUCLIDEAN_LOSS = 7;
ELTWISE = 25;
EXP = 38;
FLATTEN = 8;
HDF5_DATA = 9;
HDF5_OUTPUT = 10;
HINGE_LOSS = 28;
IM2COL = 11;
IMAGE_DATA = 12;
INFOGAIN_LOSS = 13;
INNER_PRODUCT = 14;
LRN = 15;
MEMORY_DATA = 29;
MULTI_STAGE_MEANFIELD = 42;
MULTINOMIAL_LOGISTIC_LOSS = 16;
MVN = 34;
POOLING = 17;
POWER = 26;
RELU = 18;
SIGMOID = 19;
SIGMOID_CROSS_ENTROPY_LOSS = 27;
SILENCE = 36;
SOFTMAX = 20;
SOFTMAX_LOSS = 21;
SPLIT = 22;
SLICE = 33;
TANH = 23;
WINDOW_DATA = 24;
THRESHOLD = 31;
}
optional LayerType type = 5;
repeated BlobProto blobs = 6;
repeated string param = 1001;
repeated DimCheckMode blob_share_mode = 1002;
enum DimCheckMode {
STRICT = 0;
PERMISSIVE = 1;
}
repeated float blobs_lr = 7;
repeated float weight_decay = 8;
repeated float loss_weight = 35;
optional AccuracyParameter accuracy_param = 27;
optional ArgMaxParameter argmax_param = 23;
optional ConcatParameter concat_param = 9;
optional ContrastiveLossParameter contrastive_loss_param = 40;
optional ConvolutionParameter convolution_param = 10;
optional CropParameter crop_param = 203;
optional DataParameter data_param = 11;
optional DropoutParameter dropout_param = 12;
optional DummyDataParameter dummy_data_param = 26;
optional EltwiseParameter eltwise_param = 24;
optional ExpParameter exp_param = 41;
optional HDF5DataParameter hdf5_data_param = 13;
optional HDF5OutputParameter hdf5_output_param = 14;
optional HingeLossParameter hinge_loss_param = 29;
optional ImageDataParameter image_data_param = 15;
optional InfogainLossParameter infogain_loss_param = 16;
optional InnerProductParameter inner_product_param = 17;
optional LRNParameter lrn_param = 18;
optional MultiStageMeanfieldParameter multi_stage_meanfield_param = 202;
optional MemoryDataParameter memory_data_param = 22;
optional MVNParameter mvn_param = 34;
optional PoolingParameter pooling_param = 19;
optional PowerParameter power_param = 21;
optional ReLUParameter relu_param = 30;
optional SigmoidParameter sigmoid_param = 38;
optional SoftmaxParameter softmax_param = 39;
optional SliceParameter slice_param = 31;
optional TanHParameter tanh_param = 37;
optional ThresholdParameter threshold_param = 25;
optional WindowDataParameter window_data_param = 20;
optional TransformationParameter transform_param = 36;
optional LossParameter loss_param = 42;
optional V0LayerParameter layer = 1;
}
// DEPRECATED: V0LayerParameter is the old way of specifying layer parameters
// in Caffe. We keep this message type around for legacy support.
message V0LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the string to specify the layer type
// Parameters to specify layers with inner products.
optional uint32 num_output = 3; // The number of outputs for the layer
optional bool biasterm = 4 [default = true]; // whether to have bias terms
optional FillerParameter weight_filler = 5; // The filler for the weight
optional FillerParameter bias_filler = 6; // The filler for the bias
optional uint32 pad = 7 [default = 0]; // The padding size
optional uint32 kernelsize = 8; // The kernel size
optional uint32 group = 9 [default = 1]; // The group size for group conv
optional uint32 stride = 10 [default = 1]; // The stride
enum PoolMethod {
MAX = 0;
AVE = 1;
STOCHASTIC = 2;
}
optional PoolMethod pool = 11 [default = MAX]; // The pooling method
optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio
optional uint32 local_size = 13 [default = 5]; // for local response norm
optional float alpha = 14 [default = 1.]; // for local response norm
optional float beta = 15 [default = 0.75]; // for local response norm
optional float k = 22 [default = 1.];
// For data layers, specify the data source
optional string source = 16;
// For data pre-processing, we can do simple scaling and subtracting the
// data mean, if provided. Note that the mean subtraction is always carried
// out before scaling.
optional float scale = 17 [default = 1];
optional string meanfile = 18;
// For data layers, specify the batch size.
optional uint32 batchsize = 19;
// For data layers, specify if we would like to randomly crop an image.
optional uint32 cropsize = 20 [default = 0];
// For data layers, specify if we want to randomly mirror data.
optional bool mirror = 21 [default = false];
// The blobs containing the numeric parameters of the layer
repeated BlobProto blobs = 50;
// The ratio that is multiplied on the global learning rate. If you want to
// set the learning ratio for one blob, you need to set it for all blobs.
repeated float blobs_lr = 51;
// The weight decay that is multiplied on the global weight decay.
repeated float weight_decay = 52;
// The rand_skip variable is for the data layer to skip a few data points
// to avoid all asynchronous sgd clients to start at the same point. The skip
// point would be set as rand_skip * rand(0,1). Note that rand_skip should not
// be larger than the number of keys in the database.
optional uint32 rand_skip = 53 [default = 0];
// Fields related to detection (det_*)
// foreground (object) overlap threshold
optional float det_fg_threshold = 54 [default = 0.5];
// background (non-object) overlap threshold
optional float det_bg_threshold = 55 [default = 0.5];
// Fraction of batch that should be foreground objects
optional float det_fg_fraction = 56 [default = 0.25];
// optional bool OBSOLETE_can_clobber = 57 [default = true];
// Amount of contextual padding to add around a window
// (used only by the window_data_layer)
optional uint32 det_context_pad = 58 [default = 0];
// Mode for cropping out a detection window
// warp: cropped window is warped to a fixed size and aspect ratio
// square: the tightest square around the window is cropped
optional string det_crop_mode = 59 [default = "warp"];
// For ReshapeLayer, one needs to specify the new dimensions.
optional int32 new_num = 60 [default = 0];
optional int32 new_channels = 61 [default = 0];
optional int32 new_height = 62 [default = 0];
optional int32 new_width = 63 [default = 0];
// Whether or not ImageLayer should shuffle the list of files at every epoch.
// It will also resize images if new_height or new_width are not zero.
optional bool shuffle_images = 64 [default = false];
// For ConcatLayer, one needs to specify the dimension for concatenation, and
// the other dimensions must be the same for all the bottom blobs.
// By default it will concatenate blobs along the channels dimension.
optional uint32 concat_dim = 65 [default = 1];
optional HDF5OutputParameter hdf5_output_param = 1001;
}
message PReLUParameter {
// Parametric ReLU described in K. He et al, Delving Deep into Rectifiers:
// Surpassing Human-Level Performance on ImageNet Classification, 2015.
// Initial value of a_i. Default is a_i=0.25 for all i.
optional FillerParameter filler = 1;
// Whether or not slope parameters are shared across channels.
optional bool channel_shared = 2 [default = false];
}
(5) Modify /src/caffe/util/upgrade_proto.cpp
Why does the proto upgrade code need to be modified? Because the CRF-as-RNN authors provide their network definition in the old proto format, this code is needed to upgrade the network configuration file to the current format.
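As a quick illustration (this snippet is only a sketch and not part of the file; the prototxt name is a placeholder), the upgrade path is exercised simply by loading an old-format network definition through ReadNetParamsFromTextFileOrDie, which calls UpgradeNetAsNeeded internally:
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/upgrade_proto.hpp"
void LoadOldFormatNet() {
  caffe::NetParameter param;
  // Parses the prototxt and, via UpgradeNetAsNeeded(), converts deprecated
  // V0LayerParameter / V1LayerParameter definitions to the current format.
  caffe::ReadNetParamsFromTextFileOrDie("TVG_CRFRNN_deploy.prototxt", &param);
}
The modified upgrade_proto.cpp is as follows: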
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/text_format.h>
#include <map>
#include <string>
#include "caffe/common.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/io.hpp"
#include "caffe/util/upgrade_proto.hpp"
namespace caffe {
bool NetNeedsUpgrade(const NetParameter& net_param) {
return NetNeedsV0ToV1Upgrade(net_param) || NetNeedsV1ToV2Upgrade(net_param);
}
bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param) {
bool success = true;
if (NetNeedsV0ToV1Upgrade(*param)) {
// NetParameter was specified using the old style (V0LayerParameter); try to
// upgrade it.
LOG(INFO) << "Attempting to upgrade input file specified using deprecated "
<< "V0LayerParameter: " << param_file;
NetParameter original_param(*param);
if (!UpgradeV0Net(original_param, param)) {
success = false;
LOG(ERROR) << "Warning: had one or more problems upgrading "
<< "V0NetParameter to NetParameter (see above); continuing anyway.";
} else {
LOG(INFO) << "Successfully upgraded file specified using deprecated "
<< "V0LayerParameter";
}
LOG(WARNING) << "Note that future Caffe releases will not support "
<< "V0NetParameter; use ./build/tools/upgrade_net_proto_text for "
<< "prototxt and ./build/tools/upgrade_net_proto_binary for model "
<< "weights upgrade this and any other net protos to the new format.";
}
// NetParameter uses old style data transformation fields; try to upgrade it.
if (NetNeedsDataUpgrade(*param)) {
LOG(INFO) << "Attempting to upgrade input file specified using deprecated "
<< "transformation parameters: " << param_file;
UpgradeNetDataTransformation(param);
LOG(INFO) << "Successfully upgraded file specified using deprecated "
<< "data transformation parameters.";
LOG(WARNING) << "Note that future Caffe releases will only support "
<< "transform_param messages for transformation fields.";
}
if (NetNeedsV1ToV2Upgrade(*param)) {
LOG(INFO) << "Attempting to upgrade input file specified using deprecated "
<< "V1LayerParameter: " << param_file;
NetParameter original_param(*param);
if (!UpgradeV1Net(original_param, param)) {
success = false;
LOG(ERROR) << "Warning: had one or more problems upgrading "
<< "V1LayerParameter (see above); continuing anyway.";
} else {
LOG(INFO) << "Successfully upgraded file specified using deprecated "
<< "V1LayerParameter";
}
}
return success;
}
void ReadNetParamsFromTextFileOrDie(const string& param_file,
NetParameter* param) {
CHECK(ReadProtoFromTextFile(param_file, param))
<< "Failed to parse NetParameter file: " << param_file;
UpgradeNetAsNeeded(param_file, param);
}
void ReadNetParamsFromBinaryFileOrDie(const string& param_file,
NetParameter* param) {
CHECK(ReadProtoFromBinaryFile(param_file, param))
<< "Failed to parse NetParameter file: " << param_file;
UpgradeNetAsNeeded(param_file, param);
}
bool NetNeedsV0ToV1Upgrade(const NetParameter& net_param) {
for (int i = 0; i < net_param.layers_size(); ++i) {
if (net_param.layers(i).has_layer()) {
return true;
}
}
return false;
}
bool NetNeedsV1ToV2Upgrade(const NetParameter& net_param) {
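// The V1 format stored layers in the repeated "layers" field, while the
// current format uses "layer"; any "layers" entry therefore means the net
// still needs upgrading.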
return net_param.layers_size() > 0;
}
bool UpgradeV0Net(const NetParameter& v0_net_param_padding_layers,
NetParameter* net_param) {
// First upgrade padding layers to padded conv layers.
NetParameter v0_net_param;
UpgradeV0PaddingLayers(v0_net_param_padding_layers, &v0_net_param);
// Now upgrade layer parameters.
bool is_fully_compatible = true;
net_param->Clear();
if (v0_net_param.has_name()) {
net_param->set_name(v0_net_param.name());
}
for (int i = 0; i < v0_net_param.layers_size(); ++i) {
is_fully_compatible &= UpgradeV0LayerParameter(v0_net_param.layers(i),
net_param->add_layers());
}
for (int i = 0; i < v0_net_param.input_size(); ++i) {
net_param->add_input(v0_net_param.input(i));
}
for (int i = 0; i < v0_net_param.input_dim_size(); ++i) {
net_param->add_input_dim(v0_net_param.input_dim(i));
}
if (v0_net_param.has_force_backward()) {
net_param->set_force_backward(v0_net_param.force_backward());
}
return is_fully_compatible;
}
void UpgradeV0PaddingLayers(const NetParameter& param,
NetParameter* param_upgraded_pad) {
// Copy everything other than the layers from the original param.
param_upgraded_pad->Clear();
param_upgraded_pad->CopyFrom(param);
param_upgraded_pad->clear_layers();
// Figure out which layer each bottom blob comes from.
map<string, int> blob_name_to_last_top_idx;
for (int i = 0; i < param.input_size(); ++i) {
const string& blob_name = param.input(i);
blob_name_to_last_top_idx[blob_name] = -1;
}
for (int i = 0; i < param.layers_size(); ++i) {
const V1LayerParameter& layer_connection = param.layers(i);
const V0LayerParameter& layer_param = layer_connection.layer();
// Add the layer to the new net, unless it's a padding layer.
if (layer_param.type() != "padding") {
param_upgraded_pad->add_layers()->CopyFrom(layer_connection);
}
for (int j = 0; j < layer_connection.bottom_size(); ++j) {
const string& blob_name = layer_connection.bottom(j);
if (blob_name_to_last_top_idx.find(blob_name) ==
blob_name_to_last_top_idx.end()) {
LOG(FATAL) << "Unknown blob input " << blob_name << " to layer " << j;
}
const int top_idx = blob_name_to_last_top_idx[blob_name];
if (top_idx == -1) {
continue;
}
const V1LayerParameter& source_layer = param.layers(top_idx);
if (source_layer.layer().type() == "padding") {
// This layer has a padding layer as input -- check that it is a conv
// layer or a pooling layer and takes only one input. Also check that
// the padding layer input has only one input and one output. Other
// cases have undefined behavior in Caffe.
CHECK((layer_param.type() == "conv") || (layer_param.type() == "pool"))
<< "Padding layer input to "
"non-convolutional / non-pooling layer type "
<< layer_param.type();
CHECK_EQ(layer_connection.bottom_size(), 1)
<< "Conv Layer takes a single blob as input.";
CHECK_EQ(source_layer.bottom_size(), 1)
<< "Padding Layer takes a single blob as input.";
CHECK_EQ(source_layer.top_size(), 1)
<< "Padding Layer produces a single blob as output.";
int layer_index = param_upgraded_pad->layers_size() - 1;
param_upgraded_pad->mutable_layers(layer_index)->mutable_layer()
->set_pad(source_layer.layer().pad());
param_upgraded_pad->mutable_layers(layer_index)
->set_bottom(j, source_layer.bottom(0));
}
}
for (int j = 0; j < layer_connection.top_size(); ++j) {
const string& blob_name = layer_connection.top(j);
blob_name_to_last_top_idx[blob_name] = i;
}
}
}
bool UpgradeV0LayerParameter(const V1LayerParameter& v0_layer_connection,
V1LayerParameter* layer_param) {
bool is_fully_compatible = true;
layer_param->Clear();
for (int i = 0; i < v0_layer_connection.bottom_size(); ++i) {
layer_param->add_bottom(v0_layer_connection.bottom(i));
}
for (int i = 0; i < v0_layer_connection.top_size(); ++i) {
layer_param->add_top(v0_layer_connection.top(i));
}
if (v0_layer_connection.has_layer()) {
const V0LayerParameter& v0_layer_param = v0_layer_connection.layer();
if (v0_layer_param.has_name()) {
layer_param->set_name(v0_layer_param.name());
}
const string& type = v0_layer_param.type();
if (v0_layer_param.has_type()) {
layer_param->set_type(UpgradeV0LayerType(type));
}
for (int i = 0; i < v0_layer_param.blobs_size(); ++i) {
layer_param->add_blobs()->CopyFrom(v0_layer_param.blobs(i));
}
for (int i = 0; i < v0_layer_param.blobs_lr_size(); ++i) {
layer_param->add_blobs_lr(v0_layer_param.blobs_lr(i));
}
for (int i = 0; i < v0_layer_param.weight_decay_size(); ++i) {
layer_param->add_weight_decay(v0_layer_param.weight_decay(i));
}
if (v0_layer_param.has_num_output()) {
if (type == "conv") {
layer_param->mutable_convolution_param()->set_num_output(
v0_layer_param.num_output());
} else if (type == "innerproduct") {
layer_param->mutable_inner_product_param()->set_num_output(
v0_layer_param.num_output());
} else {
LOG(ERROR) << "Unknown parameter num_output for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_biasterm()) {
if (type == "conv") {
layer_param->mutable_convolution_param()->set_bias_term(
v0_layer_param.biasterm());
} else if (type == "innerproduct") {
layer_param->mutable_inner_product_param()->set_bias_term(
v0_layer_param.biasterm());
} else {
LOG(ERROR) << "Unknown parameter biasterm for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_weight_filler()) {
if (type == "conv") {
layer_param->mutable_convolution_param()->
mutable_weight_filler()->CopyFrom(v0_layer_param.weight_filler());
} else if (type == "innerproduct") {
layer_param->mutable_inner_product_param()->
mutable_weight_filler()->CopyFrom(v0_layer_param.weight_filler());
} else {
LOG(ERROR) << "Unknown parameter weight_filler for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_bias_filler()) {
if (type == "conv") {
layer_param->mutable_convolution_param()->
mutable_bias_filler()->CopyFrom(v0_layer_param.bias_filler());
} else if (type == "innerproduct") {
layer_param->mutable_inner_product_param()->
mutable_bias_filler()->CopyFrom(v0_layer_param.bias_filler());
} else {
LOG(ERROR) << "Unknown parameter bias_filler for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_pad()) {
if (type == "conv") {
layer_param->mutable_convolution_param()->add_pad(v0_layer_param.pad());
} else if (type == "pool") {
layer_param->mutable_pooling_param()->set_pad(v0_layer_param.pad());
} else {
LOG(ERROR) << "Unknown parameter pad for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_kernelsize()) {
if (type == "conv") {
layer_param->mutable_convolution_param()->add_kernel_size(
v0_layer_param.kernelsize());
} else if (type == "pool") {
layer_param->mutable_pooling_param()->set_kernel_size(
v0_layer_param.kernelsize());
} else {
LOG(ERROR) << "Unknown parameter kernelsize for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_group()) {
if (type == "conv") {
layer_param->mutable_convolution_param()->set_group(
v0_layer_param.group());
} else {
LOG(ERROR) << "Unknown parameter group for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_stride()) {
if (type == "conv") {
layer_param->mutable_convolution_param()->add_stride(
v0_layer_param.stride());
} else if (type == "pool") {
layer_param->mutable_pooling_param()->set_stride(
v0_layer_param.stride());
} else {
LOG(ERROR) << "Unknown parameter stride for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_pool()) {
if (type == "pool") {
V0LayerParameter_PoolMethod pool = v0_layer_param.pool();
switch (pool) {
case V0LayerParameter_PoolMethod_MAX:
layer_param->mutable_pooling_param()->set_pool(
PoolingParameter_PoolMethod_MAX);
break;
case V0LayerParameter_PoolMethod_AVE:
layer_param->mutable_pooling_param()->set_pool(
PoolingParameter_PoolMethod_AVE);
break;
case V0LayerParameter_PoolMethod_STOCHASTIC:
layer_param->mutable_pooling_param()->set_pool(
PoolingParameter_PoolMethod_STOCHASTIC);
break;
default:
LOG(ERROR) << "Unknown pool method " << pool;
is_fully_compatible = false;
}
} else {
LOG(ERROR) << "Unknown parameter pool for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_dropout_ratio()) {
if (type == "dropout") {
layer_param->mutable_dropout_param()->set_dropout_ratio(
v0_layer_param.dropout_ratio());
} else {
LOG(ERROR) << "Unknown parameter dropout_ratio for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_local_size()) {
if (type == "lrn") {
layer_param->mutable_lrn_param()->set_local_size(
v0_layer_param.local_size());
} else {
LOG(ERROR) << "Unknown parameter local_size for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_alpha()) {
if (type == "lrn") {
layer_param->mutable_lrn_param()->set_alpha(v0_layer_param.alpha());
} else {
LOG(ERROR) << "Unknown parameter alpha for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_beta()) {
if (type == "lrn") {
layer_param->mutable_lrn_param()->set_beta(v0_layer_param.beta());
} else {
LOG(ERROR) << "Unknown parameter beta for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_k()) {
if (type == "lrn") {
layer_param->mutable_lrn_param()->set_k(v0_layer_param.k());
} else {
LOG(ERROR) << "Unknown parameter k for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_source()) {
if (type == "data") {
layer_param->mutable_data_param()->set_source(v0_layer_param.source());
} else if (type == "hdf5_data") {
layer_param->mutable_hdf5_data_param()->set_source(
v0_layer_param.source());
} else if (type == "images") {
layer_param->mutable_image_data_param()->set_source(
v0_layer_param.source());
} else if (type == "window_data") {
layer_param->mutable_window_data_param()->set_source(
v0_layer_param.source());
} else if (type == "infogain_loss") {
layer_param->mutable_infogain_loss_param()->set_source(
v0_layer_param.source());
} else {
LOG(ERROR) << "Unknown parameter source for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_scale()) {
layer_param->mutable_transform_param()->
set_scale(v0_layer_param.scale());
}
if (v0_layer_param.has_meanfile()) {
layer_param->mutable_transform_param()->
set_mean_file(v0_layer_param.meanfile());
}
if (v0_layer_param.has_batchsize()) {
if (type == "data") {
layer_param->mutable_data_param()->set_batch_size(
v0_layer_param.batchsize());
} else if (type == "hdf5_data") {
layer_param->mutable_hdf5_data_param()->set_batch_size(
v0_layer_param.batchsize());
} else if (type == "images") {
layer_param->mutable_image_data_param()->set_batch_size(
v0_layer_param.batchsize());
} else if (type == "window_data") {
layer_param->mutable_window_data_param()->set_batch_size(
v0_layer_param.batchsize());
} else {
LOG(ERROR) << "Unknown parameter batchsize for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_cropsize()) {
layer_param->mutable_transform_param()->
set_crop_size(v0_layer_param.cropsize());
}
if (v0_layer_param.has_mirror()) {
layer_param->mutable_transform_param()->
set_mirror(v0_layer_param.mirror());
}
if (v0_layer_param.has_rand_skip()) {
if (type == "data") {
layer_param->mutable_data_param()->set_rand_skip(
v0_layer_param.rand_skip());
} else if (type == "images") {
layer_param->mutable_image_data_param()->set_rand_skip(
v0_layer_param.rand_skip());
} else {
LOG(ERROR) << "Unknown parameter rand_skip for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_shuffle_images()) {
if (type == "images") {
layer_param->mutable_image_data_param()->set_shuffle(
v0_layer_param.shuffle_images());
} else {
LOG(ERROR) << "Unknown parameter shuffle for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_new_height()) {
if (type == "images") {
layer_param->mutable_image_data_param()->set_new_height(
v0_layer_param.new_height());
} else {
LOG(ERROR) << "Unknown parameter new_height for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_new_width()) {
if (type == "images") {
layer_param->mutable_image_data_param()->set_new_width(
v0_layer_param.new_width());
} else {
LOG(ERROR) << "Unknown parameter new_width for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_concat_dim()) {
if (type == "concat") {
layer_param->mutable_concat_param()->set_concat_dim(
v0_layer_param.concat_dim());
} else {
LOG(ERROR) << "Unknown parameter concat_dim for layer type " << type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_det_fg_threshold()) {
if (type == "window_data") {
layer_param->mutable_window_data_param()->set_fg_threshold(
v0_layer_param.det_fg_threshold());
} else {
LOG(ERROR) << "Unknown parameter det_fg_threshold for layer type "
<< type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_det_bg_threshold()) {
if (type == "window_data") {
layer_param->mutable_window_data_param()->set_bg_threshold(
v0_layer_param.det_bg_threshold());
} else {
LOG(ERROR) << "Unknown parameter det_bg_threshold for layer type "
<< type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_det_fg_fraction()) {
if (type == "window_data") {
layer_param->mutable_window_data_param()->set_fg_fraction(
v0_layer_param.det_fg_fraction());
} else {
LOG(ERROR) << "Unknown parameter det_fg_fraction for layer type "
<< type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_det_context_pad()) {
if (type == "window_data") {
layer_param->mutable_window_data_param()->set_context_pad(
v0_layer_param.det_context_pad());
} else {
LOG(ERROR) << "Unknown parameter det_context_pad for layer type "
<< type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_det_crop_mode()) {
if (type == "window_data") {
layer_param->mutable_window_data_param()->set_crop_mode(
v0_layer_param.det_crop_mode());
} else {
LOG(ERROR) << "Unknown parameter det_crop_mode for layer type "
<< type;
is_fully_compatible = false;
}
}
if (v0_layer_param.has_hdf5_output_param()) {
if (type == "hdf5_output") {
layer_param->mutable_hdf5_output_param()->CopyFrom(
v0_layer_param.hdf5_output_param());
} else {
LOG(ERROR) << "Unknown parameter hdf5_output_param for layer type "
<< type;
is_fully_compatible = false;
}
}
}
return is_fully_compatible;
}
V1LayerParameter_LayerType UpgradeV0LayerType(const string& type) {
if (type == "accuracy") {
return V1LayerParameter_LayerType_ACCURACY;
} else if (type == "bnll") {
return V1LayerParameter_LayerType_BNLL;
} else if (type == "concat") {
return V1LayerParameter_LayerType_CONCAT;
} else if (type == "conv") {
return V1LayerParameter_LayerType_CONVOLUTION;
} else if (type == "data") {
return V1LayerParameter_LayerType_DATA;
} else if (type == "dropout") {
return V1LayerParameter_LayerType_DROPOUT;
} else if (type == "euclidean_loss") {
return V1LayerParameter_LayerType_EUCLIDEAN_LOSS;
} else if (type == "flatten") {
return V1LayerParameter_LayerType_FLATTEN;
} else if (type == "hdf5_data") {
return V1LayerParameter_LayerType_HDF5_DATA;
} else if (type == "hdf5_output") {
return V1LayerParameter_LayerType_HDF5_OUTPUT;
} else if (type == "im2col") {
return V1LayerParameter_LayerType_IM2COL;
} else if (type == "images") {
return V1LayerParameter_LayerType_IMAGE_DATA;
} else if (type == "infogain_loss") {
return V1LayerParameter_LayerType_INFOGAIN_LOSS;
} else if (type == "innerproduct") {
return V1LayerParameter_LayerType_INNER_PRODUCT;
} else if (type == "lrn") {
return V1LayerParameter_LayerType_LRN;
} else if (type == "multinomial_logistic_loss") {
return V1LayerParameter_LayerType_MULTINOMIAL_LOGISTIC_LOSS;
} else if (type == "pool") {
return V1LayerParameter_LayerType_POOLING;
} else if (type == "relu") {
return V1LayerParameter_LayerType_RELU;
} else if (type == "sigmoid") {
return V1LayerParameter_LayerType_SIGMOID;
} else if (type == "softmax") {
return V1LayerParameter_LayerType_SOFTMAX;
} else if (type == "softmax_loss") {
return V1LayerParameter_LayerType_SOFTMAX_LOSS;
} else if (type == "split") {
return V1LayerParameter_LayerType_SPLIT;
} else if (type == "tanh") {
return V1LayerParameter_LayerType_TANH;
} else if (type == "window_data") {
return V1LayerParameter_LayerType_WINDOW_DATA;
} else {
LOG(FATAL) << "Unknown layer name: " << type;
return V1LayerParameter_LayerType_NONE;
}
}
bool NetNeedsDataUpgrade(const NetParameter& net_param) {
for (int i = 0; i < net_param.layers_size(); ++i) {
if (net_param.layers(i).type() == V1LayerParameter_LayerType_DATA) {
DataParameter layer_param = net_param.layers(i).data_param();
if (layer_param.has_scale()) { return true; }
if (layer_param.has_mean_file()) { return true; }
if (layer_param.has_crop_size()) { return true; }
if (layer_param.has_mirror()) { return true; }
}
if (net_param.layers(i).type() == V1LayerParameter_LayerType_IMAGE_DATA) {
ImageDataParameter layer_param = net_param.layers(i).image_data_param();
if (layer_param.has_scale()) { return true; }
if (layer_param.has_mean_file()) { return true; }
if (layer_param.has_crop_size()) { return true; }
if (layer_param.has_mirror()) { return true; }
}
if (net_param.layers(i).type() == V1LayerParameter_LayerType_WINDOW_DATA) {
WindowDataParameter layer_param = net_param.layers(i).window_data_param();
if (layer_param.has_scale()) { return true; }
if (layer_param.has_mean_file()) { return true; }
if (layer_param.has_crop_size()) { return true; }
if (layer_param.has_mirror()) { return true; }
}
}
return false;
}
#define CONVERT_LAYER_TRANSFORM_PARAM(TYPE, Name, param_name) \
do { \
if (net_param->layers(i).type() == V1LayerParameter_LayerType_##TYPE) { \
Name##Parameter* layer_param = \
net_param->mutable_layers(i)->mutable_##param_name##_param(); \
TransformationParameter* transform_param = \
net_param->mutable_layers(i)->mutable_transform_param(); \
if (layer_param->has_scale()) { \
transform_param->set_scale(layer_param->scale()); \
layer_param->clear_scale(); \
} \
if (layer_param->has_mean_file()) { \
transform_param->set_mean_file(layer_param->mean_file()); \
layer_param->clear_mean_file(); \
} \
if (layer_param->has_crop_size()) { \
transform_param->set_crop_size(layer_param->crop_size()); \
layer_param->clear_crop_size(); \
} \
if (layer_param->has_mirror()) { \
transform_param->set_mirror(layer_param->mirror()); \
layer_param->clear_mirror(); \
} \
} \
} while (0)
void UpgradeNetDataTransformation(NetParameter* net_param) {
for (int i = 0; i < net_param->layers_size(); ++i) {
CONVERT_LAYER_TRANSFORM_PARAM(DATA, Data, data);
CONVERT_LAYER_TRANSFORM_PARAM(IMAGE_DATA, ImageData, image_data);
CONVERT_LAYER_TRANSFORM_PARAM(WINDOW_DATA, WindowData, window_data);
}
}
bool UpgradeV1Net(const NetParameter& v1_net_param, NetParameter* net_param) {
bool is_fully_compatible = true;
if (v1_net_param.layer_size() > 0) {
LOG(ERROR) << "Input NetParameter to be upgraded already specifies 'layer' "
<< "fields; these will be ignored for the upgrade.";
is_fully_compatible = false;
}
net_param->CopyFrom(v1_net_param);
net_param->clear_layers();
net_param->clear_layer();
for (int i = 0; i < v1_net_param.layers_size(); ++i) {
if (!UpgradeV1LayerParameter(v1_net_param.layers(i),
net_param->add_layer())) {
LOG(ERROR) << "Upgrade of input layer " << i << " failed.";
is_fully_compatible = false;
}
}
return is_fully_compatible;
}
bool UpgradeV1LayerParameter(const V1LayerParameter& v1_layer_param,
LayerParameter* layer_param) {
layer_param->Clear();
bool is_fully_compatible = true;
for (int i = 0; i < v1_layer_param.bottom_size(); ++i) {
layer_param->add_bottom(v1_layer_param.bottom(i));
}
for (int i = 0; i < v1_layer_param.top_size(); ++i) {
layer_param->add_top(v1_layer_param.top(i));
}
if (v1_layer_param.has_name()) {
layer_param->set_name(v1_layer_param.name());
}
for (int i = 0; i < v1_layer_param.include_size(); ++i) {
layer_param->add_include()->CopyFrom(v1_layer_param.include(i));
}
for (int i = 0; i < v1_layer_param.exclude_size(); ++i) {
layer_param->add_exclude()->CopyFrom(v1_layer_param.exclude(i));
}
if (v1_layer_param.has_type()) {
layer_param->set_type(UpgradeV1LayerType(v1_layer_param.type()));
}
for (int i = 0; i < v1_layer_param.blobs_size(); ++i) {
layer_param->add_blobs()->CopyFrom(v1_layer_param.blobs(i));
}
for (int i = 0; i < v1_layer_param.param_size(); ++i) {
while (layer_param->param_size() <= i) { layer_param->add_param(); }
layer_param->mutable_param(i)->set_name(v1_layer_param.param(i));
}
ParamSpec_DimCheckMode mode;
for (int i = 0; i < v1_layer_param.blob_share_mode_size(); ++i) {
while (layer_param->param_size() <= i) { layer_param->add_param(); }
switch (v1_layer_param.blob_share_mode(i)) {
case V1LayerParameter_DimCheckMode_STRICT:
mode = ParamSpec_DimCheckMode_STRICT;
break;
case V1LayerParameter_DimCheckMode_PERMISSIVE:
mode = ParamSpec_DimCheckMode_PERMISSIVE;
break;
default:
LOG(FATAL) << "Unknown blob_share_mode: "
<< v1_layer_param.blob_share_mode(i);
break;
}
layer_param->mutable_param(i)->set_share_mode(mode);
}
for (int i = 0; i < v1_layer_param.blobs_lr_size(); ++i) {
while (layer_param->param_size() <= i) { layer_param->add_param(); }
layer_param->mutable_param(i)->set_lr_mult(v1_layer_param.blobs_lr(i));
}
for (int i = 0; i < v1_layer_param.weight_decay_size(); ++i) {
while (layer_param->param_size() <= i) { layer_param->add_param(); }
layer_param->mutable_param(i)->set_decay_mult(
v1_layer_param.weight_decay(i));
}
for (int i = 0; i < v1_layer_param.loss_weight_size(); ++i) {
layer_param->add_loss_weight(v1_layer_param.loss_weight(i));
}
if (v1_layer_param.has_accuracy_param()) {
layer_param->mutable_accuracy_param()->CopyFrom(
v1_layer_param.accuracy_param());
}
if (v1_layer_param.has_argmax_param()) {
layer_param->mutable_argmax_param()->CopyFrom(
v1_layer_param.argmax_param());
}
if (v1_layer_param.has_concat_param()) {
layer_param->mutable_concat_param()->CopyFrom(
v1_layer_param.concat_param());
}
if (v1_layer_param.has_contrastive_loss_param()) {
layer_param->mutable_contrastive_loss_param()->CopyFrom(
v1_layer_param.contrastive_loss_param());
}
if (v1_layer_param.has_convolution_param()) {
layer_param->mutable_convolution_param()->CopyFrom(
v1_layer_param.convolution_param());
}
if (v1_layer_param.has_data_param()) {
layer_param->mutable_data_param()->CopyFrom(
v1_layer_param.data_param());
}
if (v1_layer_param.has_dropout_param()) {
layer_param->mutable_dropout_param()->CopyFrom(
v1_layer_param.dropout_param());
}
if (v1_layer_param.has_dummy_data_param()) {
layer_param->mutable_dummy_data_param()->CopyFrom(
v1_layer_param.dummy_data_param());
}
if (v1_layer_param.has_eltwise_param()) {
layer_param->mutable_eltwise_param()->CopyFrom(
v1_layer_param.eltwise_param());
}
if (v1_layer_param.has_exp_param()) {
layer_param->mutable_exp_param()->CopyFrom(
v1_layer_param.exp_param());
}
if (v1_layer_param.has_hdf5_data_param()) {
layer_param->mutable_hdf5_data_param()->CopyFrom(
v1_layer_param.hdf5_data_param());
}
if (v1_layer_param.has_hdf5_output_param()) {
layer_param->mutable_hdf5_output_param()->CopyFrom(
v1_layer_param.hdf5_output_param());
}
if (v1_layer_param.has_hinge_loss_param()) {
layer_param->mutable_hinge_loss_param()->CopyFrom(
v1_layer_param.hinge_loss_param());
}
if (v1_layer_param.has_image_data_param()) {
layer_param->mutable_image_data_param()->CopyFrom(
v1_layer_param.image_data_param());
}
if (v1_layer_param.has_infogain_loss_param()) {
layer_param->mutable_infogain_loss_param()->CopyFrom(
v1_layer_param.infogain_loss_param());
}
if (v1_layer_param.has_inner_product_param()) {
layer_param->mutable_inner_product_param()->CopyFrom(
v1_layer_param.inner_product_param());
}
if (v1_layer_param.has_lrn_param()) {
layer_param->mutable_lrn_param()->CopyFrom(
v1_layer_param.lrn_param());
}
if (v1_layer_param.has_memory_data_param()) {
layer_param->mutable_memory_data_param()->CopyFrom(
v1_layer_param.memory_data_param());
}
if (v1_layer_param.has_mvn_param()) {
layer_param->mutable_mvn_param()->CopyFrom(
v1_layer_param.mvn_param());
}
if (v1_layer_param.has_pooling_param()) {
layer_param->mutable_pooling_param()->CopyFrom(
v1_layer_param.pooling_param());
}
if (v1_layer_param.has_power_param()) {
layer_param->mutable_power_param()->CopyFrom(
v1_layer_param.power_param());
}
if (v1_layer_param.has_relu_param()) {
layer_param->mutable_relu_param()->CopyFrom(
v1_layer_param.relu_param());
}
if (v1_layer_param.has_sigmoid_param()) {
layer_param->mutable_sigmoid_param()->CopyFrom(
v1_layer_param.sigmoid_param());
}
if (v1_layer_param.has_softmax_param()) {
layer_param->mutable_softmax_param()->CopyFrom(
v1_layer_param.softmax_param());
}
if (v1_layer_param.has_slice_param()) {
layer_param->mutable_slice_param()->CopyFrom(
v1_layer_param.slice_param());
}
if (v1_layer_param.has_tanh_param()) {
layer_param->mutable_tanh_param()->CopyFrom(
v1_layer_param.tanh_param());
}
if (v1_layer_param.has_threshold_param()) {
layer_param->mutable_threshold_param()->CopyFrom(
v1_layer_param.threshold_param());
}
if (v1_layer_param.has_window_data_param()) {
layer_param->mutable_window_data_param()->CopyFrom(
v1_layer_param.window_data_param());
}
if (v1_layer_param.has_transform_param()) {
layer_param->mutable_transform_param()->CopyFrom(
v1_layer_param.transform_param());
}
if (v1_layer_param.has_loss_param()) {
layer_param->mutable_loss_param()->CopyFrom(
v1_layer_param.loss_param());
}
if (v1_layer_param.has_layer()) {
LOG(ERROR) << "Input NetParameter has V0 layer -- ignoring.";
is_fully_compatible = false;
}
return is_fully_compatible;
}
const char* UpgradeV1LayerType(const V1LayerParameter_LayerType type) {
switch (type) {
case V1LayerParameter_LayerType_NONE:
return "";
case V1LayerParameter_LayerType_ABSVAL:
return "AbsVal";
case V1LayerParameter_LayerType_ACCURACY:
return "Accuracy";
case V1LayerParameter_LayerType_ARGMAX:
return "ArgMax";
case V1LayerParameter_LayerType_BNLL:
return "BNLL";
case V1LayerParameter_LayerType_CONCAT:
return "Concat";
case V1LayerParameter_LayerType_CONTRASTIVE_LOSS:
return "ContrastiveLoss";
case V1LayerParameter_LayerType_CONVOLUTION:
return "Convolution";
case V1LayerParameter_LayerType_CROP:
return "Crop";
case V1LayerParameter_LayerType_DECONVOLUTION:
return "Deconvolution";
case V1LayerParameter_LayerType_DATA:
return "Data";
case V1LayerParameter_LayerType_DROPOUT:
return "Dropout";
case V1LayerParameter_LayerType_DUMMY_DATA:
return "DummyData";
case V1LayerParameter_LayerType_EUCLIDEAN_LOSS:
return "EuclideanLoss";
case V1LayerParameter_LayerType_ELTWISE:
return "Eltwise";
case V1LayerParameter_LayerType_EXP:
return "Exp";
case V1LayerParameter_LayerType_FLATTEN:
return "Flatten";
case V1LayerParameter_LayerType_HDF5_DATA:
return "HDF5Data";
case V1LayerParameter_LayerType_HDF5_OUTPUT:
return "HDF5Output";
case V1LayerParameter_LayerType_HINGE_LOSS:
return "HingeLoss";
case V1LayerParameter_LayerType_IM2COL:
return "Im2col";
case V1LayerParameter_LayerType_IMAGE_DATA:
return "ImageData";
case V1LayerParameter_LayerType_INFOGAIN_LOSS:
return "InfogainLoss";
case V1LayerParameter_LayerType_INNER_PRODUCT:
return "InnerProduct";
case V1LayerParameter_LayerType_LRN:
return "LRN";
case V1LayerParameter_LayerType_MEMORY_DATA:
return "MemoryData";
case V1LayerParameter_LayerType_MULTI_STAGE_MEANFIELD:
return "MultiStageMeanfield";
case V1LayerParameter_LayerType_MULTINOMIAL_LOGISTIC_LOSS:
return "MultinomialLogisticLoss";
case V1LayerParameter_LayerType_MVN:
return "MVN";
case V1LayerParameter_LayerType_POOLING:
return "Pooling";
case V1LayerParameter_LayerType_POWER:
return "Power";
case V1LayerParameter_LayerType_RELU:
return "ReLU";
case V1LayerParameter_LayerType_SIGMOID:
return "Sigmoid";
case V1LayerParameter_LayerType_SIGMOID_CROSS_ENTROPY_LOSS:
return "SigmoidCrossEntropyLoss";
case V1LayerParameter_LayerType_SILENCE:
return "Silence";
case V1LayerParameter_LayerType_SOFTMAX:
return "Softmax";
case V1LayerParameter_LayerType_SOFTMAX_LOSS:
return "SoftmaxWithLoss";
case V1LayerParameter_LayerType_SPLIT:
return "Split";
case V1LayerParameter_LayerType_SLICE:
return "Slice";
case V1LayerParameter_LayerType_TANH:
return "TanH";
case V1LayerParameter_LayerType_WINDOW_DATA:
return "WindowData";
case V1LayerParameter_LayerType_THRESHOLD:
return "Threshold";
default:
LOG(FATAL) << "Unknown V1LayerParameter layer type: " << type;
return "";
}
}
// Return true iff the solver contains any old solver_type specified as enums
bool SolverNeedsTypeUpgrade(const SolverParameter& solver_param) {
if (solver_param.has_solver_type()) {
return true;
}
return false;
}
bool UpgradeSolverType(SolverParameter* solver_param) {
CHECK(!solver_param->has_solver_type() || !solver_param->has_type())
<< "Failed to upgrade solver: old solver_type field (enum) and new type "
<< "field (string) cannot be both specified in solver proto text.";
if (solver_param->has_solver_type()) {
string type;
switch (solver_param->solver_type()) {
case SolverParameter_SolverType_SGD:
type = "SGD";
break;
case SolverParameter_SolverType_NESTEROV:
type = "Nesterov";
break;
case SolverParameter_SolverType_ADAGRAD:
type = "AdaGrad";
break;
case SolverParameter_SolverType_RMSPROP:
type = "RMSProp";
break;
case SolverParameter_SolverType_ADADELTA:
type = "AdaDelta";
break;
case SolverParameter_SolverType_ADAM:
type = "Adam";
break;
default:
LOG(FATAL) << "Unknown SolverParameter solver_type: " << type;
}
solver_param->set_type(type);
solver_param->clear_solver_type();
} else {
LOG(ERROR) << "Warning: solver type already up to date. ";
return false;
}
return true;
}
// Check for deprecations and upgrade the SolverParameter as needed.
bool UpgradeSolverAsNeeded(const string& param_file, SolverParameter* param) {
bool success = true;
// Try to upgrade old style solver_type enum fields into new string type
if (SolverNeedsTypeUpgrade(*param)) {
LOG(INFO) << "Attempting to upgrade input file specified using deprecated "
<< "'solver_type' field (enum)': " << param_file;
if (!UpgradeSolverType(param)) {
success = false;
LOG(ERROR) << "Warning: had one or more problems upgrading "
<< "SolverType (see above).";
} else {
LOG(INFO) << "Successfully upgraded file specified using deprecated "
<< "'solver_type' field (enum) to 'type' field (string).";
LOG(WARNING) << "Note that future Caffe releases will only support "
<< "'type' field (string) for a solver's type.";
}
}
return success;
}
// Read parameters from a file into a SolverParameter proto message.
void ReadSolverParamsFromTextFileOrDie(const string& param_file,
SolverParameter* param) {
CHECK(ReadProtoFromTextFile(param_file, param))
<< "Failed to parse SolverParameter file: " << param_file;
UpgradeSolverAsNeeded(param_file, param);
}
} // namespace caffe
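With the upgrade routines above compiled into the project, an old-format network definition can be converted once and written back to disk instead of being upgraded every time it is loaded. The following is only a minimal sketch of such a one-shot converter (similar in spirit to Caffe's tools/upgrade_net_proto_text.cpp); the input and output file names are placeholders, and the sketch assumes it is linked against the patched caffe-windows build:
// upgrade_old_net.cpp -- one-shot prototxt upgrade (sketch, hypothetical file names)
#include <string>
#include <glog/logging.h>
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/io.hpp"
#include "caffe/util/upgrade_proto.hpp"
int main(int argc, char** argv) {
::google::InitGoogleLogging(argv[0]);
const std::string input_file = "TVG_CRF_RNN_COCO_VOC.v0.prototxt"; // old-format definition (placeholder name)
const std::string output_file = "TVG_CRF_RNN_COCO_VOC.prototxt"; // upgraded definition
caffe::NetParameter net_param;
// Parses the text prototxt and applies the V0 -> V1 -> "layer" upgrades as needed.
caffe::ReadNetParamsFromTextFileOrDie(input_file, &net_param);
// Writes the upgraded NetParameter back out in text format.
caffe::WriteProtoToTextFile(net_param, output_file);
return 0;
}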
The upgraded network definition TVG_CRF_RNN_COCO_VOC.prototxt is as follows:
name: "TVG_CRF_RNN_COCO_VOC"
input: "data"
input_dim: 1
input_dim: 3
input_dim: 500
input_dim: 500
force_backward: true
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 100
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
engine: CAFFE
}
}
layer {
name: "relu5_3"
type: "ReLU"
bottom: "conv5_3"
top: "conv5_3"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5_3"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "fc6"
type: "Convolution"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4096
kernel_size: 7
engine: CAFFE
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "drop6"
type: "Dropout"
bottom: "fc6"
top: "fc6"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc7"
type: "Convolution"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 4096
kernel_size: 1
engine: CAFFE
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "drop7"
type: "Dropout"
bottom: "fc7"
top: "fc7"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "score-fr"
type: "Convolution"
bottom: "fc7"
top: "score"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
kernel_size: 1
engine: CAFFE
}
}
layer {
name: "score2"
type: "Deconvolution"
bottom: "score"
top: "score2"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
kernel_size: 4
stride: 2
}
}
layer {
name: "score-pool4"
type: "Convolution"
bottom: "pool4"
top: "score-pool4"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
kernel_size: 1
engine: CAFFE
}
}
layer {
name: "crop"
type: "Crop"
bottom: "score-pool4"
bottom: "score2"
top: "score-pool4c"
}
layer {
name: "fuse"
type: "Eltwise"
bottom: "score2"
bottom: "score-pool4c"
top: "score-fused"
eltwise_param {
operation: SUM
}
}
layer {
name: "score4"
type: "Deconvolution"
bottom: "score-fused"
top: "score4"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 21
bias_term: false
kernel_size: 4
stride: 2
}
}
layer {
name: "score-pool3"
type: "Convolution"
bottom: "pool3"
top: "score-pool3"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 21
kernel_size: 1
engine: CAFFE
}
}
layer {
name: "crop"
type: "Crop"
bottom: "score-pool3"
bottom: "score4"
top: "score-pool3c"
}
layer {
name: "fuse"
type: "Eltwise"
bottom: "score4"
bottom: "score-pool3c"
top: "score-final"
eltwise_param {
operation: SUM
}
}
layer {
name: "upsample"
type: "Deconvolution"
bottom: "score-final"
top: "bigscore"
param {
lr_mult: 0
}
convolution_param {
num_output: 21
bias_term: false
kernel_size: 16
stride: 8
}
}
layer {
name: "crop"
type: "Crop"
bottom: "bigscore"
bottom: "data"
top: "coarse"
}
layer {
name: "splitting"
type: "Split"
bottom: "coarse"
top: "unary"
top: "Q0"
}
layer {
name: "inference1"
type: "MultiStageMeanfield"
bottom: "unary"
bottom: "Q0"
bottom: "data"
top: "pred"
param {
lr_mult: 0.001
}
param {
lr_mult: 0.001
}
param {
lr_mult: 0.01
}
multi_stage_meanfield_param {
num_iterations: 10
compatibility_mode: POTTS
threshold: 2
theta_alpha: 160
theta_beta: 3
theta_gamma: 3
spatial_filter_weight: 3
bilateral_filter_weight: 5
}
}
(6) Adapt the old MATLAB program so that it works with the new Caffe version.
The crfrnn_demo.m file is as follows:
% This package contains code for the "CRF-RNN" semantic image segmentation method, published in the
% ICCV 2015 paper Conditional Random Fields as Recurrent Neural Networks. Our software is built on
% top of the Caffe deep learning library.
%
% Contact:
%
% Supervisor:
%
% For more information about CRF-RNN please visit the project website http://crfasrnn.torr.vision.
%
model_def_file = 'TVG_CRFRNN_COCO_VOC.prototxt';
model_file = 'TVG_CRFRNN_COCO_VOC.caffemodel';
phase = 'test';
use_gpu = 0; % Set this to 0 if you don't have a GPU.
if exist(model_file, 'file') ~= 2
error('You need a network model file. Please download our default model by running ./download_trained_model.sh');
end
if exist('use_gpu', 'var') && use_gpu
caffe.set_mode_gpu();
gpu_id = 0; % we will use the first gpu in this demo
caffe.set_device(gpu_id);
else
caffe.set_mode_cpu();
end
net = caffe.Net(model_def_file, model_file, phase);
im = imread('input.jpg');
[h, w, d] = size(im);
if (d ~= 3)
error('Error! Wrong depth.\n');
end
if (h > 500 || w > 500)
error('Error! Wrong image size.\n');
end
prepared_im = tvg_prepare_image_fixed(im);
inputData = {prepared_im};
scores = net.forward(inputData);
Q = scores{1};
[dumb, pred] = max(Q, [], 3);
pred = pred';
pred = pred(1:h, 1:w);
load map.mat
imwrite(pred, map, 'output.png', 'png');
To summarize:
This port mainly involves the following files:
Under the util folders in include and src:
coords.hpp
modified_permutohedral.hpp
modified_permutohedral.cpp
upgrade_proto.cpp
Under the layers folders in include and src:
meanfield_iteration.hpp
meanfield_iteration.cpp
multi_stage_meanfield.hpp
multi_stage_meanfield.cpp
crop_layer.hpp (just download it from the official master branch)
crop_layer.cpp (just download it from the official master branch)
Under caffe/include:
layer.hpp
And under matlab:
crfrnn_demo.m
CRFAsRNN can be downloaded from
http://www.robots.ox.ac.uk/~szheng/crfasrnndemo/
In addition, my blog describes how to download and install Caffe for Windows.
A package of the files ported to Windows can be downloaded from
http://download.csdn.net/detail/xizero00/9493870
The results after porting seem to differ slightly from the original, possibly because the official Caffe CropLayer was used, which introduces some deviation.
Original image:
Result of my port running on Windows:
Result when run on Linux: