Caffe 自定义数据输入层

Caffe 自定义数据输入层

如果手写数字训练集是一张大的图片,如下图 digits.png所示 ,怎样用它来进行训练呢?

这个图片的大小为1000*2000,有0-9的10个数字,每5行为一个数字,总共50行,100列,共有5000个手写数字,每个图片尺寸为20*20

一种方法是我们可以把每个数字从这张大图中截取出来,保存为一个个小的数字图片(20*20),形成新的训练集。

还有一种方法是我们可以定义自己的数据输入层

可以参考caffe中自带的 image_data_layer

步骤:

1. 创建新定义的头文件 include/caffe/layers/my_mnist_data_layer.hpp

     (1) 给新定义的 layer 起个名字

     virtual inline const char* type() const { return "MyMnistData"; }

     (2) 如果不需要 GPU operation 的话,可以注释掉与GPU相关的函数声明,例如 Forward_gpu()

#ifndef CAFFE_MY_MNIST_DATA_LAYER_HPP_
#define CAFFE_MY_MNIST_DATA_LAYER_HPP_

#include <string>
#include <utility>
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
 * @brief Provides data to the Net by cutting fixed-size tiles out of a single
 *        large image (a sheet of hand-written digits).
 *
 * The layer takes no bottom blobs and produces exactly two top blobs:
 * top[0] holds the image data and top[1] holds the labels.
 * Configuration is read from LayerParameter::my_mnist_data_param().
 */
template <typename Dtype>
class MyMnistDataLayer : public BasePrefetchingDataLayer<Dtype> {
 public:
  explicit MyMnistDataLayer(const LayerParameter& param)
      : BasePrefetchingDataLayer<Dtype>(param) {}
  virtual ~MyMnistDataLayer();
  /// Builds the tile list from the source image and shapes the top blobs.
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  /// Layer type string; must match the `type:` field used in the prototxt.
  virtual inline const char* type() const { return "MyMnistData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

 protected:
  /// Random generator used by ShuffleImages(); only created when shuffling.
  shared_ptr<Caffe::RNG> prefetch_rng_;
  /// Randomly permutes lines_.
  virtual void ShuffleImages();
  /// Fills one prefetch batch with tiles and labels.
  virtual void load_batch(Batch<Dtype>* batch);

  /// One record per tile, encoded as "x,y,label" (pixel offset and class).
  vector<std::string> lines_;
  /// Index into lines_ of the next tile to be served.
  int lines_id_;
};


}  // namespace caffe

#endif  // CAFFE_MY_MNIST_DATA_LAYER_HPP_

2. 创建对应的源文件 src/caffe/layers/my_mnist_data_layer.cpp

    需要重写三个函数:

    (1)DataLayerSetUp:定义好从prototxt读入的参数名和容器的规格(设好N, K, H, W)
    (2)ShuffleImages:打乱顺序    (非必须重写)
    (3)load_batch:把图片读入到内存   

#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>

#include <cstdio>
#include <fstream>  // NOLINT(readability/streams)
#include <iostream>  // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "caffe/data_transformer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/layers/my_mnist_data_layer.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"

namespace caffe {

template 
MyMnistDataLayer::~MyMnistDataLayer() {
  this->StopInternalThread();
}

template 
void MyMnistDataLayer::DataLayerSetUp(const vector*>& bottom,
      const vector*>& top) {
  const int batch_height = this->layer_param_.my_mnist_data_param().batch_height();
  const int batch_width  = this->layer_param_.my_mnist_data_param().batch_width(); 
  const bool is_color  = this->layer_param_.my_mnist_data_param().is_color(); 
  int col_account  = this->layer_param_.my_mnist_data_param().col_account();  
  int col_start = this->layer_param_.my_mnist_data_param().col_start();  
  LOG(INFO) << "batch_height=" << batch_height << ", batch_width=" << batch_width
  	           << ", col_start=" << col_start << ", col_account=" << col_account ;  
  const int batch_size = this->layer_param_.my_mnist_data_param().batch_size();
  CHECK_GT(batch_size, 0) << "Positive batch size required";
  
  // Read the file with filenames and labels
  const string& source = this->layer_param_.my_mnist_data_param().source();
  LOG(INFO) << "Opening file " << source;
  
  cv::Mat srcImg = ReadImageToCVMat(source, is_color);
  int height = (batch_height < srcImg.rows && batch_height > 0) ? batch_height : srcImg.rows;
  int width  = (batch_width < srcImg.cols && batch_width > 0) ? batch_width : srcImg.cols;
  int total_rows = srcImg.rows / height;
  int total_cols = srcImg.cols / width;
  if ((col_start + col_account) > total_cols)
  	col_account = total_cols - col_start;
  if (col_account > 0 && col_account < total_cols)
  	total_cols = col_account;

  for(int col = col_start; col < col_start + col_account; col++) {
      int x =  col * width;
      for(int row = 0; row < total_rows; row++){
                int y = row * height;
		int label = row / 5;
                char temp[128]={0};
                sprintf(temp, "%d,%d,%d", x, y, label);
                string strValue(temp);
                lines_.push_back(strValue);		
      }
  }  
  
  CHECK(!lines_.empty()) << "File is empty";
  if (this->layer_param_.my_mnist_data_param().shuffle()) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    const unsigned int prefetch_rng_seed = caffe_rng_rand();
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
    ShuffleImages();
  } else {
    if (this->phase_ == TRAIN && Caffe::solver_rank() > 0 &&
        this->layer_param_.my_mnist_data_param().rand_skip() == 0) {
      LOG(WARNING) << "Shuffling or skipping recommended for multi-GPU";
    }
  }
  LOG(INFO) << "A total of " << lines_.size() << " images.";

  lines_id_ = 0;
  // Check if we would need to randomly skip a few data points
  if (this->layer_param_.my_mnist_data_param().rand_skip()) {
    unsigned int skip = caffe_rng_rand() %
        this->layer_param_.my_mnist_data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
    lines_id_ = skip;
  }
  // Read an image, and use it to initialize the top blob.
  cv::Mat cv_img = srcImg(cv::Rect(0, 0, width, height));
  CHECK(cv_img.data) << "Could not load 22" ;
  // Use data_transformer to infer the expected blob shape from a cv_image.
  vector top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape prefetch_data and top[0] according to the batch_size.
  
  top_shape[0] = batch_size;
  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->data_.Reshape(top_shape);
  }
  top[0]->Reshape(top_shape);

  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label
  vector label_shape(1, batch_size);
  top[1]->Reshape(label_shape);
  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->label_.Reshape(label_shape);
  }
}

template 
void MyMnistDataLayer::ShuffleImages() {
  caffe::rng_t* prefetch_rng =
      static_cast(prefetch_rng_->generator());
  shuffle(lines_.begin(), lines_.end(), prefetch_rng);
}

// This function is called on prefetch thread
template 
void MyMnistDataLayer::load_batch(Batch* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());
  MyMnisDataParameter my_mnist_data_param = this->layer_param_.my_mnist_data_param();
  const int batch_size = my_mnist_data_param.batch_size();
  const int batch_height = my_mnist_data_param.batch_height();
  const int batch_width = my_mnist_data_param.batch_width();
  const bool is_color = my_mnist_data_param.is_color();
  //string root_folder = my_mnist_data_param.root_folder();

  // Reshape according to the first image of each batch
  // on single input batches allows for inputs of varying dimension.
  const string& source = this->layer_param_.my_mnist_data_param().source();
  cv::Mat srcImg = ReadImageToCVMat(source, is_color);
  CHECK(srcImg.data) << "Could not load " << source;
  // Use data_transformer to infer the expected blob shape from a cv_img.
  cv::Mat cv_img = srcImg(cv::Rect(0, 0, batch_width, batch_height));
  vector top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);

  Dtype* prefetch_data = batch->data_.mutable_cpu_data();
  Dtype* prefetch_label = batch->label_.mutable_cpu_data();

  // datum scales
  const int lines_size = lines_.size();
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    timer.Start();
    CHECK_GT(lines_size, lines_id_);
 
    string strValue = lines_[lines_id_];
    int x, y, label;
    sscanf(strValue.c_str(), "%d,%d,%d", &x, &y, &label);
    cv::Mat cv_img = srcImg(cv::Rect(x, y, batch_width, batch_height));
    CHECK(cv_img.data) << "Could not load " ;
    read_time += timer.MicroSeconds();
    timer.Start();
    // Apply transformations (mirror, crop...) to the image
    int offset = batch->data_.offset(item_id);
    this->transformed_data_.set_cpu_data(prefetch_data + offset);
    this->data_transformer_->Transform(cv_img, &(this->transformed_data_));
    trans_time += timer.MicroSeconds();

    prefetch_label[item_id] = label;
    // go to the next iter
    lines_id_++;
    if (lines_id_ >= lines_size) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      lines_id_ = 0;
      if (this->layer_param_.my_mnist_data_param().shuffle()) {
        ShuffleImages();
      }
    }
  }
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}

// Instantiate the MyMnistDataLayer template in this translation unit
// (presumably for the Dtype variants Caffe supports - confirm against the
// macro definition in Caffe's common header).
INSTANTIATE_CLASS(MyMnistDataLayer);
// Register the layer so a net definition can refer to it by name; the
// prototxt in this tutorial uses type: "MyMnistData".
REGISTER_LAYER_CLASS(MyMnistData);

}  // namespace caffe
#endif  // USE_OPENCV

上面的代码中 INSTANTIATE_CLASS(MyMnistDataLayer) 和 REGISTER_LAYER_CLASS(MyMnistData) 是两个宏调用:前者用于实例化新定义的数据层模板类,后者用于把新层注册到 Caffe 中,这样才能在 prototxt 里通过 type: "MyMnistData" 使用它

数据输入层一般不需要GPU相关的operation,如果需要的话,还需要创建和GPU operation相关的源文件,例如,my_mnist_data_layer.cu 本文中没有用到

3. 在src/caffe/proto/caffe.proto 里对新定义的层进行注册

......
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 148 (last added: my_mnist_data_param)
message LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the layer type
  repeated string bottom = 3; // the name of each bottom blob
......
  optional SliceParameter slice_param = 126;
  optional TanHParameter tanh_param = 127;
  optional ThresholdParameter threshold_param = 128;
  optional TileParameter tile_param = 138;
  optional WindowDataParameter window_data_param = 129;
  optional MyMnisDataParameter my_mnist_data_param = 147;//newly added for my data input layer
}

......

// Parameters of the MyMnistData layer, which serves fixed-size tiles cut from
// one large source image.
message MyMnisDataParameter {
  // Path of the source image that contains all tiles.
  optional string  source = 1;
  // Number of tiles per batch; the layer requires a value greater than 0.
  optional uint32 batch_size = 2 [default = 1];
  // Tile height in pixels (despite the name, this is not a batch dimension).
  // The layer falls back to the full image height when this is 0 or not
  // smaller than the image.
  optional uint32 batch_height = 3 [default = 20];
  // Tile width in pixels; same fallback rule as batch_height.
  optional uint32 batch_width = 4 [default = 20];
  // Index of the first tile column to use.
  optional uint32 col_start = 5 [default = 0];
  // Number of tile columns to use starting at col_start; the layer clamps it
  // to the columns available in the image.
  optional uint32 col_account = 6 [default = 1];
  // If non-zero, setup starts reading at a random tile index below this
  // value.
  optional uint32 rand_skip = 7 [default = 0];
  // Whether to shuffle the tile order at setup and after every full pass.
  optional bool shuffle = 8 [default = false];
  // Passed to the image reader to choose color or grayscale loading.
  optional bool is_color = 9 [default = false];
}
......

4. 重新编译和 Install

    mark@ubuntu:~/caffe/build$ make all

    mark@ubuntu:~/caffe/build$ make install


5. 用自己定义的新的数据输入层进行训练

上面一切OK后,下面就要准备训练了,

首先要有与net相关的proto 文件以及solver 文件

定义网络结构的proto文件,我们可以在caffe/examples/mnist/lenet_train_test.prototxt 的基础上修改一个,只需要修改data layer即可, my_lenet_train_test.prototxt的内容 如下:

# LeNet on the digits.png sheet, fed by the custom MyMnistData layer.
name: "LeNet"
# Training input: tile columns 0..79 of digits.png.
layer {
  name: "mnist"
  type: "MyMnistData"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    # 0.00390625 = 1/256
    scale: 0.00390625
  }
  my_mnist_data_param {
    source: "/home/mark/caffe/examples/mnist/digits.png"
    batch_size: 10   
    # Tile size in pixels.
    batch_height: 20
    batch_width: 20
    col_start: 0
    col_account: 80
  }
}
# Test input: tile columns 80..99 of the same sheet.
layer {
  name: "mnist"
  type: "MyMnistData"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    # 0.00390625 = 1/256
    scale: 0.00390625
  }
  my_mnist_data_param {
    source: "/home/mark/caffe/examples/mnist/digits.png"
    batch_size: 10
    # Tile size in pixels.
    batch_height: 20
    batch_width: 20
    col_start: 80
    col_account: 20
  }
}
# First convolution: 20 outputs, 5x5 kernel, stride 1.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  # Two param blocks with different learning-rate multipliers
  # (presumably weights then bias, per Caffe convention - confirm).
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# 2x2 max pooling with stride 2.
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# Second convolution: 50 outputs, 5x5 kernel, stride 1.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# 2x2 max pooling with stride 2.
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# Fully connected layer with 500 outputs.
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# ReLU applied in place on ip1 (bottom and top are the same blob).
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
# Output layer: 10 outputs, one per digit class.
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Accuracy is only computed in the TEST phase.
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "ip2"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
}
# Softmax loss over the 10 class scores; present in both phases.
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "ip2"
  bottom: "label"
  top: "loss"
}

和lenet_train_test.prototxt 原有的内容相比,只是修改了前两个layer, 从中可以看出,在 digits.png 中,选取前80列作为训练集,后20列作为测试集

同样 solver 文件,也只是在原先 caffe/examples/mnist/lenet_solver.prototxt 的基础上稍微修改下即可, my_lenet_solver.prototxt的内容如下:

# The train/test net protocol buffer definition
net: "examples/mnist/my_lenet_train_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# The TEST data layer uses 20 tile columns x 50 rows = 1,000 images with
# batch size 10, so 100 test iterations cover the full test split exactly once
# (a larger value would only walk the same images again).
test_iter: 100
# Carry out testing every 100 training iterations.
test_interval: 100
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "examples/mnist/lenet"
# solver mode: CPU or GPU
solver_mode: GPU

下面就开始训练了

mark@ubuntu:~$ cd caffe
mark@ubuntu:~/caffe$ ./build/tools/caffe train --solver=examples/mnist/my_lenet_solver.prototxt







你可能感兴趣的:(Caffe,caffe)