Caffe: Writing a Custom Data Input Layer
Suppose the handwritten-digit training set is a single large image, as in the figure digits.png below. How can we train on it?
The image is 1000*2000 pixels (1,000 tall, 2,000 wide) and contains the digits 0-9: every five rows show the same digit, giving 50 rows by 100 columns of digits, i.e. 5,000 handwritten digits of 20*20 pixels each.
One approach is to crop every digit out of the large image and save it as a separate 20*20 image, forming a new training set, as sketched below.
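A minimal sketch of this cropping approach, using only OpenCV (the output naming scheme digit_label_row_col.png is made up for illustration):
#include <cstdio>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
int main() {
// Load digits.png as grayscale: 50 rows x 100 columns of 20x20 digit tiles.
cv::Mat src = cv::imread("digits.png", 0);
if (src.empty()) return 1;
const int cell = 20;
for (int row = 0; row < src.rows / cell; ++row) {
for (int col = 0; col < src.cols / cell; ++col) {
cv::Mat digit = src(cv::Rect(col * cell, row * cell, cell, cell));
int label = row / 5;  // every 5 rows show the same digit
char name[64];
std::sprintf(name, "digit_%d_r%02d_c%03d.png", label, row, col);
cv::imwrite(name, digit);
}
}
return 0;
}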
The other approach is to define our own data input layer, using Caffe's built-in image_data_layer (include/caffe/layers/image_data_layer.hpp and src/caffe/layers/image_data_layer.cpp) as a reference.
Steps:
1. Create the new header file include/caffe/layers/my_mnist_data_layer.hpp
(1) Give the new layer a type name:
virtual inline const char* type() const { return "MyMnistData"; }
(2) If no GPU operations are needed, the GPU-related function declarations, such as Forward_gpu(), can be commented out. The full header:
#ifndef CAFFE_MY_MNIST_DATA_LAYER_HPP_
#define CAFFE_MY_MNIST_DATA_LAYER_HPP_
#include <string>
#include <utility>
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/proto/caffe.pb.h"
namespace caffe {
/**
* @brief Provides data to the Net by cropping digit tiles out of one large image.
*/
template <typename Dtype>
class MyMnistDataLayer : public BasePrefetchingDataLayer<Dtype> {
public:
explicit MyMnistDataLayer(const LayerParameter& param)
: BasePrefetchingDataLayer<Dtype>(param) {}
virtual ~MyMnistDataLayer();
virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual inline const char* type() const { return "MyMnistData"; }
virtual inline int ExactNumBottomBlobs() const { return 0; }
virtual inline int ExactNumTopBlobs() const { return 2; }
protected:
shared_ptr<Caffe::RNG> prefetch_rng_;
virtual void ShuffleImages();
virtual void load_batch(Batch<Dtype>* batch);
// Each entry encodes one digit tile as "x,y,label".
vector<string> lines_;
int lines_id_;
};
} // namespace caffe
#endif // CAFFE_MY_MNIST_DATA_LAYER_HPP_
2. Create the source file src/caffe/layers/my_mnist_data_layer.cpp. Three functions need to be implemented:
(1) DataLayerSetUp: read the layer's parameters from the prototxt and set up the blob shapes (N, K, H, W)
(2) ShuffleImages: shuffle the sample order (overriding this is optional)
(3) load_batch: read one batch of images into memory
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#include <fstream> // NOLINT(readability/streams)
#include <iostream> // NOLINT(readability/streams)
#include <cstdio>
#include <string>
#include <utility>
#include <vector>
#include "caffe/data_transformer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/layers/my_mnist_data_layer.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"
namespace caffe {
template <typename Dtype>
MyMnistDataLayer<Dtype>::~MyMnistDataLayer() {
this->StopInternalThread();
}
template <typename Dtype>
void MyMnistDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
const int batch_height = this->layer_param_.my_mnist_data_param().batch_height();
const int batch_width = this->layer_param_.my_mnist_data_param().batch_width();
const bool is_color = this->layer_param_.my_mnist_data_param().is_color();
int col_account = this->layer_param_.my_mnist_data_param().col_account();
int col_start = this->layer_param_.my_mnist_data_param().col_start();
LOG(INFO) << "batch_height=" << batch_height << ", batch_width=" << batch_width
<< ", col_start=" << col_start << ", col_account=" << col_account ;
const int batch_size = this->layer_param_.my_mnist_data_param().batch_size();
CHECK_GT(batch_size, 0) << "Positive batch size required";
// Read the file with filenames and labels
const string& source = this->layer_param_.my_mnist_data_param().source();
LOG(INFO) << "Opening file " << source;
cv::Mat srcImg = ReadImageToCVMat(source, is_color);
int height = (batch_height < srcImg.rows && batch_height > 0) ? batch_height : srcImg.rows;
int width = (batch_width < srcImg.cols && batch_width > 0) ? batch_width : srcImg.cols;
int total_rows = srcImg.rows / height;
int total_cols = srcImg.cols / width;
if ((col_start + col_account) > total_cols)
col_account = total_cols - col_start;
if (col_account > 0 && col_account < total_cols)
total_cols = col_account;
// Enumerate every tile in the selected columns and record it as an
// "x,y,label" string; e.g. the tile at column 3, row 7 becomes "60,140,1".
for(int col = col_start; col < col_start + col_account; col++) {
int x = col * width;
for(int row = 0; row < total_rows; row++){
int y = row * height;
int label = row / 5; // every 5 rows of digits.png show the same digit
char temp[128]={0};
sprintf(temp, "%d,%d,%d", x, y, label);
string strValue(temp);
lines_.push_back(strValue);
}
}
CHECK(!lines_.empty()) << "File is empty";
if (this->layer_param_.my_mnist_data_param().shuffle()) {
// randomly shuffle data
LOG(INFO) << "Shuffling data";
const unsigned int prefetch_rng_seed = caffe_rng_rand();
prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
ShuffleImages();
} else {
if (this->phase_ == TRAIN && Caffe::solver_rank() > 0 &&
this->layer_param_.my_mnist_data_param().rand_skip() == 0) {
LOG(WARNING) << "Shuffling or skipping recommended for multi-GPU";
}
}
LOG(INFO) << "A total of " << lines_.size() << " images.";
lines_id_ = 0;
// Check if we would need to randomly skip a few data points
if (this->layer_param_.my_mnist_data_param().rand_skip()) {
unsigned int skip = caffe_rng_rand() %
this->layer_param_.my_mnist_data_param().rand_skip();
LOG(INFO) << "Skipping first " << skip << " data points.";
CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
lines_id_ = skip;
}
// Read an image, and use it to initialize the top blob.
cv::Mat cv_img = srcImg(cv::Rect(0, 0, width, height));
CHECK(cv_img.data) << "Could not crop the first tile from " << source;
// Use data_transformer to infer the expected blob shape from a cv_image.
vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
this->transformed_data_.Reshape(top_shape);
// Reshape prefetch_data and top[0] according to the batch_size.
top_shape[0] = batch_size;
for (int i = 0; i < this->prefetch_.size(); ++i) {
this->prefetch_[i]->data_.Reshape(top_shape);
}
top[0]->Reshape(top_shape);
LOG(INFO) << "output data size: " << top[0]->num() << ","
<< top[0]->channels() << "," << top[0]->height() << ","
<< top[0]->width();
// label
vector<int> label_shape(1, batch_size);
top[1]->Reshape(label_shape);
for (int i = 0; i < this->prefetch_.size(); ++i) {
this->prefetch_[i]->label_.Reshape(label_shape);
}
}
template <typename Dtype>
void MyMnistDataLayer<Dtype>::ShuffleImages() {
caffe::rng_t* prefetch_rng =
static_cast<caffe::rng_t*>(prefetch_rng_->generator());
shuffle(lines_.begin(), lines_.end(), prefetch_rng);
}
// This function is called on prefetch thread
template <typename Dtype>
void MyMnistDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
CPUTimer batch_timer;
batch_timer.Start();
double read_time = 0;
double trans_time = 0;
CPUTimer timer;
CHECK(batch->data_.count());
CHECK(this->transformed_data_.count());
MyMnisDataParameter my_mnist_data_param = this->layer_param_.my_mnist_data_param();
const int batch_size = my_mnist_data_param.batch_size();
const int batch_height = my_mnist_data_param.batch_height();
const int batch_width = my_mnist_data_param.batch_width();
const bool is_color = my_mnist_data_param.is_color();
//string root_folder = my_mnist_data_param.root_folder();
// Reshape according to the first image of each batch
// on single input batches allows for inputs of varying dimension.
const string& source = this->layer_param_.my_mnist_data_param().source();
cv::Mat srcImg = ReadImageToCVMat(source, is_color);
CHECK(srcImg.data) << "Could not load " << source;
// Use data_transformer to infer the expected blob shape from a cv_img.
cv::Mat cv_img = srcImg(cv::Rect(0, 0, batch_width, batch_height));
vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
this->transformed_data_.Reshape(top_shape);
// Reshape batch according to the batch_size.
top_shape[0] = batch_size;
batch->data_.Reshape(top_shape);
Dtype* prefetch_data = batch->data_.mutable_cpu_data();
Dtype* prefetch_label = batch->label_.mutable_cpu_data();
// datum scales
const int lines_size = lines_.size();
for (int item_id = 0; item_id < batch_size; ++item_id) {
// get a blob
timer.Start();
CHECK_GT(lines_size, lines_id_);
string strValue = lines_[lines_id_];
int x, y, label;
// Decode the "x,y,label" entry and crop the corresponding tile.
sscanf(strValue.c_str(), "%d,%d,%d", &x, &y, &label);
cv::Mat cv_img = srcImg(cv::Rect(x, y, batch_width, batch_height));
CHECK(cv_img.data) << "Could not crop tile at (" << x << "," << y << ")";
read_time += timer.MicroSeconds();
timer.Start();
// Apply transformations (mirror, crop...) to the image
int offset = batch->data_.offset(item_id);
this->transformed_data_.set_cpu_data(prefetch_data + offset);
this->data_transformer_->Transform(cv_img, &(this->transformed_data_));
trans_time += timer.MicroSeconds();
prefetch_label[item_id] = label;
// go to the next iter
lines_id_++;
if (lines_id_ >= lines_size) {
// We have reached the end. Restart from the first.
DLOG(INFO) << "Restarting data prefetching from start.";
lines_id_ = 0;
if (this->layer_param_.my_mnist_data_param().shuffle()) {
ShuffleImages();
}
}
}
batch_timer.Stop();
DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
DLOG(INFO) << " Read time: " << read_time / 1000 << " ms.";
DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}
INSTANTIATE_CLASS(MyMnistDataLayer);
REGISTER_LAYER_CLASS(MyMnistData);
} // namespace caffe
#endif // USE_OPENCV
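INSTANTIATE_CLASS instantiates the float and double versions of the layer template, and REGISTER_LAYER_CLASS(MyMnistData) registers the layer with Caffe's layer factory under the type string "MyMnistData", which is exactly the value the type field in the prototxt must use.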
A data input layer generally needs no GPU-specific operations; if it did, you would also create a GPU source file such as my_mnist_data_layer.cu. This article does not use one, since BasePrefetchingDataLayer already provides a Forward_gpu that copies the prefetched batch to the GPU.
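For reference, a hypothetical sketch of what such a .cu file might look like (it assumes the Forward_gpu declaration is kept in the header; there is nothing device-specific to do here, so it merely defers to the base class):
#include <vector>
#include "caffe/layers/my_mnist_data_layer.hpp"
namespace caffe {
template <typename Dtype>
void MyMnistDataLayer<Dtype>::Forward_gpu(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
// Nothing custom: let the base class copy the prefetched batch to the GPU.
BasePrefetchingDataLayer<Dtype>::Forward_gpu(bottom, top);
}
INSTANTIATE_LAYER_GPU_FORWARD(MyMnistDataLayer);
} // namespace caffe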
3. Register the new layer's parameter message in src/caffe/proto/caffe.proto
......
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 147 (last added: recurrent_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
repeated string bottom = 3; // the name of each bottom blob
......
optional SliceParameter slice_param = 126;
optional TanHParameter tanh_param = 127;
optional ThresholdParameter threshold_param = 128;
optional TileParameter tile_param = 138;
optional WindowDataParameter window_data_param = 129;
optional MyMnisDataParameter my_mnist_data_param = 147; // newly added for my data input layer
}
......
message MyMnisDataParameter {
optional string source = 1; // path to the big image, e.g. digits.png
optional uint32 batch_size = 2 [default = 1];
optional uint32 batch_height = 3 [default = 20]; // tile height in pixels
optional uint32 batch_width = 4 [default = 20]; // tile width in pixels
optional uint32 col_start = 5 [default = 0]; // first tile column to read
optional uint32 col_account = 6 [default = 1]; // number of tile columns to read
optional uint32 rand_skip = 7 [default = 0];
optional bool shuffle = 8 [default = false];
optional bool is_color = 9 [default = false];
}
......
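Note that my_mnist_data_param takes field number 147, the "next available layer-specific ID" recorded in the comment above LayerParameter; after adding the field, bump that comment to 148 so the next layer does not reuse the number.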
4. Rebuild Caffe (the build also regenerates caffe.pb.h/caffe.pb.cc from the modified caffe.proto)
mark@ubuntu:~/caffe/build$ make all
mark@ubuntu:~/caffe/build$ make install
5. Train with the new data input layer
Once everything above builds, we can prepare for training.
We first need the prototxt file that defines the network and the solver file.
For the network definition we can start from caffe/examples/mnist/lenet_train_test.prototxt and modify only the data layers: the TRAIN layer below reads columns 0-79 of digits.png (4,000 digits) and the TEST layer reads columns 80-99 (1,000 digits). The resulting my_lenet_train_test.prototxt is:
name: "LeNet"
layer {
name: "mnist"
type: "MyMnistData"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
scale: 0.00390625
}
my_mnist_data_param {
source: "/home/mark/caffe/examples/mnist/digits.png"
batch_size: 10
batch_height: 20
batch_width: 20
col_start: 0
col_account: 80
}
}
layer {
name: "mnist"
type: "MyMnistData"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
scale: 0.00390625
}
my_mnist_data_param {
source: "/home/mark/caffe/examples/mnist/digits.png"
batch_size: 10
batch_height: 20
batch_width: 20
col_start: 80
col_account: 20
}
}
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 20
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 50
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "ip1"
type: "InnerProduct"
bottom: "pool2"
top: "ip1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 500
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "ip1"
top: "ip1"
}
layer {
name: "ip2"
type: "InnerProduct"
bottom: "ip1"
top: "ip2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 10
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "ip2"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "ip2"
bottom: "label"
top: "loss"
}
The solver file likewise needs only small changes relative to the original caffe/examples/mnist/lenet_solver.prototxt; my_lenet_solver.prototxt reads as follows:
# The train/test net protocol buffer definition
net: "examples/mnist/my_lenet_train_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# Here the test split (columns 80-99 of digits.png) holds 1,000 digits and the
# test batch size is 10, so 400 test iterations cycle through it four times.
test_iter: 400
# Carry out testing every 100 training iterations.
test_interval: 100
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "examples/mnist/lenet"
# solver mode: CPU or GPU
solver_mode: GPU
mark@ubuntu:~$ cd caffe
mark@ubuntu:~/caffe$ ./build/tools/caffe train --solver=examples/mnist/my_lenet_solver.prototxt
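Once training finishes, the snapshot can be evaluated with the stock caffe test tool. A usage sketch, assuming the snapshot produced by the solver above is named lenet_iter_10000.caffemodel (snapshot_prefix plus the iteration number); 100 iterations with test batch size 10 cover the 1,000 test digits exactly once:
mark@ubuntu:~/caffe$ ./build/tools/caffe test --model=examples/mnist/my_lenet_train_test.prototxt --weights=examples/mnist/lenet_iter_10000.caffemodel --iterations=100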