Caffe 官方提供的 ImageData 层可以直接从 image 文件读取图像数据及对应的 label.
数据格式为(每行一条记录: 图片路径 与 整数标签, 以空格分隔):
# train.txt
001.jpg 1
002.jpg 2
003.jpg 3
网络层定义:
# train_val.prototxt
# ImageData layer: reads images listed in a text file and outputs two blobs,
# the image batch ("data") and the matching integer labels ("label").
layer {
name: "data"
type: "ImageData"
top: "data"
top: "label"
# Use this layer only when the net is built for the TRAIN phase.
include {
phase: TRAIN
}
# Per-image preprocessing applied by the data transformer.
transform_param {
# Random horizontal flip.
mirror: true
# 0.00390625 == 1/256; the mean is subtracted before this scaling is applied.
scale: 0.00390625
#crop_size: 224
#mean_value: 128
# One mean per channel (three values, so a 3-channel color image is expected).
# NOTE(review): channel order is presumably BGR as loaded by OpenCV - confirm.
mean_value: 104
mean_value: 117
mean_value: 123
}
image_data_param {
# List file with one "<image path> <label>" entry per line.
source: "/path/to/train.txt"
# Prefix prepended to every image path read from the list file.
root_folder: "/path/to/images"
# Resize every image to this size; both values must be set together
# (either both zero or both positive).
new_height: 224
new_width: 224
batch_size: 32
# Shuffle the list at setup and again each time the list wraps around.
shuffle: true
}
}
// Excerpt of LayerParameter: only the field that carries the ImageData
// layer's own options is shown here (the prototxt example also uses other
// fields such as name, type, top, include and transform_param).
message LayerParameter {
// Options consumed by the ImageData layer (see ImageDataParameter).
optional ImageDataParameter image_data_param = 115;
}
// Options for the ImageData layer, which reads images and labels from a
// plain-text list file.
message ImageDataParameter {
// Path of the text file that lists the images (one "<path> <label>" per line).
optional string source = 1;
// Number of images per batch.
optional uint32 batch_size = 4 [default = 1];
// Skip a random number of leading samples so that parallel SGD clients do not
// all start from the same sample.
// The layer draws the skip count at random, bounded by rand_skip.
// rand_skip should not exceed the number of samples in the data set.
optional uint32 rand_skip = 7 [default = 0];
// Shuffle the sample order after every epoch.
optional bool shuffle = 8 [default = false];
// Resize every image to new_height x new_width (0 means no resize).
optional uint32 new_height = 9 [default = 0];
optional uint32 new_width = 10 [default = 0];
// Whether images are loaded as color (true) or grayscale (false).
optional bool is_color = 11 [default = true];
// DEPRECATED. See TransformationParameter.
// Simple pre-processing: scaling and mean subtraction.
// The mean is always subtracted before the scaling is applied.
optional float scale = 2 [default = 1];
optional string mean_file = 3;
// DEPRECATED. See TransformationParameter.
// Randomly crop a crop_size patch from each image.
optional uint32 crop_size = 5 [default = 0];
// DEPRECATED. See TransformationParameter.
// Randomly flip images horizontally.
optional bool mirror = 6 [default = false];
// Directory prefix prepended to every image path from the list file.
optional string root_folder = 12 [default = ""];
}
#include "caffe/layers/image_data_layer.hpp"
公有成员函数:
ImageDataLayer (const LayerParameter &param)
DataLayerSetUp (const vector< Blob< Dtype > * > &bottom, const vector< Blob< Dtype > * > &top)
ExactNumBottomBlobs () const // 返回该层要求的 bottom blobs 的确切数目; 如果不要求确切数目, 则返回 -1. (ImageDataLayer 没有 bottom blob, 返回 0.)
ExactNumTopBlobs () const // 返回该层要求的 top blobs 的确切数目; 如果不要求确切数目, 则返回 -1. (ImageDataLayer 输出 data 和 label, 返回 2.)
protected成员函数:
可以被派生类对象访问,不能被用户代码(类外)访问.
ShuffleImages ()
load_batch (Batch< Dtype > *batch)
Protected Attributes:
prefetch_rng_
lines_
lines_id_
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>

#include <fstream>  // NOLINT(readability/streams)
#include <iostream>  // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "caffe/data_transformer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/layers/image_data_layer.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"
namespace caffe {
/// Destructor: stops the layer's internal thread before the object's
/// members are torn down.
template <typename Dtype>
ImageDataLayer<Dtype>::~ImageDataLayer() {
  this->StopInternalThread();
}
template <typename Dtype>
void ImageDataLayer::DataLayerSetUp(const vector *>& bottom,
const vector *>& top) {
const int new_height = this->layer_param_.image_data_param().new_height();
const int new_width = this->layer_param_.image_data_param().new_width();
const bool is_color = this->layer_param_.image_data_param().is_color();
string root_folder = this->layer_param_.image_data_param().root_folder();
CHECK((new_height == 0 && new_width == 0) ||
(new_height > 0 && new_width > 0)) << "Current implementation requires "
"new_height and new_width to be set at the same time.";
// Read the file with filenames and labels
const string& source = this->layer_param_.image_data_param().source();
LOG(INFO) << "Opening file " << source;
std::ifstream infile(source.c_str());
string line;
size_t pos;
int label;
while (std::getline(infile, line)) {
pos = line.find_last_of(' ');
label = atoi(line.substr(pos + 1).c_str());
lines_.push_back(std::make_pair(line.substr(0, pos), label));
}
CHECK(!lines_.empty()) << "File is empty";
if (this->layer_param_.image_data_param().shuffle()) {
// randomly shuffle data
// 随机打乱数据顺序
LOG(INFO) << "Shuffling data";
const unsigned int prefetch_rng_seed = caffe_rng_rand();
prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
ShuffleImages();
} else {
if (this->phase_ == TRAIN && Caffe::solver_rank() > 0 &&
this->layer_param_.image_data_param().rand_skip() == 0) {
LOG(WARNING) << "Shuffling or skipping recommended for multi-GPU";
}
}
LOG(INFO) << "A total of " << lines_.size() << " images.";
lines_id_ = 0;
// Check if we would need to randomly skip a few data points
// 随机跳过部分数据
if (this->layer_param_.image_data_param().rand_skip()) {
unsigned int skip = caffe_rng_rand() %
this->layer_param_.image_data_param().rand_skip();
LOG(INFO) << "Skipping first " << skip << " data points.";
CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
lines_id_ = skip;
}
// Read an image, and use it to initialize the top blob.
// 读取图片,并放入 top blob.
cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
new_height, new_width, is_color);
CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
// Use data_transformer to infer the expected blob shape from a cv_image.
vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
this->transformed_data_.Reshape(top_shape);
// Reshape prefetch_data and top[0] according to the batch_size.
const int batch_size = this->layer_param_.image_data_param().batch_size();
CHECK_GT(batch_size, 0) << "Positive batch size required";
top_shape[0] = batch_size;
for (int i = 0; i < this->prefetch_.size(); ++i) {
this->prefetch_[i]->data_.Reshape(top_shape);
}
top[0]->Reshape(top_shape);
LOG(INFO) << "output data size: " << top[0]->num() << ","
<< top[0]->channels() << "," << top[0]->height() << ","
<< top[0]->width();
// label
// 数据标签
vector<int> label_shape(1, batch_size);
top[1]->Reshape(label_shape);
for (int i = 0; i < this->prefetch_.size(); ++i) {
this->prefetch_[i]->label_.Reshape(label_shape);
}
}
template <typename Dtype>
void ImageDataLayer::ShuffleImages() {
caffe::rng_t* prefetch_rng =
static_cast(prefetch_rng_->generator());
shuffle(lines_.begin(), lines_.end(), prefetch_rng);
}
// This function is called on prefetch thread
// 预读取数据线程
template <typename Dtype>
void ImageDataLayer::load_batch(Batch* batch) {
CPUTimer batch_timer;
batch_timer.Start();
double read_time = 0;
double trans_time = 0;
CPUTimer timer;
CHECK(batch->data_.count());
CHECK(this->transformed_data_.count());
ImageDataParameter image_data_param = this->layer_param_.image_data_param();
const int batch_size = image_data_param.batch_size();
const int new_height = image_data_param.new_height();
const int new_width = image_data_param.new_width();
const bool is_color = image_data_param.is_color();
string root_folder = image_data_param.root_folder();
// Reshape according to the first image of each batch
// on single input batches allows for inputs of varying dimension.
// 读取图像数据
// 数据维度调整转换
cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
new_height, new_width, is_color);
CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
// Use data_transformer to infer the expected blob shape from a cv_img.
vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
this->transformed_data_.Reshape(top_shape);
// Reshape batch according to the batch_size.
top_shape[0] = batch_size;
batch->data_.Reshape(top_shape);
Dtype* prefetch_data = batch->data_.mutable_cpu_data();
Dtype* prefetch_label = batch->label_.mutable_cpu_data();
// datum scales
const int lines_size = lines_.size();
for (int item_id = 0; item_id < batch_size; ++item_id) {
// get a blob
timer.Start();
CHECK_GT(lines_size, lines_id_);
cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
new_height, new_width, is_color);
CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
read_time += timer.MicroSeconds();
timer.Start();
// Apply transformations (mirror, crop...) to the image
// 图像处理,如 mirror,crop 等
int offset = batch->data_.offset(item_id);
this->transformed_data_.set_cpu_data(prefetch_data + offset);
this->data_transformer_->Transform(cv_img, &(this->transformed_data_));
trans_time += timer.MicroSeconds();
prefetch_label[item_id] = lines_[lines_id_].second;
// go to the next iter
lines_id_++;
if (lines_id_ >= lines_size) {
// We have reached the end. Restart from the first.
DLOG(INFO) << "Restarting data prefetching from start.";
lines_id_ = 0;
if (this->layer_param_.image_data_param().shuffle()) {
ShuffleImages();
}
}
}
batch_timer.Stop();
DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
DLOG(INFO) << " Read time: " << read_time / 1000 << " ms.";
DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}
// Instantiate the layer template and register it with the layer factory so
// that a prototxt entry with type: "ImageData" resolves to this class.
// NOTE(review): the macros are defined elsewhere; presumably INSTANTIATE_CLASS
// emits the float and double instantiations - confirm against their definition.
INSTANTIATE_CLASS(ImageDataLayer);
REGISTER_LAYER_CLASS(ImageData);
} // namespace caffe
#endif // USE_OPENCV
[1] - caffe::ImageDataLayer