Paper: http://jeffdonahue.com/lrcn/
Code: the “lstm_video_deploy” branch of Lisa Anne Hendricks’s Caffe fork
train_test_lstm_RGB.prototxt
# Net name followed by the TRAIN-phase input layer.
name: "lstm_joints"
layer {
  name: "data"
  type: "Python"
  top: "data"
  top: "label"
  top: "clip_markers"
  # `module` is the Python module to import and `layer` the class inside it
  # that gets instantiated for this layer.
  python_param {
    module: "sequence_input_layer"
    layer: "videoReadTrain_RGB"
  }
  include: { phase: TRAIN }
}
layer_factory.cpp
/**
 * @brief Builds a Python-backed layer from a LayerParameter.
 *
 * Imports the module named by `python_param().module()`, looks up the class
 * named by `python_param().layer()` in it, calls that class with @p param and
 * extracts the resulting object as a `shared_ptr<PythonLayer<Dtype> >`.
 *
 * @param param Layer description; its python_param supplies module and class.
 * @return The constructed layer.
 * @throws bp::error_already_set (rethrown) if any Python call fails; the
 *         Python traceback is printed first.
 */
template <typename Dtype>
shared_ptr<Layer<Dtype> > GetPythonLayer(const LayerParameter& param) {
  Py_Initialize();
  try {
    // e.g. module "sequence_input_layer" -> sequence_input_layer.py
    bp::object module = bp::import(param.python_param().module().c_str());
    // e.g. layer "videoReadTrain_RGB" -> instance of that Python class
    bp::object layer = module.attr(param.python_param().layer().c_str())(param);
    // extract<T> pulls a C++ value of type T out of a Python object.
    return bp::extract<shared_ptr<PythonLayer<Dtype> > >(layer)();
  } catch (const bp::error_already_set&) {
    // Caught by const reference (not by value) so no copy is made.
    PyErr_Print();
    throw;
  }
}
REGISTER_LAYER_CREATOR(Python, GetPythonLayer);
//python层的主要函数是在LayerSetup和Forward时.
/**
 * @brief Layer whose setup/reshape/forward/backward are forwarded to methods
 *        of the same name on a Python object.
 *
 * Each hook calls the Python method through boost::python. If the call raises,
 * the Python traceback is printed and the exception is rethrown.
 */
template <typename Dtype>
class PythonLayer : public Layer<Dtype> {
 public:
  /// @param self  The Python object that receives the forwarded calls
  ///              (stored as a raw pointer; this class does not manage it).
  /// @param param Layer description passed on to the Layer base class.
  PythonLayer(PyObject* self, const LayerParameter& param)
      : Layer<Dtype>(param), self_(self) { }

  /// Forwards to the Python object's `setup(bottom, top)`.
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    try {
      bp::call_method<bp::object>(self_, "setup", bottom, top);
    } catch (const bp::error_already_set&) {
      PyErr_Print();
      throw;
    }
  }

  /// Forwards to the Python object's `reshape(bottom, top)`.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    try {
      bp::call_method<bp::object>(self_, "reshape", bottom, top);
    } catch (const bp::error_already_set&) {
      PyErr_Print();
      throw;
    }
  }

  virtual inline const char* type() const { return "Python"; }

 protected:
  /// Forwards to the Python object's `forward(bottom, top)`.
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    try {
      bp::call_method<bp::object>(self_, "forward", bottom, top);
    } catch (const bp::error_already_set&) {
      PyErr_Print();
      throw;
    }
  }

  /// Forwards to the Python object's `backward(top, propagate_down, bottom)`.
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
    try {
      bp::call_method<bp::object>(self_, "backward", top, propagate_down,
          bottom);
    } catch (const bp::error_already_set&) {
      PyErr_Print();
      throw;
    }
  }

 private:
  PyObject* self_;  // target of all forwarded calls; not owned here
};
# Directory prefix (note the trailing slash) under which frame folders are
# looked up; used as `path_to_images` by videoReadTrain_RGB.
RGB_frames = 'RGBframes/'
# Frames per clip. `train_frames` becomes `self.frames` in videoReadTrain_RGB;
# `test_frames` is presumably the test-time counterpart (not used in the code
# shown here).
test_frames = 16
train_frames = 16
# Videos per batch. `train_buffer` becomes `self.buffer_size` in
# videoReadTrain_RGB; `test_buffer` is presumably the test-time counterpart.
test_buffer = 3
train_buffer = 24
class videoReadTrain_RGB(videoRead):
    """Training-time RGB configuration of the ``videoRead`` data layer."""

    def initialize(self):
        """Set the attributes ``videoRead.setup`` reads right after this hook."""
        # Mode flags.
        self.train_or_test = 'train'
        self.flow = False

        # Batch geometry. buffer_size = videos per batch (24); lower it if
        # memory is tight. frames = consecutive frames taken per video (16);
        # the clip-marker code elsewhere in this file steps by a literal 16,
        # so changing it here alone is not enough.
        self.buffer_size = train_buffer
        self.frames = train_frames
        self.N = self.buffer_size * self.frames
        self.idx = 0

        # Shape of a single frame in the output blob.
        self.channels, self.height, self.width = 3, 227, 227

        # Where frames live and which videos to use. Each line of the list is
        # "<class>/<video> <label>", for example:
        #   TableTennisShot/v_TableTennisShot_g19_c03 89
        #   MilitaryParade/v_MilitaryParade_g09_c06 52
        #   RopeClimbing/v_RopeClimbing_g16_c01 74
        self.path_to_images = RGB_frames
        self.video_list = 'ucf101_split1_trainVideos.txt'
class videoRead(caffe.Layer):
    """Python data layer that emits batches of video clips.

    Subclasses provide ``initialize()``, which must set ``video_list``,
    ``path_to_images``, ``buffer_size``, ``frames``, ``N``, ``channels``,
    ``height``, ``width`` and ``flow``. Batches are produced by a worker
    thread (``dispatch_worker`` / ``join_worker``) that fills
    ``self.thread_result``.
    """

    def setup(self, bottom, top):
        """Index the videos, build the transformer, prefetch a first batch
        and shape the three top blobs ('data', 'label', 'clip_markers').

        Raises:
            Exception: if a listed video has no .jpg frames, or if the number
                of tops differs from ``len(self.top_names)``.
        """
        # Fixed seed: every run draws the same random sequence.
        random.seed(10)
        self.initialize()

        with open(self.video_list, 'r') as f:
            f_lines = f.readlines()

        video_dict = {}
        self.video_order = []
        for line in f_lines:
            # line: "TableTennisShot/v_TableTennisShot_g19_c03 89"
            fields = line.split(' ')
            video = fields[0].split('/')[1]  # v_TableTennisShot_g19_c03
            label = int(fields[1])           # 89
            # Every .jpg under <path_to_images><video>/
            frames = glob.glob('%s%s/*.jpg' % (self.path_to_images, video))
            if not frames:
                raise Exception('No .jpg frames found for video %s under %s'
                                % (video, self.path_to_images))
            video_dict[video] = {
                # Frame-path template with a '%04d' slot for the frame number,
                # derived from the first globbed file: everything before its
                # first '.' plus '.%04d.jpg'. Assumes frames are named
                # <prefix>.<4-digit index>.jpg -- TODO confirm on disk.
                'frames': frames[0].split('.')[0] + '.%04d.jpg',
                'reshape': (240, 320),
                'crop': (227, 227),
                'num_frames': len(frames),
                'label': label,
            }
            self.video_order.append(video)
        self.video_dict = video_dict
        self.num_videos = len(video_dict.keys())

        # Set up the data transformer.
        shape = (self.N, self.channels, self.height, self.width)
        self.transformer = caffe.io.Transformer({'data_in': shape})
        self.transformer.set_raw_scale('data_in', 255)
        if self.flow:
            image_mean = [128, 128, 128]
            self.transformer.set_is_flow('data_in', True)
        else:
            image_mean = [103.939, 116.779, 128.68]
            self.transformer.set_is_flow('data_in', False)
        # Three 227x227 planes, each filled with its channel's mean value.
        channel_mean = np.zeros((3, 227, 227))
        for channel_index, mean_val in enumerate(image_mean):
            channel_mean[channel_index, ...] = mean_val
        self.transformer.set_mean('data_in', channel_mean)
        self.transformer.set_channel_swap('data_in', (2, 1, 0))
        self.transformer.set_transpose('data_in', (2, 0, 1))

        self.thread_result = {}
        self.thread = None
        pool_size = 24

        self.image_processor = ImageProcessorCrop(self.transformer, self.flow)
        self.sequence_generator = sequenceGeneratorVideo(
            self.buffer_size, self.frames, self.num_videos,
            self.video_dict, self.video_order)

        self.pool = Pool(processes=pool_size)
        self.batch_advancer = BatchAdvancer(
            self.thread_result, self.sequence_generator,
            self.image_processor, self.pool)

        # Start producing the first batch.
        self.dispatch_worker()

        self.top_names = ['data', 'label', 'clip_markers']
        # Same output as the Python 2 statement `print 'Outputs:', names`,
        # but also valid Python 3 syntax.
        print('Outputs: %s' % (self.top_names,))
        if len(top) != len(self.top_names):
            raise Exception('Incorrect number of outputs (expected %d, got %d)' %
                            (len(self.top_names), len(top)))

        # Wait for the first batch before shaping the tops.
        self.join_worker()

        for top_index, name in enumerate(self.top_names):
            if name == 'data':
                shape = (self.N, self.channels, self.height, self.width)
            elif name == 'label':
                shape = (self.N,)  # one-element tuple
            elif name == 'clip_markers':
                shape = (self.N,)  # one-element tuple
            # `*shape` unpacks the tuple into positional arguments, e.g.
            # reshape(N, channels, height, width).
            top[top_index].reshape(*shape)

    def reshape(self, bottom, top):
        """No-op; the top shapes are fixed in ``setup``."""
        pass

    def forward(self, bottom, top):
        """Copy the prefetched batch into the tops, then start the next one."""
        if self.thread is not None:
            self.join_worker()

        # Rearrange the data:
        # the LSTM takes inputs as [video0_frame0, video1_frame0, ...]
        # but the data is currently arranged as
        # [video0_frame0, video0_frame1, ...].
        new_result_data = [None] * len(self.thread_result['data'])
        new_result_label = [None] * len(self.thread_result['label'])
        new_result_cm = [None] * len(self.thread_result['clip_markers'])
        for i in range(self.frames):
            for ii in range(self.buffer_size):
                old_idx = ii * self.frames + i
                new_idx = i * self.buffer_size + ii
                new_result_data[new_idx] = self.thread_result['data'][old_idx]
                new_result_label[new_idx] = self.thread_result['label'][old_idx]
                new_result_cm[new_idx] = self.thread_result['clip_markers'][old_idx]

        for top_index, name in zip(range(len(top)), self.top_names):
            if name == 'data':
                for i in range(self.N):
                    top[top_index].data[i, ...] = new_result_data[i]
            elif name == 'label':
                top[top_index].data[...] = new_result_label
            elif name == 'clip_markers':
                top[top_index].data[...] = new_result_cm

        # Prefetch the next batch while the net runs.
        self.dispatch_worker()

    def dispatch_worker(self):
        """Start a thread that runs ``self.batch_advancer``."""
        assert self.thread is None
        self.thread = Thread(target=self.batch_advancer)
        # start() invokes the thread target, i.e. calls the batch_advancer
        # object.
        self.thread.start()

    def join_worker(self):
        """Block until the worker thread finishes, then clear the handle."""
        assert self.thread is not None
        self.thread.join()
        self.thread = None

    def backward(self, top, propagate_down, bottom):
        """No-op; this layer has nothing to backpropagate."""
        pass
def advance_batch(result, sequence_generator, image_processor, pool):
    """Produce one batch and store it in ``result``.

    Args:
        result: dict that receives the keys 'data', 'label' and
            'clip_markers'.
        sequence_generator: callable returning ``(labels, im_info)``. If it
            has a ``clip_length`` attribute, that is used as the clip-marker
            stride; otherwise the previous fixed value 16 is used.
        image_processor: callable applied to each element of ``im_info``.
        pool: object with a ``map(func, iterable)`` method.
    """
    label_r, im_info = sequence_generator()
    # One direct call before handing the work to the pool; its result is not
    # used. Presumably kept so that a processing error shows up here first --
    # TODO confirm before removing.
    image_processor(im_info[0])
    result['data'] = pool.map(image_processor, im_info)
    result['label'] = label_r

    # Clip markers: 0 on the first frame of each clip, 1 on every other
    # frame. Laid out as buffer_size rows of clip_length entries, flattened
    # row by row, so the zeros mark where one video ends and the next begins.
    clip_length = getattr(sequence_generator, 'clip_length', 16)
    cm = np.ones(len(label_r))
    cm[0::clip_length] = 0
    result['clip_markers'] = cm
class sequenceGeneratorVideo(object):
    """Picks the videos, frames, crops and flips for successive batches.

    Each call walks ``buffer_size`` consecutive entries of ``video_order``
    (wrapping around at the end) and draws one random clip of
    ``clip_length`` consecutive frames from each.
    """

    def __init__(self, buffer_size, clip_length, num_videos, video_dict, video_order):
        self.buffer_size = buffer_size
        self.clip_length = clip_length
        self.N = self.buffer_size * self.clip_length
        self.num_videos = num_videos
        self.video_dict = video_dict
        self.video_order = video_order
        self.idx = 0

    def __call__(self):
        """Return ``(labels, im_info)`` for the next batch.

        ``labels`` holds one label per frame (``buffer_size * clip_length``
        entries, grouped by video). ``im_info`` is a list of
        ``(path, crop, reshape, flip)`` tuples in the same order, where
        ``crop`` is ``(r0, r1, r0 + crop_h, r1 + crop_w)``.
        """
        label_r = []
        im_paths = []
        im_crop = []
        im_reshape = []
        im_flip = []

        # Next buffer_size video indices, wrapping modulo num_videos. Same
        # result as the previous two-range construction, and also valid when
        # buffer_size is larger than the number of videos.
        idx_list = [(self.idx + k) % self.num_videos
                    for k in range(self.buffer_size)]

        for video_idx in idx_list:
            key = self.video_order[video_idx]
            entry = self.video_dict[key]
            label = entry['label']
            video_reshape = entry['reshape']
            video_crop = entry['crop']

            # All frames of one clip share label, reshape, crop and flip.
            label_r.extend([label] * self.clip_length)
            im_reshape.extend([video_reshape] * self.clip_length)

            # The order of the random draws below is kept as-is so that the
            # sequence produced for a given seed does not change.
            r0 = int(random.random() * (video_reshape[0] - video_crop[0]))
            r1 = int(random.random() * (video_reshape[1] - video_crop[1]))
            im_crop.extend([(r0, r1, r0 + video_crop[0], r1 + video_crop[1])] * self.clip_length)
            f = random.randint(0, 1)
            im_flip.extend([f] * self.clip_length)

            # First frame number of the clip; the smallest value this yields
            # is 2.
            rand_frame = int(random.random() * (entry['num_frames'] - self.clip_length) + 1 + 1)
            # clip_length consecutive frame paths.
            frames = [entry['frames'] % frame_no
                      for frame_no in range(rand_frame, rand_frame + self.clip_length)]
            im_paths.extend(frames)

        # list(...) so callers can index the result on Python 3 as well.
        im_info = list(zip(im_paths, im_crop, im_reshape, im_flip))

        # Advance so the next call starts at the following videos.
        self.idx = (self.idx + self.buffer_size) % self.num_videos
        return label_r, im_info
第一层python层是不支持multi-gpu的….因为lock会出问题.
所以来改image_data_layer作为数据第一层吧.
# TRAIN-phase input: ImageData layer with a third top for clip markers.
layer {
  name: "data"
  type: "ImageData"
  top: "data"
  top: "label"
  top: "clip_markers"
  include { phase: TRAIN }
  transform_param {
    mirror: true
    crop_size: 227
    mean_value: 103.939
    mean_value: 116.779
    mean_value: 128.68
  }
  image_data_param {
    source: "ucf101_split1_trainVideos.txt"
    # 16 frames per clip * 2 clips. The comment sits on its own line: '#'
    # runs to end of line, so on a single-line layout it would swallow
    # every field after batch_size, including the closing brace.
    batch_size: 32
    new_height: 240
    new_width: 320
    root_folder: "/work/na"
  }
}
# TEST-phase input (stage "test-on-test"): same layer type without mirroring.
layer {
  name: "data"
  type: "ImageData"
  top: "data"
  top: "label"
  top: "clip_markers"
  include {
    phase: TEST
    stage: "test-on-test"
  }
  transform_param {
    mirror: false
    crop_size: 227
    mean_value: 103.939
    mean_value: 116.779
    mean_value: 128.68
  }
  image_data_param {
    source: "ucf101_split1_testVideos.txt"
    batch_size: 48
    new_height: 240
    new_width: 320
    root_folder: "/work/na"
  }
}
// Excerpt of BaseDataLayer showing only the member added for clip markers.
template <typename Dtype>
class BaseDataLayer : public Layer<Dtype> {
protected:
// True when the layer has a third top; set in LayerSetUp from top.size().
bool output_clip_markers_;
};
/// One prefetched batch: image data, labels and clip markers.
template <typename Dtype>
class Batch {
 public:
  Blob<Dtype> data_;
  Blob<Dtype> label_;
  Blob<Dtype> clip_markers_;
};
/**
 * @brief Decides from the number of tops which optional outputs are produced.
 *
 * top[0] is always the data; a second top enables labels and a third enables
 * clip markers. Both flags are assigned for every value of top.size(), so
 * neither is left unset when the count is not 1, 2 or 3.
 *
 * @param bottom Unused here.
 * @param top    Output blobs; only their count is inspected.
 */
template <typename Dtype>
void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  output_labels_ = top.size() >= 2;
  output_clip_markers_ = top.size() >= 3;
}
/**
 * @brief Runs the base setup, then touches the CPU (and, in GPU mode, the
 *        GPU) buffers of every prefetch batch.
 *
 * The accessor results are discarded; presumably the calls are there to get
 * the buffers allocated up front -- TODO confirm.
 */
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  BaseDataLayer<Dtype>::LayerSetUp(bottom, top);

  const bool with_labels = this->output_labels_;
  const bool with_markers = this->output_clip_markers_;

  for (int slot = 0; slot < PREFETCH_COUNT; ++slot) {
    Batch<Dtype>& buf = prefetch_[slot];
    buf.data_.mutable_cpu_data();
    if (with_labels) {
      buf.label_.mutable_cpu_data();
    }
    if (with_markers) {
      buf.clip_markers_.mutable_cpu_data();
    }
  }
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    for (int slot = 0; slot < PREFETCH_COUNT; ++slot) {
      Batch<Dtype>& buf = prefetch_[slot];
      buf.data_.mutable_gpu_data();
      if (with_labels) {
        buf.label_.mutable_gpu_data();
      }
      if (with_markers) {
        buf.clip_markers_.mutable_gpu_data();
      }
    }
  }
#endif
}
// Excerpt of the CPU forward pass. The elided part ("//...") is where `batch`
// is obtained and the data blob is copied; only the label and clip-marker
// handling is shown here.
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
//...
if (this->output_labels_) {
// Reshape to loaded labels.
top[1]->ReshapeLike(batch->label_);
// Copy the labels.
caffe_copy(batch->label_.count(), batch->label_.cpu_data(),
top[1]->mutable_cpu_data());
}
// Clip markers go to the third top, handled the same way as the labels.
if (this->output_clip_markers_) {
top[2]->ReshapeLike(batch->clip_markers_);
caffe_copy(batch->clip_markers_.count(), batch->clip_markers_.cpu_data(),
top[2]->mutable_cpu_data());
}
// Hand the consumed batch back to the free queue.
prefetch_free_.push(batch);
// Disabled debug dump of every value in top[1..]:
/*LOG(INFO) << top.size(); for(int i=1;i<top.size();++i){ LOG(INFO) << "top data " << i; const Dtype *top_cpu_data = top[i]->cpu_data(); for(int j=0;j<top[i]->count();++j){ LOG(INFO) << top_cpu_data[j]; } }*/
}
// Excerpt of the ImageDataLayer declaration showing the changed/added members.
template <typename Dtype>
class ImageDataLayer : public BasePrefetchingDataLayer<Dtype> {
public:
// The layer declares exactly three tops (data, label, clip_markers).
// NOTE(review): with three tops always required, the branch of load_batch
// taken when output_clip_markers_ is false looks unreachable -- confirm.
virtual inline int ExactNumTopBlobs() const { return 3; }
protected:
// Returns a random integer in [0, n) drawn from the prefetch RNG.
int Rand(int n);
};
#ifdef USE_OPENCV
#include <algorithm>
#include <fstream> // NOLINT(readability/streams)
#include <iostream> // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/contrib/contrib.hpp>
#include "caffe/data_transformer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/layers/image_data_layer.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"
namespace caffe {
/**
 * @brief Lists the file names found directly in @p ImagePath, sorted by name.
 *
 * @param ImagePath Directory to list.
 * @param fileNames Output; replaced with the sorted file names (no directory
 *                  prefix, since addPath is false).
 *
 * The listing is sorted because callers index consecutive entries as
 * consecutive video frames, while the directory listing itself comes back in
 * whatever order the file system reports. Assumes frame names sort
 * lexicographically in temporal order (e.g. zero-padded indices) -- confirm
 * against the frame extraction script.
 */
void returnImageList(string ImagePath, vector<string>& fileNames)
{
  cv::Directory dir;
  fileNames = dir.GetListFiles(ImagePath, "*", false);
  std::sort(fileNames.begin(), fileNames.end());
}
/**
 * @brief Returns the part of @p filename after its first '/'.
 *
 * Example: "TableTennisShot/v_TableTennisShot_g19_c03" ->
 * "v_TableTennisShot_g19_c03".
 *
 * @param filename Path-like string, typically "<class>/<video>".
 * @return Everything after the first '/'; the whole string when there is no
 *         '/' (previously the last character was dropped in that case).
 */
std::string fileparts(std::string filename)
{
  const std::string::size_type slash = filename.find('/');
  if (slash == std::string::npos) {
    return filename;
  }
  return filename.substr(slash + 1);
}
/// Stops the internal prefetch thread when the layer is destroyed.
/// The destructor is named without a template argument list: the
/// `~ImageDataLayer<Dtype>()` spelling is rejected from C++20 on.
template <typename Dtype>
ImageDataLayer<Dtype>::~ImageDataLayer() {
  this->StopInternalThread();
}
/**
 * @brief Reads the source list, seeds the prefetch RNG and shapes the top and
 *        prefetch blobs.
 *
 * With clip markers enabled, each list entry names a video: the part after
 * the first '/' is a directory under root_folder, and the first file in that
 * directory is used to infer the image shape. Otherwise the entry itself is
 * the image path.
 *
 * @param bottom Unused here.
 * @param top    top[0] data, top[1] labels, top[2] clip markers (reshaped
 *               only when clip markers are enabled).
 */
template <typename Dtype>
void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int new_height = this->layer_param_.image_data_param().new_height();
  const int new_width  = this->layer_param_.image_data_param().new_width();
  const bool is_color  = this->layer_param_.image_data_param().is_color();
  string root_folder = this->layer_param_.image_data_param().root_folder();
  const int batch_size = this->layer_param_.image_data_param().batch_size();

  CHECK((new_height == 0 && new_width == 0) ||
      (new_height > 0 && new_width > 0)) << "Current implementation requires "
      "new_height and new_width to be set at the same time.";
  CHECK_GT(batch_size, 0) << "Positive batch size required";

  // Read the file with filenames and labels
  const string& source = this->layer_param_.image_data_param().source();
  LOG(INFO) << "Opening file " << source;
  std::ifstream infile(source.c_str());
  string filename;
  int label;
  while (infile >> filename >> label) {
    lines_.push_back(std::make_pair(filename, label));
  }
  // lines_[lines_id_] is read below; fail clearly if nothing was loaded.
  CHECK(!lines_.empty()) << "File is empty";

  // Create the prefetch RNG before anything uses it. ShuffleImages() reads
  // prefetch_rng_, so seeding after the shuffle (as before) dereferenced an
  // empty pointer whenever shuffle was enabled.
  const unsigned int prefetch_rng_seed = caffe_rng_rand();
  prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));

  if (this->layer_param_.image_data_param().shuffle()) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    ShuffleImages();
  }
  LOG(INFO) << "A total of " << lines_.size() << " images.";

  lines_id_ = 0;
  // Check if we would need to randomly skip a few data points
  if (this->layer_param_.image_data_param().rand_skip()) {
    unsigned int skip = caffe_rng_rand() %
        this->layer_param_.image_data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
    lines_id_ = skip;
  }

  string imagePath = root_folder + "/" + lines_[lines_id_].first;
  vector<string> fileNames;
  if (this->output_clip_markers_) {
    // load_batch fills batch_size / 16 clips of 16 frames each; any
    // remainder would be left unwritten in the batch.
    CHECK_EQ(batch_size % 16, 0)
        << "batch_size must be a multiple of the 16-frame clip length";
    imagePath = root_folder + "/" + fileparts(lines_[lines_id_].first);
    returnImageList(imagePath, fileNames);
    CHECK(!fileNames.empty()) << "No frames found in " << imagePath;
    imagePath = imagePath + "/" + fileNames[0];

    vector<int> clipmarkers_shape(1, batch_size);
    top[2]->Reshape(clipmarkers_shape);
    for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
      this->prefetch_[i].clip_markers_.Reshape(clipmarkers_shape);
    }
  }

  // Read an image, and use it to initialize the top blob.
  cv::Mat cv_img = ReadImageToCVMat(imagePath,
      new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << imagePath;
  // Use data_transformer to infer the expected blob shape from a cv_image.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape prefetch_data and top[0] according to the batch_size.
  top_shape[0] = batch_size;
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].data_.Reshape(top_shape);
  }
  top[0]->Reshape(top_shape);

  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label
  vector<int> label_shape(1, batch_size);
  top[1]->Reshape(label_shape);
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].label_.Reshape(label_shape);
  }
}
/// Shuffles `lines_` in place using the prefetch RNG.
/// Checks that the RNG exists first (as Rand() does) so a missing RNG fails
/// with a CHECK instead of dereferencing an empty pointer.
template <typename Dtype>
void ImageDataLayer<Dtype>::ShuffleImages() {
  CHECK(prefetch_rng_);
  caffe::rng_t* prefetch_rng =
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  shuffle(lines_.begin(), lines_.end(), prefetch_rng);
}
/// Creates the transformer's RNG when it is needed -- mirroring enabled, or
/// cropping during TRAIN -- and clears it otherwise.
template <typename Dtype>
void DataTransformer<Dtype>::InitRand() {
  const bool crops_in_training = (phase_ == TRAIN) && param_.crop_size();
  if (!param_.mirror() && !crops_in_training) {
    rng_.reset();
    return;
  }
  const unsigned int seed = caffe_rng_rand();
  rng_.reset(new Caffe::RNG(seed));
}
/// Draws a value from the prefetch RNG and reduces it modulo @p n.
/// Requires the RNG to exist and n > 0 (both enforced with CHECKs).
template <typename Dtype>
int ImageDataLayer<Dtype>::Rand(int n) {
  CHECK(prefetch_rng_);
  CHECK_GT(n, 0);
  caffe::rng_t& engine =
      *static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  return (engine() % n);
}
/**
 * @brief Fills one prefetch batch. This function is called on prefetch thread.
 *
 * With clip markers enabled, batch_size / 16 videos are consumed per batch.
 * For each video a random run of 16 consecutive files from its directory is
 * loaded. Frames are stored frame-major: frame k of video v goes to position
 * tbuffer * k + v. Labels repeat the video label, and the clip marker is 0 on
 * a clip's first frame and 1 elsewhere. The first frame of a clip is
 * transformed with changeCrop = true and the rest with false.
 *
 * Without clip markers, each list entry is a single image.
 *
 * @param batch Destination; its data_ blob is reshaped from the first image.
 */
template <typename Dtype>
void ImageDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  // Frames per clip; the batch layout below is built around this value.
  const int kClipLength = 16;

  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  double dir_time = 0;
  double idx_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());
  const ImageDataParameter& image_data_param =
      this->layer_param_.image_data_param();
  const int batch_size = image_data_param.batch_size();
  const int new_height = image_data_param.new_height();
  const int new_width = image_data_param.new_width();
  const bool is_color = image_data_param.is_color();
  const string root_folder = image_data_param.root_folder();

  // Reshape according to the first image of each batch
  // on single input batches allows for inputs of varying dimension.
  string imagePath = root_folder + "/" + lines_[lines_id_].first;
  vector<string> fileNames;
  int tbuffer = 0;  // number of clips in the batch (clip-marker mode only)
  if (this->output_clip_markers_) {
    tbuffer = batch_size / kClipLength;
    imagePath = root_folder + "/" + fileparts(lines_[lines_id_].first);
    returnImageList(imagePath, fileNames);
    CHECK(!fileNames.empty()) << "No frames found in " << imagePath;
    imagePath = imagePath + "/" + fileNames[0];
  }
  // Read an image, and use it to initialize the top blob.
  cv::Mat cv_img = ReadImageToCVMat(imagePath,
      new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << imagePath;
  // Use data_transformer to infer the expected blob shape from a cv_img.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);

  Dtype* prefetch_data = batch->data_.mutable_cpu_data();
  Dtype* prefetch_label = batch->label_.mutable_cpu_data();

  // datum scales
  const int lines_size = lines_.size();
  if (this->output_clip_markers_) {
    Dtype* prefetch_clip_markers = batch->clip_markers_.mutable_cpu_data();
    for (int item_id = 0; item_id < tbuffer; ++item_id) {
      // get a blob
      CHECK_GT(lines_size, lines_id_);
      timer.Start();
      imagePath = root_folder + "/" + fileparts(lines_[lines_id_].first);
      returnImageList(imagePath, fileNames);
      dir_time += timer.MicroSeconds();

      // Guard the random start index: with fewer than kClipLength files the
      // unsigned expression size() - 16 + 1 would wrap around.
      const int num_frames = static_cast<int>(fileNames.size());
      CHECK_GE(num_frames, kClipLength) << "Video " << imagePath
          << " has fewer than " << kClipLength << " frames";
      int randID = Rand(num_frames - kClipLength + 1);
      DLOG(INFO) << imagePath<<"-randID:"<<randID;

      string imagePath1;
      for (int image_id = randID; image_id < kClipLength + randID; ++image_id) {
        timer.Start();
        imagePath1 = imagePath + "/" + fileNames[image_id];
        cv::Mat frame_img = ReadImageToCVMat(imagePath1,
            new_height, new_width, is_color);
        read_time += timer.MicroSeconds();
        CHECK(frame_img.data) << "Could not load " << imagePath1;
        timer.Start();
        /* Batch layout, e.g. tbuffer = 4 (rows: video, columns: frame):
         *       f0  f1  f2  f3 ...  f15
         *  v0    0   4   8  12 ...   60
         *  v1    1   5   9  13 ...   61
         *  v2    2   6  10  14 ...   62
         *  v3    3   7  11  15 ...   63
         */
        int imgPosition = tbuffer * (image_id - randID) + item_id;
        int offset = batch->data_.offset(imgPosition);
        // Apply transformations (mirror, crop...) to the image
        this->transformed_data_.set_cpu_data(prefetch_data + offset);
        prefetch_label[imgPosition] = lines_[lines_id_].second;
        // changeCrop is true only on the clip's first frame; the following
        // frames pass false.
        const bool first_frame = (image_id == randID);
        this->data_transformer_->Transform(frame_img,
            &(this->transformed_data_), first_frame);
        prefetch_clip_markers[imgPosition] = first_frame ? 0 : 1;
        idx_time += timer.MicroSeconds();
      }
      // go to the next iter
      lines_id_++;
      if (lines_id_ >= lines_size) {
        // We have reached the end. Restart from the first.
        DLOG(INFO) << "Restarting data prefetching from start.";
        lines_id_ = 0;
      }
    }
    batch_timer.Stop();
    DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
    DLOG(INFO) << "Directory time: " << dir_time / 1000 << " ms.";
    DLOG(INFO) << " Read time: " << read_time / 1000 << " ms.";
    DLOG(INFO) << "ChangeIdx time: " << idx_time / 1000 << " ms.";
  } else {
    for (int item_id = 0; item_id < batch_size; ++item_id) {
      // get a blob
      timer.Start();
      CHECK_GT(lines_size, lines_id_);
      // Same "<root>/<entry>" form as the shape probe above, so both reads
      // resolve the same path.
      cv::Mat item_img = ReadImageToCVMat(
          root_folder + "/" + lines_[lines_id_].first,
          new_height, new_width, is_color);
      CHECK(item_img.data) << "Could not load " << lines_[lines_id_].first;
      read_time += timer.MicroSeconds();
      timer.Start();
      // Apply transformations (mirror, crop...) to the image
      int offset = batch->data_.offset(item_id);
      this->transformed_data_.set_cpu_data(prefetch_data + offset);
      this->data_transformer_->Transform(item_img, &(this->transformed_data_));
      trans_time += timer.MicroSeconds();

      prefetch_label[item_id] = lines_[lines_id_].second;
      // go to the next iter
      lines_id_++;
      if (lines_id_ >= lines_size) {
        // We have reached the end. Restart from the first.
        DLOG(INFO) << "Restarting data prefetching from start.";
        lines_id_ = 0;
        if (this->layer_param_.image_data_param().shuffle()) {
          ShuffleImages();
        }
      }
    }
    batch_timer.Stop();
    DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
    DLOG(INFO) << " Read time: " << read_time / 1000 << " ms.";
    DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
  }
}
INSTANTIATE_CLASS(ImageDataLayer);
REGISTER_LAYER_CLASS(ImageData);
} // namespace caffe
#endif // USE_OPENCV
// Declaration with the added `changeCrop` flag (defaults to true). In the
// definition, TRAIN phase with changeCrop == true draws new random crop
// offsets and remembers them; changeCrop == false reuses the remembered ones.
void Transform(const cv::Mat& cv_img, Blob<Dtype>* transformed_blob,bool changeCrop = true);
// Excerpt of the cv::Mat transform showing only the crop-offset selection.
// The elided parts ("//...") declare crop_size, height, width, img_height,
// img_width, h_off, w_off and cv_cropped_img.
template<typename Dtype>
void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img,
Blob<Dtype>* transformed_blob,bool changeCrop) {
//...
if (crop_size) {
CHECK_EQ(crop_size, height);
CHECK_EQ(crop_size, width);
// We only do random crop when we do training.
if ((phase_ == TRAIN) && changeCrop) {
// New random offsets; saved so later calls with changeCrop == false
// can reuse them.
h_off = Rand(img_height - crop_size + 1);
w_off = Rand(img_width - crop_size + 1);
former_h_off = h_off;
former_w_off = w_off;
} else if((phase_ == TRAIN) && !changeCrop){
// Reuse the offsets saved by the most recent changeCrop == true call.
// NOTE(review): former_h_off/former_w_off are not declared in this
// excerpt; their lifetime and initial value should be confirmed.
h_off = former_h_off;
w_off = former_w_off;
} else if(phase_ == TEST) {
// Center crop at test time.
h_off = (img_height - crop_size) / 2;
w_off = (img_width - crop_size) / 2;
}
DLOG(INFO)<<h_off<<","<<w_off;
cv::Rect roi(w_off, h_off, crop_size, crop_size);
cv_cropped_img = cv_img(roi);
} else {
// No cropping: the image must already have the blob's spatial size.
CHECK_EQ(img_height, height);
CHECK_EQ(img_width, width);
}
//....
}
// Excerpt of Net::Init (the rest of the function is elided). The original
// CHECK that non-root solvers must have root_net_ set is commented out and
// replaced by a flag that disables layer sharing in that situation.
template <typename Dtype>
void Net<Dtype>::Init(const NetParameter& in_param) {
/*CHECK(Caffe::root_solver() || root_net_) << "root_net_ needs to be set for all non-root solvers";*/
// True when this is a non-root solver that has no root net.
// NOTE(review): the name suggests this case arises for nets built inside
// LSTM layers -- confirm.
bool isLSTMLayer=false;
if(!Caffe::root_solver() && root_net_==NULL)
isLSTMLayer = true;
// Set phase from the state.
//....
for (int layer_id = 0; layer_id < param.layer_size(); ++layer_id) {
// For non-root solvers, whether this layer is shared from root_net_.
bool share_from_root;
// Without a root net there is nothing to share from, so never share.
if(isLSTMLayer)
share_from_root = false;
else
share_from_root = !Caffe::root_solver()
&& root_net_->layers_[layer_id]->ShareInParallel();
// Inherit phase from net if unset.
// ...
#include <vector>
#include "caffe/layers/base_data_layer.hpp"
namespace caffe {
/**
 * @brief GPU forward pass: pops a filled prefetch batch, copies its data (and,
 *        when enabled, labels and clip markers) into the tops, then returns
 *        the batch to the free queue.
 */
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  Batch<Dtype>* const filled =
      prefetch_full_.pop("Data layer prefetch queue empty");

  // Data always goes to top[0], shaped like the loaded batch.
  Blob<Dtype>* const data_out = top[0];
  data_out->ReshapeLike(filled->data_);
  caffe_copy(filled->data_.count(), filled->data_.gpu_data(),
             data_out->mutable_gpu_data());

  if (this->output_labels_) {
    // Labels go to top[1].
    Blob<Dtype>* const label_out = top[1];
    label_out->ReshapeLike(filled->label_);
    caffe_copy(filled->label_.count(), filled->label_.gpu_data(),
               label_out->mutable_gpu_data());
  }

  if (this->output_clip_markers_) {
    // Clip markers go to top[2].
    Blob<Dtype>* const marker_out = top[2];
    marker_out->ReshapeLike(filled->clip_markers_);
    caffe_copy(filled->clip_markers_.count(),
               filled->clip_markers_.gpu_data(),
               marker_out->mutable_gpu_data());
  }

  // Ensure the copy is synchronous wrt the host, so that the next batch isn't
  // copied in meanwhile.
  CUDA_CHECK(cudaStreamSynchronize(cudaStreamDefault));
  prefetch_free_.push(filled);
}
INSTANTIATE_LAYER_GPU_FORWARD(BasePrefetchingDataLayer);
} // namespace caffe
// Excerpt of LossParameter (other fields elided).
message LossParameter {
//...
optional NormalizationMode normalization = 3;
// Boolean normalize flag, given an explicit default of false here.
optional bool normalize = 2[default = false];
}