[caffe] Long-term Recurrent Convolutional Networks

Paper: http://jeffdonahue.com/lrcn/
Code: the “lstm_video_deploy” branch of Lisa Anne Hendricks’s Caffe fork

Python Layer

train_test_lstm_RGB.prototxt

name: "lstm_joints"
layer { name: "data" type: "Python" top: "data" top: "label" top: "clip_markers" python_param { module: "sequence_input_layer" layer: "videoReadTrain_RGB" }
  include: { phase: TRAIN }
}
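
Caffe resolves python_param by importing the module (sequence_input_layer.py, which must be on PYTHONPATH) and instantiating the named class. Any such class just needs the four methods the C++ wrapper calls; a minimal sketch (the class name here is made up):

import caffe

class MinimalDataLayer(caffe.Layer):
    def setup(self, bottom, top):
        # called once by PythonLayer::LayerSetUp; allocate the outputs
        top[0].reshape(1)

    def reshape(self, bottom, top):
        pass  # shapes are already fixed in setup

    def forward(self, bottom, top):
        top[0].data[...] = 0  # fill the output blob

    def backward(self, top, propagate_down, bottom):
        pass  # a data layer propagates no gradients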

layer_factory.cpp

template <typename Dtype>
shared_ptr<Layer<Dtype> > GetPythonLayer(const LayerParameter& param) {
  Py_Initialize();
  try {
    // open "sequence_input_layer.py"
    bp::object module = bp::import(param.python_param().module().c_str());
    //class videoReadTrain_RGB(videoRead)对象
    bp::object layer = module.attr(param.python_param().layer().c_str())(param);
    // extract<T> can be used to extract a value of an arbitrary C++ type from an instance Of object
    return bp::extract<shared_ptr<PythonLayer<Dtype> > >(layer)();
  } catch (bp::error_already_set) {
    PyErr_Print();
    throw;
  }
}

REGISTER_LAYER_CREATOR(Python, GetPythonLayer);

python_layer.hpp

//A Python layer does its real work in LayerSetUp and Forward.
template <typename Dtype>
class PythonLayer : public Layer<Dtype> {
 public:
  PythonLayer(PyObject* self, const LayerParameter& param)
      : Layer<Dtype>(param), self_(self) { }  // store the wrapped Python object

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    try {
      bp::call_method<bp::object>(self_, "setup", bottom, top);
    } catch (bp::error_already_set) {
      PyErr_Print();
      throw;
    }
  }

  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    try {
      bp::call_method<bp::object>(self_, "reshape", bottom, top);
    } catch (bp::error_already_set) {
      PyErr_Print();
      throw;
    }
  }

  virtual inline const char* type() const { return "Python"; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    try {
      bp::call_method<bp::object>(self_, "forward", bottom, top);
    } catch (bp::error_already_set) {
      PyErr_Print();
      throw;
    }
  }
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
    try {
      bp::call_method<bp::object>(self_, "backward", top, propagate_down,
          bottom);
    } catch (bp::error_already_set) {
      PyErr_Print();
      throw;
    }
  }

 private:
  PyObject* self_;
};

sequence_input_layer.py

RGB_frames = 'RGBframes/'
test_frames = 16 
train_frames = 16
test_buffer = 3
train_buffer = 24

class videoReadTrain_RGB(videoRead):

  def initialize(self):
    self.train_or_test = 'train'
    self.flow = False
    self.buffer_size = train_buffer  #num videos processed per batch (24)
    # can be lowered if GPU memory is tight
    self.frames = train_frames   #length of processed clip (16)
    # 16 consecutive frames are sampled per video; this value is hard-coded
    # elsewhere in the pipeline and must not be changed
    self.N = self.buffer_size*self.frames
    self.idx = 0
    self.channels = 3
    self.height = 227
    self.width = 227
    self.path_to_images = RGB_frames 
    self.video_list = 'ucf101_split1_trainVideos.txt'
    #each line holds <class>/<video name> <label>, e.g.:
    #TableTennisShot/v_TableTennisShot_g19_c03 89
    #MilitaryParade/v_MilitaryParade_g09_c06 52
    #RopeClimbing/v_RopeClimbing_g16_c01 74
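
For reference, here is roughly what setup below derives from a single list line (a sketch with toy values; the %04d frame naming is the convention the extracted UCF-101 frames are assumed to follow):

line = 'TableTennisShot/v_TableTennisShot_g19_c03 89'
video = line.split(' ')[0].split('/')[1]   # 'v_TableTennisShot_g19_c03'
label = int(line.split(' ')[1])            # 89
template = 'RGBframes/%s/%%04d.jpg' % video
print(template % 7)  # RGBframes/v_TableTennisShot_g19_c03/0007.jpg
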
class videoRead(caffe.Layer):

  def setup(self, bottom, top):
    #note: fixing the seed makes every rerun of the experiment produce the identical random sequence!
    random.seed(10)
    self.initialize()
    f = open(self.video_list, 'r')
    f_lines = f.readlines()
    f.close()

    video_dict = {}
    current_line = 0
    self.video_order = []
    for ix, line in enumerate(f_lines):
      #line: TableTennisShot/v_TableTennisShot_g19_c03 89
      video = line.split(' ')[0].split('/')[1]  #v_TableTennisShot_g19_c03
      l = int(line.split(' ')[1])  #89
      #frames: all jpg images under RGBframes/v_TableTennisShot_g19_c03
      frames = glob.glob('%s%s/*.jpg' %(self.path_to_images, video))       
      num_frames = len(frames)
      video_dict[video] = {}
      #video_dict[video]['frames']:RGBframes/v_TableTennisShot_g19_c03/%04d.jpg
      video_dict[video]['frames'] = '%s%s/%%04d.jpg' % (self.path_to_images, video)
      video_dict[video]['reshape'] = (240,320)
      video_dict[video]['crop'] = (227, 227)
      video_dict[video]['num_frames'] = num_frames
      video_dict[video]['label'] = l
      self.video_order.append(video) 

    self.video_dict = video_dict
    self.num_videos = len(video_dict.keys())

    #set up data transformer
    shape = (self.N, self.channels, self.height, self.width)

    self.transformer = caffe.io.Transformer({'data_in': shape})
    self.transformer.set_raw_scale('data_in', 255)
    if self.flow:
      image_mean = [128, 128, 128]
      self.transformer.set_is_flow('data_in', True)
    else:
      image_mean = [103.939, 116.779, 128.68]
      self.transformer.set_is_flow('data_in', False)
    #three 227x227 matrices, all elements zero
    channel_mean = np.zeros((3,227,227))
    for channel_index, mean_val in enumerate(image_mean):
      #all elements of the 1st matrix become 103.939,
      #of the 2nd 116.779, and of the 3rd 128.68
      channel_mean[channel_index, ...] = mean_val
    self.transformer.set_mean('data_in', channel_mean)
    self.transformer.set_channel_swap('data_in', (2, 1, 0))
    self.transformer.set_transpose('data_in', (2, 0, 1))

    self.thread_result = {}
    self.thread = None
    pool_size = 24

    self.image_processor = ImageProcessorCrop(self.transformer, self.flow)
    self.sequence_generator = sequenceGeneratorVideo(self.buffer_size, self.frames, self.num_videos, self.video_dict, self.video_order)

    self.pool = Pool(processes=pool_size)
    self.batch_advancer = BatchAdvancer(self.thread_result, self.sequence_generator, self.image_processor, self.pool)
    ########################################
    self.dispatch_worker()
    ########################################
    self.top_names = ['data', 'label','clip_markers']
    print 'Outputs:', self.top_names
    if len(top) != len(self.top_names):
      raise Exception('Incorrect number of outputs (expected %d, got %d)' %
                      (len(self.top_names), len(top)))
    ########################################
    self.join_worker()
    ########################################
    for top_index, name in enumerate(self.top_names):
      if name == 'data':
        shape = (self.N, self.channels, self.height, self.width)
      elif name == 'label':
        shape = (self.N,)  #a tuple with a single element
      elif name == 'clip_markers':
        shape = (self.N,)  #a tuple with a single element
      # the * unpacks the tuple into separate positional arguments:
      # with shape = (self.N, self.channels, self.height, self.width),
      # top[top_index] becomes self.N 3-D arrays, each holding self.channels
      # matrices of height self.height and width self.width
      top[top_index].reshape(*shape)

  def reshape(self, bottom, top):
    pass

  def forward(self, bottom, top):

    if self.thread is not None:
      #########################################
      self.join_worker() 
      #########################################

    #rearrange the data: 
    #The LSTM takes inputs as [video0_frame0, video1_frame0,...] 
    #but the data is currently arranged as [video0_frame0, video0_frame1, ...]
    new_result_data = [None]*len(self.thread_result['data']) 
    new_result_label = [None]*len(self.thread_result['label']) 
    new_result_cm = [None]*len(self.thread_result['clip_markers'])
    for i in range(self.frames):
      for ii in range(self.buffer_size):
        old_idx = ii*self.frames + i
        new_idx = i*self.buffer_size + ii
        new_result_data[new_idx] = self.thread_result['data'][old_idx]
        new_result_label[new_idx] = self.thread_result['label'][old_idx]
        new_result_cm[new_idx] = self.thread_result['clip_markers'][old_idx]

    for top_index, name in zip(range(len(top)), self.top_names):
      if name == 'data':
        for i in range(self.N):
          top[top_index].data[i, ...] = new_result_data[i] 
      elif name == 'label':
        top[top_index].data[...] = new_result_label
      elif name == 'clip_markers':
        top[top_index].data[...] = new_result_cm

    #################################
    self.dispatch_worker()
    #################################

  def dispatch_worker(self):
    assert self.thread is None
    self.thread = Thread(target=self.batch_advancer)
    #start(): self.batch_advancer is a BatchAdvancer instance, so the thread
    #target invokes BatchAdvancer.__call__, which in turn calls advance_batch
    self.thread.start()

  def join_worker(self):
    assert self.thread is not None
    #join(): Wait until the thread terminates. 
    #This blocks the calling thread until the thread whose join() method is called terminates
    self.thread.join() 
    self.thread = None

  def backward(self, top, propagate_down, bottom):
    pass
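
The frame-major rearrangement in forward above can be sanity-checked with a reshape/transpose (a numpy sketch with the training sizes, not part of the layer):

import numpy as np

frames, buffer_size = 16, 24
# toy batch: entry v*frames + f stands for frame f of video v
old = np.arange(frames * buffer_size)
# video-major -> frame-major, i.e. (video, frame) -> (frame, video)
new = old.reshape(buffer_size, frames).T.reshape(-1)
assert new[1] == 1 * frames + 0  # slot 1 now holds frame 0 of video 1
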
def advance_batch(result, sequence_generator, image_processor, pool):
    #sequence_generator() invokes sequenceGeneratorVideo.__call__
    label_r, im_info = sequence_generator()
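    #the result below is unused; presumably a warm-up/debug call on the first item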
    tmp = image_processor(im_info[0])
    result['data'] = pool.map(image_processor, im_info)
    result['label'] = label_r
    cm = np.ones(len(label_r))
    cm[0::16] = 0
    #cm separates the clips: with train_buffer=24 and clip_length=16, picture a
    #24x16 all-ones matrix whose first column is zero; cm is that matrix
    #flattened row by row.
    result['clip_markers'] = cm
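
A quick check of the marker layout described above (a sketch, not part of the layer):

import numpy as np

cm = np.ones(24 * 16)
cm[0::16] = 0
# each row of the 24x16 view is one clip; its first frame is marked 0,
# which tells the LSTM to reset its state at every clip boundary
assert (cm.reshape(24, 16)[:, 0] == 0).all()
assert (cm.reshape(24, 16)[:, 1:] == 1).all()
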
class sequenceGeneratorVideo(object):
  def __init__(self, buffer_size, clip_length, num_videos, video_dict, video_order):
    self.buffer_size = buffer_size
    self.clip_length = clip_length
    self.N = self.buffer_size*self.clip_length
    self.num_videos = num_videos
    self.video_dict = video_dict
    self.video_order = video_order
    self.idx = 0 

  def __call__(self):
    label_r = []
    im_paths = []
    im_crop = []
    im_reshape = []  
    im_flip = []

    if self.idx + self.buffer_size >= self.num_videos:
      idx_list = range(self.idx, self.num_videos)
      idx_list.extend(range(0, self.buffer_size-(self.num_videos-self.idx)))
    else:
      #(train)buffer_size=24
      idx_list = range(self.idx, self.idx+self.buffer_size)

    #24 videos
    for i in idx_list:
      key = self.video_order[i]
      label = self.video_dict[key]['label']
      video_reshape = self.video_dict[key]['reshape']
      video_crop = self.video_dict[key]['crop']
      #clip_length=16, so 16 entries equal to label are appended to label_r
      label_r.extend([label]*self.clip_length)  

      im_reshape.extend([(video_reshape)]*self.clip_length)
      r0 = int(random.random()*(video_reshape[0] - video_crop[0]))
      r1 = int(random.random()*(video_reshape[1] - video_crop[1]))
      im_crop.extend([(r0, r1, r0+video_crop[0], r1+video_crop[1])]*self.clip_length)     
      f = random.randint(0,1)
      im_flip.extend([f]*self.clip_length)
      rand_frame = int(random.random()*(self.video_dict[key]['num_frames']-self.clip_length)+1+1)
      frames = []

      #frames holds clip_length (16) consecutive frame paths
      for i in range(rand_frame,rand_frame+self.clip_length):
        frames.append(self.video_dict[key]['frames'] %i)

      #when the outer loop finishes, im_paths holds 24x16 image paths
      im_paths.extend(frames) 

    #z1=[1,2,3]
    #z2=[4,5,6]
    #result=zip(z1,z2)
    #[(1, 4), (2, 5), (3, 6)]
    im_info = zip(im_paths,im_crop, im_reshape, im_flip)

    #advance the index so each call to __call__ visits a different set of videos
    self.idx += self.buffer_size
    if self.idx >= self.num_videos:
      self.idx = self.idx - self.num_videos

    return label_r, im_info
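
The index arithmetic at the top of __call__ wraps the 24-video window around the end of the list; a sketch with toy sizes:

num_videos, buffer_size, idx = 10, 4, 8
if idx + buffer_size >= num_videos:
    idx_list = list(range(idx, num_videos))
    idx_list.extend(range(0, buffer_size - (num_videos - idx)))
else:
    idx_list = list(range(idx, idx + buffer_size))
print(idx_list)  # [8, 9, 0, 1] -- the window wraps past the end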

python layer -> image_data_layer

A Python layer used as the first (data) layer does not support multi-GPU training, because its lock misbehaves. So we modify image_data_layer to serve as the data layer instead.

train_test_lstm_RGB.prototxt

layer { name: "data" type: "ImageData" top: "data" top: "label" top: "clip_markers" include { phase: TRAIN }
  transform_param { mirror: true crop_size: 227 mean_value: 103.939 mean_value: 116.779 mean_value: 128.68 }
  image_data_param { source: "ucf101_split1_trainVideos.txt" batch_size: 32 #16*2 new_height: 240 new_width: 320 root_folder: "/work/na" }
}
layer { name: "data" type: "ImageData" top: "data" top: "label" top: "clip_markers" include { phase: TEST stage: "test-on-test" }
  transform_param { mirror: false crop_size: 227 mean_value: 103.939 mean_value: 116.779 mean_value: 128.68 }
  image_data_param { source: "ucf101_split1_testVideos.txt" batch_size: 48 new_height: 240 new_width: 320 root_folder: "/work/na" }
}

base_data_layer.hpp

template <typename Dtype>
class BaseDataLayer : public Layer<Dtype> {
 protected:
  bool output_clip_markers_;
};

template <typename Dtype>
class Batch {
 public:
  Blob<Dtype> data_, label_, clip_markers_;
};

base_data_layer.cpp

template <typename Dtype>
void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  if (top.size() == 1) {
    output_labels_ = false;
    output_clip_markers_ = false;
  } else if(top.size() == 2) {
    output_labels_ = true;
    output_clip_markers_ = false;
  } else if(top.size() == 3){
    output_labels_ = true;
    output_clip_markers_ = true;
  }
}

template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  BaseDataLayer<Dtype>::LayerSetUp(bottom, top);
  for (int i = 0; i < PREFETCH_COUNT; ++i) {
    prefetch_[i].data_.mutable_cpu_data();
    if (this->output_labels_) {
      prefetch_[i].label_.mutable_cpu_data();
    }
    if (this->output_clip_markers_) {
      prefetch_[i].clip_markers_.mutable_cpu_data();
    }
  }
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    for (int i = 0; i < PREFETCH_COUNT; ++i) {
      prefetch_[i].data_.mutable_gpu_data();
      if (this->output_labels_) {
        prefetch_[i].label_.mutable_gpu_data();
      }
      if (this->output_clip_markers_) {
        prefetch_[i].clip_markers_.mutable_gpu_data();
      } 
    }
  }
#endif
}

template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  //...
  if (this->output_labels_) {
    // Reshape to loaded labels.
    top[1]->ReshapeLike(batch->label_);
    // Copy the labels.
    caffe_copy(batch->label_.count(), batch->label_.cpu_data(),
        top[1]->mutable_cpu_data());
  }
  if (this->output_clip_markers_) {
    top[2]->ReshapeLike(batch->clip_markers_);
    caffe_copy(batch->clip_markers_.count(), batch->clip_markers_.cpu_data(),
        top[2]->mutable_cpu_data());
  }

  prefetch_free_.push(batch);
  /*
  LOG(INFO) << top.size();
  for (int i = 1; i < top.size(); ++i) {
    LOG(INFO) << "top data " << i;
    const Dtype* top_cpu_data = top[i]->cpu_data();
    for (int j = 0; j < top[i]->count(); ++j) {
      LOG(INFO) << top_cpu_data[j];
    }
  }
  */
}

image_data_layer.hpp

template <typename Dtype>
class ImageDataLayer : public BasePrefetchingDataLayer<Dtype> {
 public:
  virtual inline int ExactNumTopBlobs() const { return 3; }

 protected:
  int Rand(int n);
};

image_data_layer.cpp

#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>

#include <fstream> // NOLINT(readability/streams)
#include <iostream> // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "caffe/data_transformer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/layers/image_data_layer.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"

#include <opencv2/opencv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/contrib/contrib.hpp>


namespace caffe {

void returnImageList(string ImagePath, vector<string>& fileNames)
{
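    // list every file directly under ImagePath (cv::Directory comes from OpenCV 2.x contrib)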
    cv::Directory dir;
    fileNames = dir.GetListFiles(ImagePath, "*", false);
}

string fileparts(string filename)
{
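    // drop the class-name prefix before the first '/', e.g.
    // "TableTennisShot/v_TableTennisShot_g19_c03" -> "v_TableTennisShot_g19_c03"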
    int idx0 = filename.find_first_of("/");
    string a = filename.substr(idx0+1,filename.length()-1);
    return a;
}

template <typename Dtype>
ImageDataLayer<Dtype>::~ImageDataLayer<Dtype>() {
    this->StopInternalThread();
}

template <typename Dtype>
void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
        const vector<Blob<Dtype>*>& top) {
    const int new_height = this->layer_param_.image_data_param().new_height();
    const int new_width  = this->layer_param_.image_data_param().new_width();
    const bool is_color  = this->layer_param_.image_data_param().is_color();
    string root_folder = this->layer_param_.image_data_param().root_folder();
    const int batch_size = this->layer_param_.image_data_param().batch_size();

    CHECK((new_height == 0 && new_width == 0) ||
            (new_height > 0 && new_width > 0)) << "Current implementation requires "
                    "new_height and new_width to be set at the same time.";
    // Read the file with filenames and labels
    const string& source = this->layer_param_.image_data_param().source();
    LOG(INFO) << "Opening file " << source;
    std::ifstream infile(source.c_str());
    string filename;
    int label;
    while (infile >> filename >> label) {
        lines_.push_back(std::make_pair(filename, label));
    }

    if (this->layer_param_.image_data_param().shuffle()) {
        // randomly shuffle data
        LOG(INFO) << "Shuffling data";
        //const unsigned int prefetch_rng_seed = caffe_rng_rand();
        //prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
        ShuffleImages();
    }

    const unsigned int prefetch_rng_seed = caffe_rng_rand();
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));

    LOG(INFO) << "A total of " << lines_.size() << " images.";

    lines_id_ = 0;
    // Check if we would need to randomly skip a few data points
    if (this->layer_param_.image_data_param().rand_skip()) {
        unsigned int skip = caffe_rng_rand() %
                this->layer_param_.image_data_param().rand_skip();
        LOG(INFO) << "Skipping first " << skip << " data points.";
        CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
        lines_id_ = skip;
    }

    string imagePath=root_folder + "/" + lines_[lines_id_].first;
    vector<string> fileNames;
    if (this->output_clip_markers_){
        imagePath=root_folder + "/" + fileparts(lines_[lines_id_].first);
        returnImageList(imagePath, fileNames);
        imagePath=imagePath + "/"+ fileNames[0];

        vector<int> clipmarkers_shape(1, batch_size);
        top[2]->Reshape(clipmarkers_shape);
        for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
            this->prefetch_[i].clip_markers_.Reshape(clipmarkers_shape);
        }
    }
    // Read an image, and use it to initialize the top blob.
    cv::Mat cv_img = ReadImageToCVMat(imagePath,
            new_height, new_width, is_color);
    CHECK(cv_img.data) << "Could not load " << imagePath;
    // Use data_transformer to infer the expected blob shape from a cv_image.
    vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
    this->transformed_data_.Reshape(top_shape);
    // Reshape prefetch_data and top[0] according to the batch_size.
    CHECK_GT(batch_size, 0) << "Positive batch size required";
    top_shape[0] = batch_size;
    for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
        this->prefetch_[i].data_.Reshape(top_shape);
    }
    top[0]->Reshape(top_shape);

    LOG(INFO) << "output data size: " << top[0]->num() << ","
            << top[0]->channels() << "," << top[0]->height() << ","
            << top[0]->width();
    // label
    vector<int> label_shape(1, batch_size);
    top[1]->Reshape(label_shape);
    for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
        this->prefetch_[i].label_.Reshape(label_shape);
    }
}


template <typename Dtype>
void ImageDataLayer<Dtype>::ShuffleImages() {
    caffe::rng_t* prefetch_rng =
            static_cast<caffe::rng_t*>(prefetch_rng_->generator());
    shuffle(lines_.begin(), lines_.end(), prefetch_rng);
}

// in data_transformer.cpp:
template <typename Dtype>
void DataTransformer<Dtype>::InitRand() {
  const bool needs_rand = param_.mirror() ||
      (phase_ == TRAIN && param_.crop_size());
  if (needs_rand) {
    const unsigned int rng_seed = caffe_rng_rand();
    rng_.reset(new Caffe::RNG(rng_seed));
  } else {
    rng_.reset();
  }
}

template <typename Dtype>
int ImageDataLayer<Dtype>::Rand(int n) {
  CHECK(prefetch_rng_);
  CHECK_GT(n, 0);
  caffe::rng_t* prefetch_rng =
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  return ((*prefetch_rng)() % n);
}

// This function is called on prefetch thread
template <typename Dtype>
void ImageDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
    CPUTimer batch_timer;
    batch_timer.Start();
    double read_time = 0;
    double trans_time = 0;
    double dir_time=0;
    double idx_time=0;
    CPUTimer timer;
    CHECK(batch->data_.count());
    CHECK(this->transformed_data_.count());
    ImageDataParameter image_data_param = this->layer_param_.image_data_param();
    const int batch_size = image_data_param.batch_size();
    const int new_height = image_data_param.new_height();
    const int new_width = image_data_param.new_width();
    const bool is_color = image_data_param.is_color();
    string root_folder = image_data_param.root_folder();


    // Reshape according to the first image of each batch
    // on single input batches allows for inputs of varying dimension.
    string imagePath=root_folder + "/"+ lines_[lines_id_].first;
    vector<string> fileNames;
    int tbuffer;
    if (this->output_clip_markers_){
        tbuffer = batch_size / 16;
        imagePath=root_folder + "/" + fileparts(lines_[lines_id_].first);
        returnImageList(imagePath, fileNames);
        imagePath=imagePath + "/"+fileNames[0];
    }
    // Read an image, and use it to initialize the top blob.
    cv::Mat cv_img = ReadImageToCVMat(imagePath,
            new_height, new_width, is_color);
    CHECK(cv_img.data) << "Could not load " << imagePath;
    // Use data_transformer to infer the expected blob shape from a cv_img.
    vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
    this->transformed_data_.Reshape(top_shape);
    // Reshape batch according to the batch_size.
    top_shape[0] = batch_size;
    batch->data_.Reshape(top_shape);

    Dtype* prefetch_data = batch->data_.mutable_cpu_data();
    Dtype* prefetch_label = batch->label_.mutable_cpu_data();

    // datum scales
    const int lines_size = lines_.size();
    if (this->output_clip_markers_){
        Dtype* prefetch_clip_markers = batch->clip_markers_.mutable_cpu_data();
        for (int item_id = 0; item_id < tbuffer; ++item_id) {
            // get a blob
            CHECK_GT(lines_size, lines_id_);

            timer.Start();
            imagePath=root_folder + "/" + fileparts(lines_[lines_id_].first);
            returnImageList(imagePath, fileNames);
            dir_time += timer.MicroSeconds();

            int randID = Rand(fileNames.size()-16+1);//(rand() % (fileNames.size()-16+1));
            DLOG(INFO) << imagePath<<"-randID:"<<randID;
            string imagePath1;
            for(int image_id=randID;image_id<16+randID;++image_id){
                timer.Start();
                imagePath1=imagePath+ "/"+fileNames[image_id];
                cv::Mat cv_img = ReadImageToCVMat(imagePath1,
                        new_height, new_width, is_color);
                read_time += timer.MicroSeconds();

                CHECK(cv_img.data) << "Could not load " << imagePath1;
                //LOG(INFO) << "ImagePath1" << imagePath1<<new_height<<new_width;

                timer.Start();
                /* suppose tbuffer = 4; columns are frames, rows are item_id:
                 *  frame:      0  1  2  3  4  5  6 ... 16
                 *  item 0:     0  4  8 12 16 20 24 ... 64
                 *  item 1:     1  5  9 13 17 21 25 ... 65
                 *  item 2:     2  6 10 14 18 22 26 ... 66
                 *  item 3:     3  7 11 15 19 23 27 ... 67
                 */
                int imgPosition = tbuffer*(image_id-randID)+item_id;
                int offset = batch->data_.offset(imgPosition);
                // Apply transformations (mirror, crop...) to the image
                this->transformed_data_.set_cpu_data(prefetch_data + offset);
                //int rid=item_id*16+(image_id-randID);
                prefetch_label[imgPosition] = lines_[lines_id_].second;
                if(image_id==randID){
                    this->data_transformer_->Transform(cv_img, &(this->transformed_data_),true);
                    prefetch_clip_markers[imgPosition] = 0;
                }else{
                    this->data_transformer_->Transform(cv_img, &(this->transformed_data_),false);
                    prefetch_clip_markers[imgPosition] = 1;}
                idx_time += timer.MicroSeconds();
            }
            // go to the next iter
            lines_id_++;
            if (lines_id_ >= lines_size) {
                // We have reached the end. Restart from the first.
                DLOG(INFO) << "Restarting data prefetching from start.";
                lines_id_ = 0;
            }
        }
        batch_timer.Stop();
        DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
        DLOG(INFO) << "Directory time: " << dir_time / 1000 << " ms.";
                DLOG(INFO) << " Read time: " << read_time / 1000 << " ms.";
        DLOG(INFO) << "ChangeIdx time: " << idx_time / 1000 << " ms.";
    }else{
        for (int item_id = 0; item_id < batch_size; ++item_id) {
            // get a blob
            timer.Start();
            CHECK_GT(lines_size, lines_id_);
            cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
                    new_height, new_width, is_color);
            CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
            read_time += timer.MicroSeconds();
            timer.Start();
            // Apply transformations (mirror, crop...) to the image
            int offset = batch->data_.offset(item_id);
            this->transformed_data_.set_cpu_data(prefetch_data + offset);
            this->data_transformer_->Transform(cv_img, &(this->transformed_data_));
            trans_time += timer.MicroSeconds();

            prefetch_label[item_id] = lines_[lines_id_].second;
            // go to the next iter
            lines_id_++;
            if (lines_id_ >= lines_size) {
                // We have reached the end. Restart from the first.
                DLOG(INFO) << "Restarting data prefetching from start.";
                lines_id_ = 0;
                if (this->layer_param_.image_data_param().shuffle()) {
                    ShuffleImages();
                }
            }
        }
        batch_timer.Stop();
        DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
        DLOG(INFO) << " Read time: " << read_time / 1000 << " ms.";
        DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
    }
}
INSTANTIATE_CLASS(ImageDataLayer);
REGISTER_LAYER_CLASS(ImageData);

}  // namespace caffe
#endif // USE_OPENCV
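
The position formula in load_batch, imgPosition = tbuffer*(image_id-randID)+item_id, lays the batch out frame-major, exactly the [video0_frame0, video1_frame0, ...] order the Python layer produced by explicit rearrangement. A sketch of the resulting layout (toy sizes, not part of the C++ code):

tbuffer, clip_len = 4, 16
pos = {}
for item_id in range(tbuffer):        # one clip per item_id
    for f in range(clip_len):         # f = image_id - randID
        pos[(item_id, f)] = tbuffer * f + item_id
# frame 0 of the four clips fills slots 0..3, frame 1 fills slots 4..7, ...
assert [pos[(i, 0)] for i in range(tbuffer)] == [0, 1, 2, 3]
assert [pos[(i, 1)] for i in range(tbuffer)] == [4, 5, 6, 7]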

data_transformer.hpp

void Transform(const cv::Mat& cv_img, Blob<Dtype>* transformed_blob, bool changeCrop = true);

data_transformer.cpp

template<typename Dtype>
void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img,
                                       Blob<Dtype>* transformed_blob, bool changeCrop) {
  //...

  if (crop_size) {
    CHECK_EQ(crop_size, height);
    CHECK_EQ(crop_size, width);
    // We only do random crop when we do training.
    if ((phase_ == TRAIN) && changeCrop) {
      h_off = Rand(img_height - crop_size + 1);
      w_off = Rand(img_width - crop_size + 1);
      former_h_off = h_off;
      former_w_off = w_off;
    } else if((phase_ == TRAIN) && !changeCrop){
      h_off = former_h_off;
      w_off = former_w_off;
    } else if(phase_ == TEST) {
      h_off = (img_height - crop_size) / 2;
      w_off = (img_width - crop_size) / 2;
    }
    DLOG(INFO)<<h_off<<","<<w_off;
    cv::Rect roi(w_off, h_off, crop_size, crop_size);
    cv_cropped_img = cv_img(roi);
  } else {
    CHECK_EQ(img_height, height);
    CHECK_EQ(img_width, width);
  }

  //....
}
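
The changeCrop flag makes the first frame of a clip draw a fresh random crop and caches the offsets (former_h_off / former_w_off are presumably new member variables added to DataTransformer); the remaining 15 frames reuse them, so the whole clip is cropped consistently. The same idea in Python (a sketch, not the Caffe code):

import random

class ClipCropper(object):
    """Draw one random crop per clip and reuse it for every following frame."""

    def __init__(self, img_h, img_w, crop):
        self.img_h, self.img_w, self.crop = img_h, img_w, crop
        self.h_off = self.w_off = 0

    def offsets(self, change_crop):
        if change_crop:  # first frame of the clip
            self.h_off = random.randint(0, self.img_h - self.crop)
            self.w_off = random.randint(0, self.img_w - self.crop)
        return self.h_off, self.w_off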

Other changes

net.cpp

template <typename Dtype>
void Net<Dtype>::Init(const NetParameter& in_param) {
  /*CHECK(Caffe::root_solver() || root_net_) << "root_net_ needs to be set for all non-root solvers";*/
  bool isLSTMLayer=false;
  if(!Caffe::root_solver() && root_net_==NULL)
    isLSTMLayer = true;
  // Set phase from the state.
  //....
  for (int layer_id = 0; layer_id < param.layer_size(); ++layer_id) {
    // For non-root solvers, whether this layer is shared from root_net_.
    bool share_from_root;
    if(isLSTMLayer)
        share_from_root = false;
    else
        share_from_root = !Caffe::root_solver()
           && root_net_->layers_[layer_id]->ShareInParallel();
    // Inherit phase from net if unset.
    // ...

base_data_layer.cu

#include <vector>

#include "caffe/layers/base_data_layer.hpp"

namespace caffe {

template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  Batch<Dtype>* batch = prefetch_full_.pop("Data layer prefetch queue empty");
  // Reshape to loaded data.
  top[0]->ReshapeLike(batch->data_);
  // Copy the data
  caffe_copy(batch->data_.count(), batch->data_.gpu_data(),
      top[0]->mutable_gpu_data());
  if (this->output_labels_) {
    // Reshape to loaded labels.
    top[1]->ReshapeLike(batch->label_);
    // Copy the labels.
    caffe_copy(batch->label_.count(), batch->label_.gpu_data(),
        top[1]->mutable_gpu_data());
  }
  if (this->output_clip_markers_) {
    top[2]->ReshapeLike(batch->clip_markers_);
    caffe_copy(batch->clip_markers_.count(), batch->clip_markers_.gpu_data(),
        top[2]->mutable_gpu_data());
  }
  // Ensure the copy is synchronous wrt the host, so that the next batch isn't
  // copied in meanwhile.
  CUDA_CHECK(cudaStreamSynchronize(cudaStreamDefault));
  prefetch_free_.push(batch);
}

INSTANTIATE_LAYER_GPU_FORWARD(BasePrefetchingDataLayer);

}  // namespace caffe

caffe.proto

message LossParameter {
  //...
  optional NormalizationMode normalization = 3;
  optional bool normalize = 2[default = false];
}

recurrent layer

inheritance

(figure: inheritance diagram of the recurrent layer classes)
