class P2PSync : public GPUParams<Dtype>, public Solver<Dtype>::Callback,
public InternalThread {
class BasePrefetchingDataLayer :
public BaseDataLayer<Dtype>, public InternalThread {
class DataLayer : public BasePrefetchingDataLayer<Dtype> {
public:
// DataLayer uses DataReader instead for sharing for parallelism
virtual inline bool ShareInParallel() const { return false; }
protected:
DataReader reader_;
};
class DataReader {
protected:
// A single body is created per source
class Body : public InternalThread {
src/caffe/parallel.cpp: syncs[i]->StartInternalThread();
src/caffe/layers/base_data_layer.cpp: StartInternalThread();
src/caffe/data_reader.cpp: StartInternalThread();
设有4个GPU: GPU0, GPU1, GPU2, GPU3。
跑root_solver的线程这里称为主线程(对应GPU0)。
主线程进入 sync.run(gpus) 这个函数之后:
for (int i = 1; i < syncs.size(); ++i) {
// 主线程在这里生成了3个新线程(boost thread): bt1, bt2, bt3,
// 分别对应 GPU1, GPU2, GPU3。
// 这三个线程会先各自初始化自己的 solver->net->layer,
// 然后开始各自的前向传播和反向传播。
syncs[i]->StartInternalThread();
}
// Run root solver on current thread
// 主线程(GPU0):
// 此时已经初始化完自己的 solver->net->layer, 准备开始前向传播和反向传播了。
solver_->Solve();
//1. 继承关系(箭头方向: 基类 -> 派生类)
Layer -> BaseDataLayer
BaseDataLayer, InternalThread -> BasePrefetchingDataLayer -> ImageLayer
//2. ShareInParallel
ImageLayer 的 ShareInParallel -> true
当上面的4个线程初始化 ImageLayer 的时候:
主线程在 BasePrefetchingDataLayer 的 LayerSetUp 函数中调用 StartInternalThread,
也就是生成了一个用于 prefetch 的新线程(boost thread): bt_data1。
因为 ImageLayer 的 ShareInParallel -> true,
所以剩下的三个 boost thread(bt1, bt2, bt3)根本没有必要再对 ImageLayer 进行 LayerSetUp。
所以可以说, 只有一个 ImageLayer。
class DataLayer : public BasePrefetchingDataLayer<Dtype> {
public:
// DataLayer uses DataReader instead for sharing for parallelism
virtual inline bool ShareInParallel() const { return false; }
protected:
DataReader reader_;
};
class DataReader {
protected:
// A single body is created per source
class Body : public InternalThread {
不同于 ImageLayer, DataLayer 的 ShareInParallel 是 false,
所以4个线程会有各自的 DataLayer(共4个)。
但是, 主线程初始化它的 DataLayer 时, 会经由 reader_(每个 DataLayer 私有)创建 body(4个线程共用)。
也就是说, 主线程新建 body 之后, 剩下三个线程只要复用这个 body 就好了, 不会再新建。
而这个 body 又对应了一个 boost thread: bt_data2。
无论是 ImageLayer 还是 DataLayer, 最后都保证只有一个线程在读数据。