mshadow采用了表达式模板的技巧增强了c++矩阵库的性能。
mshadow用于数据存储结构的主要继承脉络如下:
Tensor->TRValue->RValueExp->Exp
继承链的顶端是所有表达式的基类Exp:
// Exp: CRTP base of every expression. SubType is the concrete expression
// type; self()/ptrself() recover it from a base reference/pointer, which is
// what lets expressions be passed around as Exp<...> and evaluated later.
template<typename SubType, typename DType, int exp_type>
struct Exp {
 public:
  /*! \return subtype instance of current class */
  inline const SubType& self(void) const {
    return *static_cast<const SubType*>(this);
  }
  /*! \return pointer to subtype instance of current class */
  inline SubType* ptrself(void) {
    return static_cast<SubType*>(this);  // restored: extraction dropped <SubType*>
  }
};
这里Exp定义的精髓就是通过self或ptrself可以获得SubType的引用或指针,这为后面将SubType表达式作为模板参数传递后再获得SubType提供了途径。
RValueExp继承于Exp,是所有右值的基类:
template<typename Container, typename DType>
class RValueExp: public Exp {
public:
inline const TransposeExp T(void) const {
return TransposeExp(this->self());
}
/*! \brief operator overload */
inline Container &operator+=(DType s) {
ExpEngine::Eval(this->ptrself(), scalar(s));
return *(this->ptrself());
}
/*! \brief operator overload */
inline Container &operator-=(DType s) {......}
/*! \brief operator overload */
inline Container &operator*=(DType s) {......}
/*! \brief operator overload */
inline Container &operator/=(DType s) {......}
/*! \brief operator overload */
inline Container &__assign(DType s) {
ExpEngine::Eval(this->ptrself(), scalar(s));
return *(this->ptrself());
}
/*! \brief we can not define container = container */
template<typename E, int etype>
inline Container &__assign(const Exp &exp) {
ExpEngine::Eval(this->ptrself(), exp.self());
return *(this->ptrself());
}
/*! \brief operator overload, assign */
inline Container &__assign(const Exp &exp);
/*! \brief implementation of operator+= */
template<typename E, int etype>
inline Container &operator+=(const Exp &exp) {
ExpEngine::Eval(this->ptrself(), exp.self());
return *(this->ptrself());
}
/*! \brief implementation of operator-= */
template<typename E, int etype>
inline Container &operator-=(const Exp &exp) {......}
/*! \brief implementation of operator*= */
template<typename E, int etype>
inline Container &operator*=(const Exp &exp) {......}
/*! \brief implementation of operator/= */
template<typename E, int etype>
inline Container &operator/=(const Exp &exp) {......}
};
RValueExp各运算符重载中调用的ExpEngine类,针对四种表达式类型(kMapper、kChainer、kRValue、kComplex)分别重载了Eval函数 :
template<typename SV, typename RV, typename DType>
struct ExpEngine {
template<typename E>
inline static void Eval(RV *dst,const Exp &exp) {
MapExp(dst, exp);
}
template<typename E>
inline static void Eval(RV *dst,const Exp &exp) {
MapExp(dst, exp);
}
template<typename E>
inline static void Eval(RV *dst,const Exp &exp) {
MapExp(dst, exp);
}
//用于dot
template<typename E>
inline static void Eval(RV *dst,const Exp &exp) {
ExpComplexEngine::Eval(dst->ptrself(), exp.self());
}
};
TRValue是所有可能的tensor的超类
// TRValue: super type of all possible tensors; simply forwards the CRTP
// container type down to expr::RValueExp (base args restored).
template<typename Container, typename Device, int dimension, typename DType>
struct TRValue: public expr::RValueExp<Container, DType> {
};
终于到了Tensor类的定义:
template<typename Device, int dimension,
typename DType MSHADOW_DEFAULT_DTYPE>
struct Tensor: public TRValue,
Device, dimension, DType> {
public:
static const bool kDevCPU = Device::kDevCPU;
static const int kSubdim = dimension - 1;
/*! \brief pointer to the data */
DType *dptr_;
/*! \brief shape of the tensor */
Shape shape_;
/*!
* \brief storing the stride information in x dimension
* this is used to deal with pitch allocation in gpu or sse(align x dimension to 64bit) for efficiency
*/
index_t stride_;
Stream *stream_;
//各种构造函数
......
/*! 从 data pointer 和shape构造Tensor */
MSHADOW_XINLINE Tensor(DType *dptr,
const Shape &shape,
index_t stride, Stream *stream)
: dptr_(dptr), shape_(shape), stride_(stride), stream_(stream) {}
......
......
MSHADOW_XINLINE Tensor
Slice(index_t begin, index_t end) const {
Shape s = this->shape_;
s[0] = end - begin;
return Tensor(dptr_ + this->MemSize<1>() * begin,
s, stride_, stream_);
}
/*!\brief implement the assignment of same type */
inline Tensor &
operator=(const Tensor &exp) {
dptr_ = exp.dptr_;
shape_ = exp.shape_;
stride_ = exp.stride_;
stream_ = exp.stream_;
return *this;
}
/*!\brief functions to fit expression template */
template<typename E, int etype>
inline Tensor &
operator=(const expr::Exp &exp) {
return this->__assign(exp);
}
/*!\brief functions to fit expression template */
inline Tensor &operator=(const DType &exp) {
return this->__assign(exp);
}
};
Tensor的shape与numpy.shape不一样,最低维度从shape_[0]开始,重载操作符“=”除了拷贝已有Tensor,还可赋值中间运算结果表达式Exp,以及赋值标量。这里对operator =的重载将运算操作延迟到了赋值阶段,实现了Lazy Evaluation,避免了临时内存分配。特别地,DotExp在operator =中实行lazily evaluate, 将矩阵的乘法重定向到了blas库。
mshadow用于表达式操作的类(DotExp、BinaryMapExp、UnaryMapExp)同样继承于Exp基类,其特点是该表达式操作类自身也作为模板参数传递给Exp,以BinaryMapExp为例:
template<typename OP, typename TA, typename TB, typename DType, int etype>
struct BinaryMapExp: public Exp,
DType, etype> {
/*! \brief left operand */
const TA &lhs_;
/*! \brief right operand */
const TB &rhs_;
/*! \brief constructor */
explicit BinaryMapExp(const TA &lhs, const TB &rhs)
:lhs_(lhs), rhs_(rhs) {}
};
// MakeExp: factory that builds a BinaryMapExp from two sub-expressions;
// the resulting expression type tag is the OR of both operands' tags plus
// kMapper (restored — extraction dropped the template argument lists).
template<typename OP, typename TA, typename TB, typename DType, int ta, int tb>
inline BinaryMapExp<OP, TA, TB, DType, (ta|tb|type::kMapper)>
MakeExp(const Exp<TA, DType, ta> &lhs, const Exp<TB, DType, tb> &rhs) {
  return BinaryMapExp<OP, TA, TB, DType,
                      (ta|tb|type::kMapper)>(lhs.self(), rhs.self());
}
// F<OP>(lhs, rhs): user-facing shorthand describing a new binary operation
// with a custom OP; simply forwards to MakeExp.
template<typename OP, typename TA, typename TB, typename DType, int ta, int tb>
inline BinaryMapExp<OP, TA, TB, DType, (ta|tb|type::kMapper)>
F(const Exp<TA, DType, ta> &lhs, const Exp<TB, DType, tb> &rhs) {
  return MakeExp<OP>(lhs, rhs);
}
// operator+: builds the op::plus expression node (the op::plus tag was lost
// in extraction and is restored here); -, *, / are analogous.
template<typename TA, typename TB, typename DType, int ta, int tb>
inline BinaryMapExp<op::plus, TA, TB, DType, (ta|tb|type::kMapper)>
operator+(const Exp<TA, DType, ta> &lhs, const Exp<TB, DType, tb> &rhs) {
  return MakeExp<op::plus>(lhs, rhs);
}
......
......
BinaryMapExp是双目运算的表达式类,MakeExp是用来生成BinaryMapExp类对象的函数,F是自定义操作的函数,F< OP >(lhs, rhs)描述了一个新的双目运算,除此以外,+-*/等操作符重载函数也调用MakeExp创建BinaryMapExp。
这些用于表达式操作的类(DotExp、BinaryMapExp、UnaryMapExp)表示一个运算操作的中间结果,且可以递归表示(由Plan的Eval函数完成),实现了lengthy equations的解析。
真正用于递归调用eval的是Plan类:
// Plan: the evaluation unit generated from an expression (by MakePlan).
// The primary template only declares the interface; each expression kind
// provides its own specialization, and Eval is invoked recursively down
// the expression tree.
template<typename ExpType, typename DType>
class Plan {
public:
/*!
* \brief evaluate the expression at index [y][x]
* to be implemented by SubType, for RValue, the return type will be DType &
*/
MSHADOW_XINLINE DType Eval(index_t y, index_t x) const;
};
// tensor的plan函数
template <typename Device, int dim, typename DType>
class Plan, DType> {
public:
explicit Plan(const Tensor &t)
: dptr_(t.dptr_), stride_(t.stride_) {}
// for RValue, the return type should be reference
MSHADOW_XINLINE DType &REval(index_t y, index_t x) {
return dptr_[y * stride_ + x];
}
// const evaluation
MSHADOW_XINLINE const DType &Eval(index_t y, index_t x) const {
return dptr_[y * stride_ + x];
}
private:
DType *dptr_;
index_t stride_;
};
......
......
// 双目表达式的plan
template<typename OP, typename TA, typename TB, int etype, typename DType>
class Plan, DType> {
public:
explicit Plan(const Plan &lhs, const Plan &rhs)
: lhs_(lhs), rhs_(rhs) {}
MSHADOW_XINLINE DType Eval(index_t y, index_t x) const {
return OP::Map(lhs_.Eval(y, x), rhs_.Eval(y, x));
}
private:
Plan lhs_;
Plan rhs_;
};
调用模板函数MakePlan从表达式生成Plan,再以BinaryMapExp表达式为例:
// MakePlan for an rvalue (e.g. a Tensor): wrap the concrete container in
// its leaf Plan (template arguments restored).
template<typename T, typename DType>
inline Plan<T, DType> MakePlan(const RValueExp<T, DType> &e) {
  return Plan<T, DType>(e.self());
}
// MakePlan for a binary expression: recursively build the plans of both
// operands and compose them (template arguments restored).
template<typename OP, typename TA, typename TB, typename DType, int etype>
inline Plan<BinaryMapExp<OP, TA, TB, DType, etype>, DType>
MakePlan(const BinaryMapExp<OP, TA, TB, DType, etype> &e) {
  return Plan<BinaryMapExp<OP, TA, TB, DType, etype>,
              DType>(MakePlan(e.lhs_), MakePlan(e.rhs_));
}
已知ExpEngine中重载的eval中调用了MapExp来实现将Plan的结果传递给目标Exp的作用,MapExp又通过直接或间接(为处理sse优化而加入一个中间类MapExpCPUEngine)调用MapPlan实现上述作用。
Tensor的维数在定义后就固定了,因此在图模型中需要一个更为抽象灵活的数据结构,这就是TBlob:
// TBlob: dimension- and dtype-erased view over tensor memory. It performs
// no arithmetic and owns no memory; get/get_with_shape/FlatTo2D/FlatTo3D
// materialize a typed fixed-dimension Tensor view on demand. Garbled
// template argument lists (Tensor2 -> Tensor<Device, 2, DType>, bare
// static_cast, DataType) and the missing trailing ';' are restored.
class TBlob {
 public:
  /*! \brief pointer to the data */
  void *dptr_;
  /*! \brief shape of the tensor */
  TShape shape_;
  /*!
   * \brief storing the stride information in x dimension
   */
  index_t stride_;
  /*! \brief device mask of the corresponding device */
  int dev_mask_;
  /*! \brief type flag of the tensor blob */
  int type_flag_;
  ......
  /*! \brief fetch a dim-dimensional Tensor view, shape taken from shape_ */
  template<typename Device, int dim, typename DType>
  inline Tensor<Device, dim, DType> get(Stream<Device> *stream = NULL) const {...}
  /*! \brief fetch a Tensor view with an explicitly supplied shape
   *  (element count must match); checks device, dtype and contiguity */
  template<typename Device, int dim, typename DType>
  inline Tensor<Device, dim, DType> get_with_shape(const Shape<dim> &shape,
                                                   Stream<Device> *stream = NULL) const
  {
    CHECK(Device::kDevMask == dev_mask_)
      << "TBlob.get: device type do not match specified type";
    CHECK(DataType<DType>::kFlag == type_flag_)
      << "TBlob.get_with_shape: data type do not match specified type."
      << "Expected: " << type_flag_ << " v.s. given " << DataType<DType>::kFlag;
    CHECK_EQ(this->CheckContiguous(), true) << "TBlob.get_reshape: must be contiguous";
    CHECK_EQ(this->shape_.Size(), shape.Size())
      << "TBlob.get_with_shape: new and old shape do not match total elements";
    // stride of the returned view is the innermost extent (contiguous layout)
    return Tensor<Device, dim, DType>(static_cast<DType*>(dptr_),
                                      shape,
                                      shape[dim - 1],
                                      stream);
  }
  ......
  /*! \brief flatten to a 2-dimensional Tensor view */
  template<typename Device, typename DType>
  inline Tensor<Device, 2, DType> FlatTo2D(Stream<Device> *stream = NULL) const {}
  ......
  /*! \brief flatten to a 3-dimensional Tensor view, keeping the given axis */
  template<typename Device, typename DType>
  inline Tensor<Device, 3, DType> FlatTo3D(int axis, Stream<Device> *stream = NULL)
      const {}
  ......
};
TBlob不涉及任何算数运算,也没有隐式的内存分配与释放,它就像一个指针类,在需要的时候调用get、get_with_shape、FlatTo2D、FlatTo3D等获得固定维数的Tensor来做更多的操作。TShape与TBlob类似,在需要的时候调用get、FlatTo2D等获得Tensor对应的Shape。