介绍 Expression template。本文是一篇学习笔记,主要参考 mshadow 项目关于表达式模板的教程:
https://github.com/dmlc/mshadow/tree/master/guide/exp-template
// Small int vector whose constructors/destructor log, so the tests below can
// show how many objects are created and destroyed.
class Vec
{
public:
    // Element storage; public so tests and helpers can read it directly.
    vector<int> data;

    // Default constructor: leaves `data` empty and logs the call.
    Vec()
    {
        LOG(INFO) << "Default construct";
    }

    // Builds the vector from a brace-enclosed list of ints (nothing is logged).
    Vec(const initializer_list<int>& il)
        : data(il)
    {
    }

    // Sized constructor: `len` zero-valued elements, then hands the address of
    // the first element to Pval (a helper defined elsewhere; the sample output
    // shows it printing the expression and its value).
    // NOTE(review): `&data[0]` presumes len > 0 — confirm callers never pass 0.
    Vec(int len)
    {
        data.resize(len, 0);
        Pval(&data[0]);
    }

    // Logs every destruction.
    ~Vec()
    {
        LOG(INFO) << "Destruct";
    }
};
// Returns a default-constructed Vec by value; used to observe whether
// returning a named local causes any extra construction or destruction.
Vec test_return()
{
    Vec result;
    return result;
}
// Initializes a Vec directly from the value returned by test_return().
TEST(simple1, func)
{
    Vec returned = test_return();
}
I0512 11:51:37.124034 22030 test_simple.cc:37] Default construct
I0512 11:51:37.124111 22030 test_simple.cc:41] Destruct
这证明了 C++ 会对返回值做优化(返回值优化,RVO/NRVO):整个过程只有一次构造和一次析构,没有额外的拷贝代价。
类似地,对于 vec = vec1 + vec2 也会有这种临时变量优化,不会有冗余的拷贝代价:只有一次 resize,以及 3 次析构(b、c 和 a 各一次)。
// Adds two three-element vectors and initializes `a` from the sum.
TEST(simple2, func)
{
    Vec b{ 3, 2, 1 };
    Vec c{ 2, 3, 4 };
    Vec a = b + c;
}
I0512 11:57:51.699890 16547 test_simple.cc:33] &data[0] --- [0x8b0340]
I0512 11:57:51.699898 16547 test_simple.cc:58] vec.data --- 3
I0512 11:57:51.699903 16547 test_simple.cc:58] 0 5
I0512 11:57:51.699906 16547 test_simple.cc:58] 1 5
I0512 11:57:51.699911 16547 test_simple.cc:58] 2 5
I0512 11:57:51.699914 16547 test_simple.cc:41] Destruct
I0512 11:57:51.699919 16547 test_simple.cc:41] Destruct
I0512 11:57:51.699923 16547 test_simple.cc:41] Destruct
但是 expression template 还是有必要的,比如:
vec = vec1 + vec2 + vec3
这个时候临时空间的开辟和释放是逃不掉的:2 次 resize,4 + 1 次析构;其中有一次多余的 resize(保存 vec1 + vec2 的中间结果),对应一次多余的析构。
// Chains two additions over three vectors and initializes `a` from the result.
TEST(simple3, func)
{
    Vec b{ 3, 2, 1 };
    Vec c{ 2, 3, 4 };
    Vec d{ 123, 45, 30 };
    Vec a = b + c + d;
}
I0512 11:59:40.398268 25704 test_simple.cc:33] &data[0] --- [0x8b1330]
I0512 11:59:40.398275 25704 test_simple.cc:58] vec.data --- 3
I0512 11:59:40.398279 25704 test_simple.cc:58] 0 5
I0512 11:59:40.398284 25704 test_simple.cc:58] 1 5
I0512 11:59:40.398288 25704 test_simple.cc:58] 2 5
I0512 11:59:40.398293 25704 test_simple.cc:33] &data[0] --- [0x8b1320]
I0512 11:59:40.398298 25704 test_simple.cc:58] vec.data --- 3
I0512 11:59:40.398304 25704 test_simple.cc:58] 0 128
I0512 11:59:40.398310 25704 test_simple.cc:58] 1 50
I0512 11:59:40.398318 25704 test_simple.cc:58] 2 35
I0512 11:59:40.398321 25704 test_simple.cc:41] Destruct
I0512 11:59:40.398325 25704 test_simple.cc:41] Destruct
I0512 11:59:40.398329 25704 test_simple.cc:41] Destruct
I0512 11:59:40.398334 25704 test_simple.cc:41] Destruct
I0512 11:59:40.398339 25704 test_simple.cc:41] Destruct
解决思路是延迟计算(lazy evaluation):看到 + 的时候只把操作记下来,不立即处理;等看到 = 的时候再分配一次空间,统一计算。
A = B + C
教程里面给出了示例
exp_lazy.cpp 来自 <https://github.com/dmlc/mshadow/tree/master/guide/exp-template>
// Forward declaration: BinaryAddExp only stores references to Vec, so the
// incomplete type is sufficient here; the full definition follows below.
struct Vec;

// Lazy "lhs + rhs" expression: it only records its two operands, nothing is
// computed when it is built. The operands are held by reference, so both must
// stay alive until the expression has been assigned to a Vec.
struct BinaryAddExp {
  const Vec &lhs;
  const Vec &rhs;
  BinaryAddExp(const Vec &lhs, const Vec &rhs)
      : lhs(lhs), rhs(rhs) {}
};

// Non-owning view over `len` floats starting at `dptr`.
// no constructor and destructor to allocate and de-allocate memory,
// allocation done by user
struct Vec {
  int len;
  float* dptr;
  // A default-constructed Vec is an empty view (len 0, null pointer), so
  // assigning an expression to it is a no-op instead of a loop over an
  // indeterminate length.
  Vec(void) : len(0), dptr(nullptr) {}
  // Wraps the caller-provided buffer `dptr` holding `len` elements.
  Vec(float *dptr, int len)
      : len(len), dptr(dptr) {}
  // here is where evaluation happens: each of this vector's `len` elements is
  // set to the sum of the corresponding operand elements.
  // NOTE(review): assumes both operands hold at least `len` elements — not checked.
  inline Vec &operator=(const BinaryAddExp &src) {
    for (int i = 0; i < len; ++i) {
      dptr[i] = src.lhs.dptr[i] + src.rhs.dptr[i];
    }
    return *this;
  }
};

// no evaluation happens here: operator+ merely builds the expression object
inline BinaryAddExp operator+(const Vec &lhs, const Vec &rhs) {
  return BinaryAddExp(lhs, rhs);
}
但是
A = B + C + D
有多个操作的时候,如何传递这种操作记忆?如果只是延迟计算,对于单次 + 操作,上面已经看到 C++11 的返回值优化就可以避免多余的空间分配了。
下面参照教程中的做法稍作改写。其实我们在执行 = 的时候,右侧表达式对应的是这样的类型:
Exp<BinaryAddExp<BinaryAddExp<Vec, Vec>, Vec> >
这个类型是编译器在编译期间确定好的。
// Common base of all expressions: every concrete expression type derives from
// Exp<ItsOwnType>, which lets generic code recover the concrete type without
// any virtual call.
template<typename SubType>
struct Exp {
  // Views this base sub-object as the derived expression it belongs to and
  // returns it by const reference.
  inline const SubType& self(void) const {
    return static_cast<const SubType&>(*this);
  }
};
// Binary add expression: lazily represents lhs + rhs for any two expressions.
// It derives from Exp and passes its own type as the template argument.
// Only references to the operands are stored, so both must stay alive until
// the expression has been evaluated.
template<typename TLhs, typename TRhs>
struct BinaryAddExp : public Exp<BinaryAddExp<TLhs, TRhs> > {
  const TLhs &lhs;
  const TRhs &rhs;
  BinaryAddExp(const TLhs& lhs, const TRhs& rhs)
      : lhs(lhs), rhs(rhs) {}
  // evaluation function, evaluate this expression at position i by adding the
  // operands' values at i.
  // NOTE(review): the result is returned as float even when the operands
  // evaluate to int, so very large integer sums may lose precision.
  inline float Eval(int i) const {
    return lhs.Eval(i) + rhs.Eval(i);
  }
  // Number of elements, taken from the left operand.
  // Must be const: Exp::self() hands the expression out as a const reference,
  // and a non-const size() cannot be called through it.
  // NOTE(review): assumes rhs has the same length — not checked here.
  inline size_t size() const
  {
    return lhs.size();
  }
};
// Int vector that takes part in expression templates: it is a leaf expression
// (Eval/size) and also the place where a whole expression gets evaluated
// (the templated operator=). Constructors and destructor log so the number of
// objects created and destroyed can be observed.
class Vec : public Exp<Vec>
{
public:
    // Creates `len` zero-valued elements, then hands the address of the first
    // element to Pval (a helper defined elsewhere).
    // NOTE(review): `&data[0]` presumes len > 0 — confirm callers never pass 0.
    Vec(int len)
    {
        data.resize(len, 0);
        Pval(&data[0]);
    }
    // Leaves `data` empty and logs the call.
    Vec()
    {
        LOG(INFO) << "Default construct";
    }
    // Logs every destruction.
    ~Vec()
    {
        LOG(INFO) << "Destruct";
    }
    // Builds the vector from a brace-enclosed list of ints (nothing is logged).
    Vec(const initializer_list<int>& il)
        :data(il)
    {
    }
    // Number of stored elements.
    // Must be const: expression nodes keep their operands as const references
    // and query their size through them, which a non-const size() would forbid.
    inline size_t size() const
    {
        return data.size();
    }
    // here is where evaluation happens: the storage is resized once to the
    // expression's size, then every element is computed in a single pass.
    template<typename EType>
    inline Vec& operator= (const Exp<EType>& src_) {
        const EType &src = src_.self();
        Pval(src.size());
        data.resize(src.size());
        for (size_t i = 0; i < src.size(); ++i) {
            data[i] = src.Eval(i);
        }
        return *this;
    }
    // evaluation function, evaluate this expression at position i
    // (for a plain vector that is simply the stored element; no bounds check)
    inline int Eval(int i) const {
        return data[i];
    }
    // Element storage; public so helpers can read it directly.
    vector<int> data;
};
// Generic operator+ for any pair of expressions: it only builds the
// BinaryAddExp node referring to both operands; no element is computed here.
template<typename TLhs, typename TRhs>
inline BinaryAddExp<TLhs, TRhs> operator+(const Exp<TLhs> &lhs, const Exp<TRhs> &rhs)
{
    typedef BinaryAddExp<TLhs, TRhs> Sum;
    const TLhs &left = lhs.self();
    const TRhs &right = rhs.self();
    return Sum(left, right);
}
void run()
{
Vec b{ 3, 2, 1 }, c{ 2, 3, 4 }, d{ 123, 45, 30 };
Vec a;
a = b + c + d;
Pvec(a.data);
}
输出如下:共 4 次析构(a、b、c、d 各一次),没有多余的构造。
I0512 15:15:55.864331 16127 exp_template.cc:70] Default construct
I0512 15:15:55.864430 16127 exp_template.cc:90] src.size() --- [3]
I0512 15:15:55.864437 16127 exp_template.cc:119] a.data --- 3
I0512 15:15:55.864444 16127 exp_template.cc:119] 0 128
I0512 15:15:55.864449 16127 exp_template.cc:119] 1 50
I0512 15:15:55.864452 16127 exp_template.cc:119] 2 35
I0512 15:15:55.864456 16127 exp_template.cc:74] Destruct
I0512 15:15:55.864460 16127 exp_template.cc:74] Destruct
I0512 15:15:55.864464 16127 exp_template.cc:74] Destruct
I0512 15:15:55.864470 16127 exp_template.cc:74] Destruct
如果要支持
Vec a = b + c + d; // 拷贝初始化:这里调用的是"从表达式构造 Vec"的转换构造函数,而不是拷贝构造函数
需要增加
// Converting constructor: builds a Vec directly from any expression by
// delegating to the templated operator=, so evaluation lives in one place.
template<typename EType>
Vec(const Exp<EType>& src_)
{
    this->operator=(src_);
}
类似下面的计算
A = B * F<maximum>(C, B);
// Generic binary expression: combines two operand expressions element-wise
// through OP::Map. Like every expression it derives from Exp and passes its
// own type as the template argument. Operands are held by reference, so they
// must stay alive until the expression has been evaluated.
template<typename OP, typename TLhs, typename TRhs>
struct BinaryMapExp: public Exp<BinaryMapExp<OP, TLhs, TRhs> >{
  const TLhs& lhs;
  const TRhs& rhs;
  // Records references to the two operands; nothing is computed here.
  BinaryMapExp(const TLhs& left, const TRhs& right)
      : lhs(left), rhs(right) {}
  // Evaluates the expression at position i: both operands are evaluated at i
  // and the two values are passed to OP::Map.
  inline float Eval(int i) const {
    return OP::Map(lhs.Eval(i), rhs.Eval(i));
  }
};
// Non-owning view over `len` floats starting at `dptr`; it is both a leaf
// expression (Eval) and the target that evaluates a whole expression
// (templated operator=).
// no constructor and destructor to allocate and de-allocate memory
// allocation done by user
struct Vec: public Exp<Vec>{
  int len;
  float* dptr;
  // A default-constructed Vec is an empty view (len 0, null pointer), so
  // assigning an expression to it is a no-op instead of a loop over an
  // indeterminate length.
  Vec(void) : len(0), dptr(nullptr) {}
  // Wraps the caller-provided buffer `dptr` holding `len` elements.
  Vec(float *dptr, int len)
      : len(len), dptr(dptr) {}
  // here is where evaluation happens: each of this vector's `len` elements is
  // set to the expression's value at that position.
  // NOTE(review): assumes the expression can be evaluated at every i < len — not checked.
  template<typename EType>
  inline Vec& operator=(const Exp<EType>& src_) {
    const EType &src = src_.self();
    for (int i = 0; i < len; ++i) {
      dptr[i] = src.Eval(i);
    }
    return *this;
  }
  // evaluation function, evaluate this expression at position i
  // (for a plain vector that is simply the stored element; no bounds check)
  inline float Eval(int i) const {
    return dptr[i];
  }
};
// Generic binary map, works for any pair of expressions: F<OP>(lhs, rhs)
// builds the BinaryMapExp node that will apply OP to the operands; nothing is
// evaluated here.
template<typename OP, typename TLhs, typename TRhs>
inline BinaryMapExp<OP, TLhs, TRhs>
F(const Exp<TLhs>& lhs, const Exp<TRhs>& rhs) {
  typedef BinaryMapExp<OP, TLhs, TRhs> Mapped;
  const TLhs& left = lhs.self();
  const TRhs& right = rhs.self();
  return Mapped(left, right);
}
// operator* for any pair of expressions, expressed through the generic map F
// with the `mul` operation (`mul` is not defined in this snippet; it has to
// provide a static Map like the other operations).
template<typename TLhs, typename TRhs>
inline BinaryMapExp<mul, TLhs, TRhs>
operator*(const Exp<TLhs>& lhs, const Exp<TRhs>& rhs) {
  // Nothing is multiplied here; only the expression node is built.
  return F<mul>(lhs, rhs);
}
// User-defined binary operation for use as an OP in a binary map expression:
// Map yields `a` when it compares greater than `b`, otherwise `b`.
struct maximum{
  inline static float Map(float a, float b) {
    if (a > b) {
      return a;
    }
    return b;
  }
};