Following the design of Mat and GpuMat in OpenCV, a GPU version of the existing Image class was designed: GPUImage.
The image header is defined first.
struct GPUImageHeader
{
    int32_t nWidth = 0;      // width
    int32_t nHeight = 0;     // height
    int16_t nChannel = 0;    // number of channels
    int32_t nRefCount = 0;   // reference count
    void* pImage = nullptr;  // address of the image data on the GPU
};
The pImage pointer holds an address in GPU memory. The reference count implements shallow copies and automatic release of the device memory. Unlike OpenCV and NPP, which track channel count and depth separately, GPUImage's nChannel can be understood as channels × depth, i.e. the number of bytes per pixel; for example, an 8-bit 3-channel image has nChannel = 3, while a 16-bit 3-channel image has nChannel = 6.
GPUImage manages device memory allocated with cudaMalloc; on GBox devices, cudaMallocManaged is used instead so that the GPU and CPU share the same address. The binding feature is not implemented for now.
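As a rough sketch only (not part of the current implementation), the allocation call could be selected at build time; the GBOX_MANAGED_MEMORY macro and AllocDeviceBuffer helper below are hypothetical names:
#include <cuda_runtime.h>

// Hypothetical helper: choose plain device memory or managed (CPU/GPU shared) memory.
static cudaError_t AllocDeviceBuffer(void** ppBuf, size_t memSize)
{
#ifdef GBOX_MANAGED_MEMORY
    // On GBox devices: one address visible to both CPU and GPU.
    return cudaMallocManaged(ppBuf, memSize);
#else
    // Elsewhere: plain device memory.
    return cudaMalloc(ppBuf, memSize);
#endif
}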
class BASE_EXPORT GPUImage
{
public:
    GPUImage();
    GPUImage(int nW, int nH, int nC);    // create
    ~GPUImage();
    GPUImage(const GPUImage& m);
    GPUImage& operator=(const GPUImage& m);
    void Create(int nW, int nH, int nC); // uses the default memory layout
    void Release();
    // conversion to and from a regular (host) Image
    GPUImage(Image& m);
    bool ToImage(Image& m);
    bool FromImage(Image& m);
    // getters
    void* Ptr();
    int Width();
    int Height();
    int Depth();
    Rect GetRect();
private:
    GPUImageHeader* m_pHeader = nullptr;
};
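ToImage and FromImage are only declared here; a minimal sketch of how they might be implemented with cudaMemcpy is shown below. It assumes the host-side Image class exposes Width(), Height(), Channel() and Ptr() accessors, which is an assumption rather than the actual Image API:
// Sketch: upload a host Image into this GPUImage (Image accessors are assumed).
bool GPUImage::FromImage(Image& m)
{
    Create(m.Width(), m.Height(), m.Channel());  // allocate a matching device buffer
    size_t memSize = static_cast<size_t>(m.Width()) * m.Height() * m.Channel();
    return cudaMemcpy(m_pHeader->pImage, m.Ptr(), memSize,
                      cudaMemcpyHostToDevice) == cudaSuccess;
}

// Sketch: download the device buffer into a host Image sized by the caller.
bool GPUImage::ToImage(Image& m)
{
    if (!m_pHeader || !m_pHeader->pImage)
        return false;
    size_t memSize = static_cast<size_t>(m_pHeader->nWidth) * m_pHeader->nHeight * m_pHeader->nChannel;
    return cudaMemcpy(m.Ptr(), m_pHeader->pImage, memSize,
                      cudaMemcpyDeviceToHost) == cudaSuccess;
}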
Image creation:
void GPUImage::Create(int nW, int nH, int nC)
{
    if (DebugCmd)
        std::cout << "void GPUImage::Create(int nW, int nH, int nC)" << std::endl;
    // release whatever is currently held
    Release();
    m_pHeader = new GPUImageHeader();
    m_pHeader->nWidth = nW;
    m_pHeader->nHeight = nH;
    m_pHeader->nChannel = nC;
    // width * height * bytes-per-pixel, plus 31 extra bytes of padding
    size_t memSize = static_cast<size_t>(nW) * nH * nC + 31;
    auto ret = cudaMalloc((void**)&(m_pHeader->pImage), memSize);
    // throw if the allocation failed
    if (ret != cudaSuccess)
    {
        delete m_pHeader;
        m_pHeader = nullptr;
        throw std::runtime_error("GPUImage::Create cudaMalloc failed");
    }
    META_ATOMIC_ADD(&m_pHeader->nRefCount, 1);
    if (DebugCmd)
        std::cout << "m_Image.nRefCount:" << m_pHeader->nRefCount << std::endl;
}
cudaMalloc is used to allocate the image buffer.
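A minimal usage sketch of creation and release (the sizes are arbitrary):
void CreateExample()
{
    MetaCore::GPUImage im;
    im.Create(1920, 1080, 3);  // allocates 1920*1080*3 + 31 bytes of device memory, nRefCount becomes 1
    void* pDev = im.Ptr();     // raw device pointer, e.g. for kernel launches or NPP calls
    (void)pDev;
    im.Release();              // nRefCount drops to 0 and cudaFree is called
}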
Image release:
void GPUImage::Release()
{
    if (DebugCmd)
        std::cout << "Release()" << std::endl;
    if (m_pHeader)
    {
        assert(m_pHeader->nRefCount > 0);
        if (META_ATOMIC_ADD(&m_pHeader->nRefCount, -1) == 1)
        {
            if (m_pHeader->pImage)
            {
                auto ret = cudaFree(m_pHeader->pImage);
                // throw if the free failed
                if (ret != cudaSuccess)
                    throw std::runtime_error("GPUImage::Release cudaFree failed");
            }
            m_pHeader->pImage = nullptr;
            delete m_pHeader;
            m_pHeader = nullptr;
            if (DebugCmd)
                std::cout << "Release::m_pHeader:delete" << std::endl;
        }
        else
        {
            if (DebugCmd)
                std::cout << "Release::m_Image.nRefCount:" << m_pHeader->nRefCount << std::endl;
            // detach from the shared header so this object no longer holds a reference
            m_pHeader = nullptr;
        }
    }
    else
    {
        if (DebugCmd)
            std::cout << "m_pHeader == nullptr" << std::endl;
    }
}
When releasing, the reference count is checked; the device memory is freed only when the count drops to zero.
Copy constructor:
GPUImage::GPUImage(const GPUImage& m)
{
    if (DebugCmd)
        std::cout << "GPUImage(const GPUImage& m)" << std::endl;
    if (m.m_pHeader)
    {
        META_ATOMIC_ADD(&m.m_pHeader->nRefCount, 1);
        m_pHeader = m.m_pHeader;
        if (DebugCmd)
            std::cout << "m_Image.nRefCount:" << m_pHeader->nRefCount << std::endl;
    }
    else
    {
        m_pHeader = nullptr;
        if (DebugCmd)
            std::cout << "m_Image.nRefCount:0,m_pHeader == nullptr" << std::endl;
    }
}
Copy construction does not allocate new device memory; it simply increments the reference count (a shallow copy).
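For example, the expected reference counts when two objects share one buffer (a sketch of the intended behaviour):
void CopyExample()
{
    MetaCore::GPUImage a;
    a.Create(640, 480, 1);   // nRefCount == 1
    MetaCore::GPUImage b(a); // shallow copy of the header, nRefCount == 2
    b.Release();             // nRefCount back to 1, device memory is NOT freed
    a.Release();             // nRefCount reaches 0, cudaFree is called
}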
Copy assignment operator:
GPUImage& GPUImage::operator=(const GPUImage& m)
{
    if (DebugCmd)
        std::cout << "GPUImage& operator=(const GPUImage& m)" << std::endl;
    if (this->m_pHeader != m.m_pHeader)
    {
        if (m.m_pHeader)
        {
            META_ATOMIC_ADD(&m.m_pHeader->nRefCount, 1);
            Release(); // drop the image currently held
            m_pHeader = m.m_pHeader;
            if (DebugCmd)
                std::cout << "m_Image.nRefCount:" << m_pHeader->nRefCount << std::endl;
        }
        else
        {
            Release(); // drop the image currently held before becoming empty
            m_pHeader = nullptr;
            if (DebugCmd)
                std::cout << "m_Image.nRefCount:0,m_pHeader == nullptr" << std::endl;
        }
    }
    else
    {
        if (DebugCmd)
            std::cout << "this->m_pHeader == m.m_pHeader" << std::endl;
    }
    return *this;
}
During assignment, the source image's reference count is incremented and the image currently held by this object is released; the object then takes over the source header.
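A short sketch of the expected assignment behaviour:
void AssignExample()
{
    MetaCore::GPUImage a, b;
    a.Create(640, 480, 1); // a's buffer: nRefCount == 1
    b.Create(320, 240, 1); // b's buffer: nRefCount == 1
    b = a;                 // b's own buffer is released (count hits 0, freed); b now shares a's buffer, whose count becomes 2
}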
With DebugCmd enabled above, test the following code:
void test1(MetaCore::GPUImage imA)
{
    std::cout << "expect the reference count to drop by 1 when this function returns" << std::endl;
    return;
}
void test2(MetaCore::GPUImage& imB)
{
    std::cout << "expect nothing to happen" << std::endl;
    return;
}
MetaCore::GPUImage test3()