图像旋转特点:每个像素相互独立,旋转后像素位置为浮点数,需要进行插值。
有以下几种方法:
*.cpp文件
// Rotates an 8-bit single-channel image (and optionally its mask) about its
// centre on the GPU and returns the result in freshly allocated host buffers.
//
// Parameters:
//   img      Host buffer with imgSize.width * imgSize.height bytes, rows stored
//            back to back (the kernel indexes it as x + y * width).
//   imgMask  Optional host mask with the same layout as img. May be nullptr;
//            the kernel then writes 255 for every destination pixel that maps
//            inside the source image.
//   imgSize  Dimensions of img / imgMask. Both must be positive.
//   angle    Rotation angle in radians (passed straight to cos / sin).
//   dst      On success receives a new[]-allocated buffer with
//            dstSize.width * dstSize.height bytes; release it with delete[].
//   dstMask  On success receives the rotated mask, allocated like dst.
//   dstSize  On success receives the bounding-box size of the rotated image.
//
// Returns 0 on success and -1 on invalid input or on any CUDA failure. The
// three output parameters are only written when the function succeeds.
int RotateImage(const unsigned char* img, const unsigned char* imgMask, const cv::Size imgSize, const float angle, unsigned char*& dst, unsigned char*& dstMask, cv::Size& dstSize)
{
    // Checking each dimension (instead of the area) also rejects the case
    // where both are negative and their product happens to be positive.
    if (img == nullptr || imgSize.width <= 0 || imgSize.height <= 0)
        return -1;

    // Axis-aligned bounding box of the rotated rectangle. fabs is used instead
    // of an unqualified abs so the value can never be routed through the
    // integer abs(int) overload and truncated.
    cv::Size rotatedSize;
    rotatedSize.width = (int)(fabs(imgSize.width * cos(angle)) + fabs(imgSize.height * sin(angle)));
    rotatedSize.height = (int)(fabs(imgSize.width * sin(angle)) + fabs(imgSize.height * cos(angle)));
    if (rotatedSize.width <= 0 || rotatedSize.height <= 0)
        return -1;

    // RotateImage_CUDA takes the pixel count as an int, so refuse anything
    // that would not fit rather than let the multiplication wrap around.
    const long long dstPixels = (long long)rotatedSize.width * (long long)rotatedSize.height;
    if (dstPixels > 0x7fffffffLL)
        return -1;

    // Byte counts are computed in size_t to avoid int overflow on big images.
    const size_t srcBytes = sizeof(unsigned char) * (size_t)imgSize.width * (size_t)imgSize.height;
    const size_t dstBytes = sizeof(unsigned char) * (size_t)dstPixels;

    // Device-side copies of the inputs (before rotation) ...
    unsigned char* gpu_img = nullptr;
    unsigned char* gpu_imgMask = nullptr; // stays nullptr when no mask was supplied
    cv::Size* gpu_imgSize = nullptr;
    float* gpu_angle = nullptr;
    // ... and of the outputs (after rotation).
    unsigned char* gpu_dst = nullptr;
    unsigned char* gpu_dstMask = nullptr;
    cv::Size* gpu_dstSize = nullptr;

    // Host result buffers; handed to the caller only if everything succeeds.
    unsigned char* hostDst = nullptr;
    unsigned char* hostDstMask = nullptr;

    int status = -1;
    do
    {
        // Allocate device memory. The kernel only writes indices below
        // width * height of the destination, so dstBytes is sufficient.
        if (cudaMalloc((void**)&gpu_img, srcBytes) != cudaSuccess) break;
        if (imgMask != nullptr && cudaMalloc((void**)&gpu_imgMask, srcBytes) != cudaSuccess) break;
        if (cudaMalloc((void**)&gpu_imgSize, sizeof(cv::Size)) != cudaSuccess) break;
        if (cudaMalloc((void**)&gpu_angle, sizeof(float)) != cudaSuccess) break;
        if (cudaMalloc((void**)&gpu_dst, dstBytes) != cudaSuccess) break;
        if (cudaMalloc((void**)&gpu_dstMask, dstBytes) != cudaSuccess) break;
        if (cudaMalloc((void**)&gpu_dstSize, sizeof(cv::Size)) != cudaSuccess) break;

        // Upload the inputs.
        if (cudaMemcpy(gpu_img, img, srcBytes, cudaMemcpyHostToDevice) != cudaSuccess) break;
        if (imgMask != nullptr && cudaMemcpy(gpu_imgMask, imgMask, srcBytes, cudaMemcpyHostToDevice) != cudaSuccess) break;
        if (cudaMemcpy(gpu_imgSize, &imgSize, sizeof(cv::Size), cudaMemcpyHostToDevice) != cudaSuccess) break;
        if (cudaMemcpy(gpu_angle, &angle, sizeof(float), cudaMemcpyHostToDevice) != cudaSuccess) break;
        if (cudaMemcpy(gpu_dstSize, &rotatedSize, sizeof(cv::Size), cudaMemcpyHostToDevice) != cudaSuccess) break;

        // Launch one thread per destination pixel. gpu_imgMask is nullptr
        // exactly when the caller passed no mask, which is what the kernel
        // tests for.
        RotateImage_CUDA(gpu_img, gpu_imgMask, gpu_imgSize, gpu_angle, gpu_dst, gpu_dstMask, gpu_dstSize, (int)dstPixels);
        if (cudaGetLastError() != cudaSuccess) break;     // launch-configuration errors
        if (cudaDeviceSynchronize() != cudaSuccess) break; // wait for the GPU; surfaces execution errors

        // Download the results. No memset is needed: the copies overwrite
        // every byte of both buffers.
        // NOTE(review): a std::bad_alloc thrown here would skip the cleanup below.
        hostDst = new unsigned char[dstBytes];
        hostDstMask = new unsigned char[dstBytes];
        if (cudaMemcpy(hostDst, gpu_dst, dstBytes, cudaMemcpyDeviceToHost) != cudaSuccess) break;
        if (cudaMemcpy(hostDstMask, gpu_dstMask, dstBytes, cudaMemcpyDeviceToHost) != cudaSuccess) break;

        status = 0;
    } while (false);

    // Release device memory on every path; cudaFree on a null pointer is a no-op.
    cudaFree(gpu_img);
    cudaFree(gpu_imgMask);
    cudaFree(gpu_imgSize);
    cudaFree(gpu_angle);
    cudaFree(gpu_dst);
    cudaFree(gpu_dstMask);
    cudaFree(gpu_dstSize);

    if (status != 0)
    {
        delete[] hostDst;
        delete[] hostDstMask;
        return -1;
    }

    dst = hostDst;
    dstMask = hostDstMask;
    dstSize = rotatedSize;

#ifdef _DEBUG
    // Non-owning views over the result buffers; presumably kept so the images
    // can be inspected in a debugger.
    cv::Mat mat_dst = cv::Mat(dstSize, CV_8UC1, dst, 0);
    cv::Mat mat_dstMask = cv::Mat(dstSize, CV_8UC1, dstMask, 0);
#endif
    return 0;
}
*.cu文件
// Declaration of the rotation kernel so it is visible at the launch site
// below regardless of where its definition appears in the translation unit.
template <typename T>
__global__ void RotateImage_CUDA_SubFunction(T* img, T* maskOld, cv::Size* imgsize, float* angle,
    T* result, T* maskNew, cv::Size* resultsize);

// Host-side launcher for RotateImage_CUDA_SubFunction<unsigned char>.
//
// Every pointer argument is forwarded unchanged to the kernel, which
// dereferences them on the GPU, so they must be device pointers. maskOld may
// be nullptr (the kernel checks for it).
//
//   threads  Number of destination pixels; one GPU thread is launched per
//            pixel, rounded up to a whole number of blocks. Nothing is
//            launched when threads <= 0.
//
// The launch goes to the default stream and is asynchronous. This function
// returns void and does not query the launch status, so callers should check
// cudaGetLastError() and synchronize before reading the results.
void RotateImage_CUDA(unsigned char* img, unsigned char* maskOld, cv::Size* imgsize, float* angle,
    unsigned char* result, unsigned char* maskNew, cv::Size* resultsize, int threads)
{
    // A grid with zero blocks is an invalid launch configuration.
    if (threads <= 0)
        return;

    const int block_size = 256;
    // Ceiling division written so that it cannot overflow for large counts.
    const int grid_size = (threads - 1) / block_size + 1;

    RotateImage_CUDA_SubFunction<unsigned char><<<grid_size, block_size>>>(img, maskOld, imgsize, angle,
        result, maskNew, resultsize);
}
// Nearest-neighbour rotation kernel: each thread produces one destination
// pixel by rotating its coordinates about the image centres by -angle and
// reading the source pixel found there.
//
// Launch layout: 1D grid of 1D blocks with at least
// resultsize->width * resultsize->height threads in total; surplus threads
// exit immediately. No shared memory is used.
//
//   img        Source pixels, indexed as x + y * imgsize->width.
//   maskOld    Source mask with the same layout, or nullptr.
//   imgsize    Source dimensions.
//   angle      Rotation angle in radians.
//   result     Destination pixels, indexed by the flat thread id.
//   maskNew    Destination mask. Receives maskOld's value at the sampled
//              position, or 255 when maskOld is nullptr; 0 where the sampled
//              position lies outside the source image.
//   resultsize Destination dimensions.
template <typename T>
__global__ void RotateImage_CUDA_SubFunction(T* img, T* maskOld, cv::Size* imgsize, float* angle,
    T* result, T* maskNew, cv::Size* resultsize)
{
    const int dstW = resultsize->width;
    const int dstH = resultsize->height;
    const int srcW = imgsize->width;
    const int srcH = imgsize->height;

    // Flat thread id == flat destination pixel index.
    const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;

    // Bail out before any division so an empty destination cannot cause a
    // modulo/divide by zero, and so surplus threads do no work.
    if (dstW <= 0 || dstH <= 0 || tid >= (unsigned int)dstW * (unsigned int)dstH)
        return;

    // Destination coordinates relative to the destination centre.
    const float dx = (float)(tid % (unsigned int)dstW) - 0.5f * dstW;
    const float dy = (float)(tid / (unsigned int)dstW) - 0.5f * dstH;

    // Inverse rotation; each trigonometric value is computed once, in float.
    const float c = cosf(-*angle);
    const float s = sinf(-*angle);

    // floorf rather than a plain cast: a cast truncates toward zero, which
    // would fold source coordinates in (-1, 0) onto row/column 0 and smear
    // the image border by one pixel.
    const int x_org = (int)floorf(dx * c - dy * s + 0.5f * srcW);
    const int y_org = (int)floorf(dx * s + dy * c + 0.5f * srcH);

    const bool inside = x_org >= 0 && x_org < srcW && y_org >= 0 && y_org < srcH;
    if (!inside)
    {
        // Destination pixel has no counterpart in the source image.
        result[tid] = 0;
        maskNew[tid] = 0;
        return;
    }

    const int srcIdx = x_org + y_org * srcW;
    result[tid] = img[srcIdx];
    if (maskOld == nullptr)
        maskNew[tid] = 255;
    else
        maskNew[tid] = maskOld[srcIdx];
}