上文中,我们已经完成了 RKNN 模型转换与部署工具的安装。下面我们对视觉算法中常用的 Resize 操作进行优化提速:RV1126/RV1109 采用 Arm Cortex-A7 处理器,使用 OpenCV 进行 resize 操作时耗时较大。针对这一场景,瑞芯微为我们提供了硬件加速方案,通过 RGA 和 DRM 可以成倍地降低 resize 操作的耗时。
图像大小 | resize_opencv | resize_rga |
---|---|---|
640*480 | 22ms | 3ms |
1280*720 | 30ms | 5ms |
PS:测试硬件为RV1126,从上表分析可知,提速比可达到6-7倍左右!!!
rga_func.h
#ifndef __RGA_FUNC_H__
#define __RGA_FUNC_H__
#include
#include "RgaApi.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Function-pointer type used for the librga symbol "c_RkRgaInit". */
typedef int (*FUNC_RGA_INIT)();
/* Function-pointer type used for the librga symbol "c_RkRgaDeInit". */
typedef void (*FUNC_RGA_DEINIT)();
/* Function-pointer type used for the librga symbol "c_RkRgaBlit".
 * This module calls it with a source descriptor, a destination descriptor
 * and NULL as the third argument. */
typedef int (*FUNC_RGA_BLIT)(rga_info_t *, rga_info_t *, rga_info_t *);
/* Runtime binding to librga: the dlopen() handle of the library plus the
 * entry points resolved from it with dlsym(). Filled in by RGA_init(). */
typedef struct _rga_context {
/* Handle returned by dlopen(); NULL means the library is not loaded. */
void *rga_handle;
/* Resolved "c_RkRgaInit". */
FUNC_RGA_INIT init_func;
/* Resolved "c_RkRgaDeInit". */
FUNC_RGA_DEINIT deinit_func;
/* Resolved "c_RkRgaBlit"; performs the actual resize. */
FUNC_RGA_BLIT blit_func;
} rga_context;
/* Load librga with dlopen(), resolve its entry points into rga_ctx and call
 * the library's init function. Returns 0 on success, -1 on failure. */
int RGA_init(rga_context *rga_ctx);
/* Resize an RGB888 image with RGA, reading the source through the file
 * descriptor src_fd and writing the result to the address dst_phys, which is
 * stored in the destination descriptor's phyAddr field.
 * Does nothing when the library handle in rga_ctx is NULL. A failing blit is
 * only reported with printf(); no status is returned to the caller. */
void img_resize_fast(rga_context *rga_ctx, int src_fd, int src_w, int src_h,
uint64_t dst_phys, int dst_w, int dst_h);
/* Resize an RGB888 image with RGA using virtual addresses for both the
 * source (src_virt) and the destination (dst_virt).
 * Does nothing when the library handle in rga_ctx is NULL. A failing blit is
 * only reported with printf(); no status is returned to the caller. */
void img_resize_slow(rga_context *rga_ctx, void *src_virt, int src_w, int src_h,
void *dst_virt, int dst_w, int dst_h);
/* Close the librga handle held by rga_ctx (if any) and clear it.
 * NOTE(review): the meaning of the int return value is not established by
 * the declaration — confirm against the implementation. */
int RGA_deinit(rga_context *rga_ctx);
#ifdef __cplusplus
}
#endif
#endif /*__RGA_FUNC_H__*/
rga_func.c
#include "rga_func.h"
//根据librga库实际路径进行修改
#define LIBRGAFILE "../lib/librga.so"
/**
 * Load librga at runtime and bind the entry points used by this module.
 *
 * On success rga_ctx holds the dlopen() handle together with the resolved
 * c_RkRgaInit / c_RkRgaDeInit / c_RkRgaBlit pointers, and c_RkRgaInit has
 * been called. On any failure the library is closed again and rga_ctx is
 * left zeroed, so the resize helpers (which test rga_handle) become no-ops
 * instead of calling through a NULL function pointer.
 *
 * @param rga_ctx  Context to fill in; must not be NULL.
 * @return 0 on success, -1 on failure.
 */
int RGA_init(rga_context *rga_ctx) {
    int ret;

    if (rga_ctx == NULL) {
        printf("rga context is NULL\n");
        return -1;
    }
    memset(rga_ctx, 0, sizeof(*rga_ctx));

    rga_ctx->rga_handle = dlopen(LIBRGAFILE, RTLD_LAZY);
    if (rga_ctx->rga_handle == NULL) {
        printf("dlopen %s failed\n", LIBRGAFILE);
        return -1;
    }

    rga_ctx->init_func =
        (FUNC_RGA_INIT)dlsym(rga_ctx->rga_handle, "c_RkRgaInit");
    rga_ctx->deinit_func =
        (FUNC_RGA_DEINIT)dlsym(rga_ctx->rga_handle, "c_RkRgaDeInit");
    rga_ctx->blit_func =
        (FUNC_RGA_BLIT)dlsym(rga_ctx->rga_handle, "c_RkRgaBlit");

    /* A missing symbol would otherwise be called through a NULL pointer. */
    if (rga_ctx->init_func == NULL || rga_ctx->deinit_func == NULL ||
        rga_ctx->blit_func == NULL) {
        printf("dlsym librga entry points failed\n");
        goto fail;
    }

    /* Non-zero is treated as failure, matching how this file treats the
     * result of c_RkRgaBlit. */
    ret = rga_ctx->init_func();
    if (ret != 0) {
        printf("c_RkRgaInit failed: %d\n", ret);
        goto fail;
    }
    return 0;

fail:
    dlclose(rga_ctx->rga_handle);
    memset(rga_ctx, 0, sizeof(*rga_ctx));
    return -1;
}
/**
 * Resize an RGB888 image with RGA, source given by file descriptor and
 * destination given by address.
 *
 * The call is a silent no-op when rga_ctx is NULL, when librga is not loaded
 * (rga_handle == NULL) or when the blit entry point was not resolved. A
 * failing blit is reported with printf(); no status is returned.
 *
 * @param rga_ctx   Context initialised by RGA_init().
 * @param src_fd    File descriptor of the source buffer.
 * @param src_w     Source width in pixels (also used as the stride).
 * @param src_h     Source height in pixels (also used as the stride).
 * @param dst_phys  Destination address, stored in the descriptor's phyAddr.
 * @param dst_w     Destination width in pixels (also used as the stride).
 * @param dst_h     Destination height in pixels (also used as the stride).
 */
void img_resize_fast(rga_context *rga_ctx, int src_fd, int src_w, int src_h,
                     uint64_t dst_phys, int dst_w, int dst_h) {
    rga_info_t src;
    rga_info_t dst;
    int ret;

    if (rga_ctx == NULL || rga_ctx->rga_handle == NULL ||
        rga_ctx->blit_func == NULL) {
        return;
    }

    memset(&src, 0, sizeof(src));
    src.fd = src_fd;
    src.mmuFlag = 1;

    memset(&dst, 0, sizeof(dst));
    dst.fd = -1;
    dst.mmuFlag = 0;
    /* uintptr_t narrows to the pointer width on 32-bit targets and is a
     * plain conversion on 64-bit ones, so no per-architecture #if is needed. */
    dst.phyAddr = (void *)(uintptr_t)dst_phys;
    dst.nn.nn_flag = 0;

    rga_set_rect(&src.rect, 0, 0, src_w, src_h, src_w, src_h,
                 RK_FORMAT_RGB_888);
    rga_set_rect(&dst.rect, 0, 0, dst_w, dst_h, dst_w, dst_h,
                 RK_FORMAT_RGB_888);

    ret = rga_ctx->blit_func(&src, &dst, NULL);
    if (ret) {
        printf("c_RkRgaBlit error : %s\n", strerror(errno));
    }
}
/**
 * Resize an RGB888 image with RGA using virtual addresses for both the
 * source and the destination buffer.
 *
 * The call is a silent no-op when rga_ctx is NULL, when librga is not loaded
 * (rga_handle == NULL) or when the blit entry point was not resolved. A
 * failing blit is reported with printf(); no status is returned.
 *
 * @param rga_ctx   Context initialised by RGA_init().
 * @param src_virt  Virtual address of the source pixels.
 * @param src_w     Source width in pixels (also used as the stride).
 * @param src_h     Source height in pixels (also used as the stride).
 * @param dst_virt  Virtual address of the destination buffer.
 *                  NOTE(review): presumably must hold dst_w * dst_h * 3
 *                  bytes for RGB888 — confirm against the librga docs.
 * @param dst_w     Destination width in pixels (also used as the stride).
 * @param dst_h     Destination height in pixels (also used as the stride).
 */
void img_resize_slow(rga_context *rga_ctx, void *src_virt, int src_w, int src_h,
                     void *dst_virt, int dst_w, int dst_h) {
    rga_info_t src;
    rga_info_t dst;
    int ret;

    if (rga_ctx == NULL || rga_ctx->rga_handle == NULL ||
        rga_ctx->blit_func == NULL) {
        return;
    }

    memset(&src, 0, sizeof(src));
    src.fd = -1;
    src.mmuFlag = 1;
    src.virAddr = src_virt;

    memset(&dst, 0, sizeof(dst));
    dst.fd = -1;
    dst.mmuFlag = 1;
    dst.virAddr = dst_virt;
    dst.nn.nn_flag = 0;

    rga_set_rect(&src.rect, 0, 0, src_w, src_h, src_w, src_h,
                 RK_FORMAT_RGB_888);
    rga_set_rect(&dst.rect, 0, 0, dst_w, dst_h, dst_w, dst_h,
                 RK_FORMAT_RGB_888);

    ret = rga_ctx->blit_func(&src, &dst, NULL);
    if (ret) {
        printf("c_RkRgaBlit error : %s\n", strerror(errno));
    }
}
/**
 * Shut down librga and unload it.
 *
 * Calls the library's c_RkRgaDeInit (the counterpart of the c_RkRgaInit call
 * made in RGA_init) when it was resolved, closes the dlopen() handle and
 * clears every field of the context. Safe to call on a NULL or
 * never-initialised (zeroed) context, and safe to call twice.
 *
 * @param rga_ctx  Context previously passed to RGA_init(); may be NULL.
 * @return Always 0. (The previous implementation fell off the end of a
 *         non-void function, so callers had no defined value to rely on.)
 */
int RGA_deinit(rga_context *rga_ctx) {
    if (rga_ctx == NULL || rga_ctx->rga_handle == NULL) {
        return 0;
    }

    if (rga_ctx->deinit_func != NULL) {
        rga_ctx->deinit_func();
    }
    dlclose(rga_ctx->rga_handle);

    /* The pointers below point into the library that was just unloaded. */
    rga_ctx->rga_handle = NULL;
    rga_ctx->init_func = NULL;
    rga_ctx->deinit_func = NULL;
    rga_ctx->blit_func = NULL;
    return 0;
}
drm_func.h
#ifndef __DRM_FUNC_H__
#define __DRM_FUNC_H__
#include
#include
#include
#include
#include
#include
#include // open function
#include
#include // close function
#include "libdrm/drm_fourcc.h"
#include "xf86drm.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef int (*FUNC_DRM_IOCTL)(int fd, unsigned long request, void *arg);
typedef struct _drm_context {
void *drm_handle;
FUNC_DRM_IOCTL io_func;
} drm_context;
/* Open /dev/dri/card0, load libdrm with dlopen() and resolve "drmIoctl" into
 * drm_ctx. Returns the opened DRM device file descriptor on success and -1
 * on failure. */
int drm_init(drm_context *drm_ctx);
/* Create a DRM dumb buffer of TexWidth x TexHeight pixels at bpp bits per
 * pixel, export it as a PRIME file descriptor and mmap() it.
 * Optional outputs (each may be NULL): *fd receives the PRIME fd, *handle
 * the dumb-buffer handle, *actual_size the size reported by the driver.
 * Returns the mapped virtual address, or NULL on failure. */
void *drm_buf_alloc(drm_context *drm_ctx, int drm_fd, int TexWidth,
int TexHeight, int bpp, int *fd, unsigned int *handle,
size_t *actual_size);
/* Unmap drm_buf (size bytes), destroy the dumb buffer identified by handle
 * and close buf_fd. Returns -1 when drm_buf is NULL; otherwise the status of
 * destroying the dumb buffer (0 on success). */
int drm_buf_destroy(drm_context *drm_ctx, int drm_fd, int buf_fd, int handle,
void *drm_buf, size_t size);
/* Unload libdrm (if loaded) and close the DRM device file descriptor. */
void drm_deinit(drm_context *drm_ctx, int drm_fd);
#ifdef __cplusplus
}
#endif
#endif /*__DRM_FUNC_H__*/
drm_func.cpp
#include "drm_func.h"
#include
//根据libdrm库实际路径进行修改
#define LIBDRMFILE "../lib/libdrm.so"
/**
 * Open the DRM device node and bind libdrm's ioctl wrapper.
 *
 * Steps: open /dev/dri/card0 read-write, dlopen() libdrm, resolve
 * "drmIoctl" into drm_ctx->io_func. Whenever a step after the open fails,
 * drm_deinit() is used to undo what was done so far.
 *
 * @param drm_ctx  Context that receives the library handle and io_func.
 * @return The DRM device file descriptor on success, -1 on failure.
 */
int drm_init(drm_context *drm_ctx) {
    static const char *card = "/dev/dri/card0";
    int device_fd = open(card, O_RDWR);

    if (device_fd < 0) {
        printf("failed to open %s\n", card);
        return -1;
    }

    drm_ctx->drm_handle = dlopen(LIBDRMFILE, RTLD_LAZY);
    if (drm_ctx->drm_handle == NULL) {
        printf("failed to dlopen %s \n",LIBDRMFILE);
        drm_deinit(drm_ctx, device_fd);
        return -1;
    }

    drm_ctx->io_func = (FUNC_DRM_IOCTL)dlsym(drm_ctx->drm_handle, "drmIoctl");
    if (drm_ctx->io_func != NULL) {
        return device_fd;
    }

    /* drm_deinit() closes the library, clears the handle and then closes
     * the device fd — the same sequence as closing the library by hand
     * before calling it. */
    drm_deinit(drm_ctx, device_fd);
    printf("failed to dlsym drmIoctl\n");
    return -1;
}
/**
 * Release what drm_init() acquired: unload libdrm and close the DRM device.
 *
 * @param drm_ctx  Context filled in by drm_init(); may be NULL, in which
 *                 case only the file descriptor is closed.
 * @param drm_fd   DRM device file descriptor; any negative value means
 *                 "no descriptor" and is ignored.
 */
void drm_deinit(drm_context *drm_ctx, int drm_fd) {
    if (drm_ctx != NULL && drm_ctx->drm_handle != NULL) {
        dlclose(drm_ctx->drm_handle);
        drm_ctx->drm_handle = NULL;
        /* io_func points into the library that was just unloaded. */
        drm_ctx->io_func = NULL;
    }
    /* 0 is a valid descriptor (open() returns it when stdin is closed), so
     * only negative values are treated as "not open". */
    if (drm_fd >= 0) {
        close(drm_fd);
    }
}
/**
 * Allocate a DRM dumb buffer, export it as a PRIME fd and map it.
 *
 * Sequence: DRM_IOCTL_MODE_CREATE_DUMB -> DRM_IOCTL_PRIME_HANDLE_TO_FD ->
 * DRM_IOCTL_MODE_MAP_DUMB -> mmap(). If any step fails, everything acquired
 * by the earlier steps (prime fd, dumb buffer) is released before returning,
 * and the output parameters are left untouched.
 *
 * @param drm_ctx      Context from drm_init(); io_func must be resolved.
 * @param drm_fd       DRM device file descriptor from drm_init().
 * @param TexWidth     Buffer width in pixels (> 0).
 * @param TexHeight    Buffer height in pixels (> 0).
 * @param bpp          Bits per pixel (> 0).
 * @param fd           Out, optional: PRIME fd of the buffer. When NULL the
 *                     fd is closed here, since the caller could never
 *                     close it.
 * @param handle       Out, optional: dumb-buffer handle.
 * @param actual_size  Out, optional: buffer size reported by the driver.
 * @return Mapped virtual address on success, NULL on failure.
 */
void *drm_buf_alloc(drm_context *drm_ctx, int drm_fd, int TexWidth,
                    int TexHeight, int bpp, int *fd, unsigned int *handle,
                    size_t *actual_size) {
    /* All locals are declared before the first goto so the cleanup jumps
     * never cross an initialisation. */
    int ret;
    int prime_fd = -1;
    void *map = NULL;
    struct drm_mode_create_dumb alloc_arg;
    struct drm_prime_handle fd_args;
    struct drm_mode_map_dumb mmap_arg;
    struct drm_mode_destroy_dumb destroy_arg;

    if (drm_ctx == NULL || drm_ctx->io_func == NULL) {
        printf("drm context is unvalid\n");
        return NULL;
    }
    if (TexWidth <= 0 || TexHeight <= 0 || bpp <= 0) {
        printf("invalid dumb buffer geometry: %dx%d, bpp=%d\n", TexWidth,
               TexHeight, bpp);
        return NULL;
    }

    /* Step 1: create the dumb buffer; the driver fills in handle and size. */
    memset(&alloc_arg, 0, sizeof(alloc_arg));
    alloc_arg.bpp = bpp;
    alloc_arg.width = TexWidth;
    alloc_arg.height = TexHeight;
    ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &alloc_arg);
    if (ret) {
        printf("failed to create dumb buffer: %s\n", strerror(errno));
        return NULL;
    }

    /* Step 2: export the handle as a PRIME file descriptor. */
    memset(&fd_args, 0, sizeof(fd_args));
    fd_args.fd = -1;
    fd_args.handle = alloc_arg.handle;
    fd_args.flags = 0;
    ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &fd_args);
    if (ret) {
        printf("rk-debug handle_to_fd failed ret=%d,err=%s, handle=%x \n", ret,
               strerror(errno), fd_args.handle);
        goto destroy_dumb;
    }
    prime_fd = fd_args.fd;

    /* Step 3: obtain the mmap offset and map the buffer. */
    memset(&mmap_arg, 0, sizeof(mmap_arg));
    mmap_arg.handle = alloc_arg.handle;
    ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_MODE_MAP_DUMB, &mmap_arg);
    if (ret) {
        printf("failed to create map dumb: %s\n", strerror(errno));
        goto close_prime_fd;
    }
    map = mmap(NULL, (size_t)alloc_arg.size, PROT_READ | PROT_WRITE,
               MAP_SHARED, drm_fd, mmap_arg.offset);
    if (map == MAP_FAILED) {
        printf("failed to mmap buffer: %s\n", strerror(errno));
        goto close_prime_fd;
    }

    /* Success: publish the results only now, so that a failed call never
     * hands out a handle or fd that has already been released. */
    if (handle != NULL) {
        *handle = alloc_arg.handle;
    }
    if (actual_size != NULL) {
        *actual_size = (size_t)alloc_arg.size;
    }
    if (fd != NULL) {
        *fd = prime_fd;
    } else {
        close(prime_fd);
    }
    return map;

close_prime_fd:
    close(prime_fd);
destroy_dumb:
    memset(&destroy_arg, 0, sizeof(destroy_arg));
    destroy_arg.handle = alloc_arg.handle;
    ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_arg);
    if (ret) {
        printf("failed to destory dumb %d\n", ret);
    }
    return NULL;
}
/**
 * Release a buffer obtained from drm_buf_alloc().
 *
 * Unmaps the buffer, destroys the dumb buffer via
 * DRM_IOCTL_MODE_DESTROY_DUMB and closes the PRIME fd. The unmap and the
 * close are performed even when the DRM context is unusable, so that as much
 * as possible is released.
 *
 * @param drm_ctx  Context from drm_init().
 * @param drm_fd   DRM device file descriptor.
 * @param buf_fd   PRIME fd of the buffer; negative means "none".
 * @param handle   Dumb-buffer handle returned by drm_buf_alloc().
 * @param drm_buf  Mapped address returned by drm_buf_alloc().
 * @param size     Mapping size (the actual_size from drm_buf_alloc()).
 * @return -1 when drm_buf is NULL or the context is unusable; otherwise the
 *         result of the destroy ioctl (0 on success).
 */
int drm_buf_destroy(drm_context *drm_ctx, int drm_fd, int buf_fd, int handle,
                    void *drm_buf, size_t size) {
    int ret = -1;
    struct drm_mode_destroy_dumb destory_arg;

    if (drm_buf == NULL) {
        printf("drm buffer is NULL\n");
        return -1;
    }

    if (munmap(drm_buf, size) != 0) {
        printf("failed to munmap buffer: %s\n", strerror(errno));
    }

    if (drm_ctx != NULL && drm_ctx->io_func != NULL) {
        memset(&destory_arg, 0, sizeof(destory_arg));
        destory_arg.handle = handle;
        ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_MODE_DESTROY_DUMB,
                               &destory_arg);
        if (ret) {
            printf("failed to destory dumb %d, error=%s\n", ret,
                   strerror(errno));
        }
    } else {
        printf("drm context is unvalid\n");
    }

    /* 0 is a valid descriptor; only negative values mean "not open". */
    if (buf_fd >= 0) {
        close(buf_fd);
    }
    return ret;
}
image_util.h
#ifndef _IMAGE_UTIL_H
#define _IMAGE_UTIL_H
#include "drm_func.h"
#include "opencv2/opencv.hpp"
#include "rga_func.h"
/**
 * Singleton wrapper around the RGA / DRM helpers that performs image
 * resizing through a DRM buffer and the RGA blit function.
 *
 * The only way to obtain an instance is getInstance(); construction runs
 * init() and destruction runs release(). Copying and moving are disabled.
 */
class ImageUtil {
 private:
  ImageUtil() { init(); }
  virtual ~ImageUtil() { release(); }
  ImageUtil(const ImageUtil &) = delete;
  ImageUtil(ImageUtil &&) = delete;
  ImageUtil &operator=(const ImageUtil &) = delete;
  ImageUtil &operator=(ImageUtil &&) = delete;

  void *drm_buf = NULL;     // mapped address of the DRM staging buffer
  int drm_fd = -1;          // DRM device fd returned by drm_init()
  int buf_fd = -1;          // converted from buffer handle
  // Initialised so release() never reads an indeterminate value when the
  // buffer allocation did not get far enough to report a handle.
  unsigned int handle = 0;  // dumb-buffer handle of drm_buf
  size_t actual_size = 0;   // size of drm_buf in bytes as reported by DRM
  rga_context rga_ctx;      // librga binding, zeroed and filled in by init()
  drm_context drm_ctx;      // libdrm binding, zeroed and filled in by init()

  // Acquire the DRM device, the staging buffer and the RGA binding.
  void init(void);
  // Release everything acquired by init().
  void release(void);

 public:
  // Access the single process-wide instance.
  static ImageUtil &getInstance();

  // Resize `src` to `size` and write the result to `dstPtr`.
  // The source is copied as 3 bytes per pixel.
  // NOTE(review): dstPtr presumably must hold size.width * size.height * 3
  // bytes — confirm against the RGA documentation.
  void resize(const cv::Mat &src, const cv::Size &size, void *dstPtr);
};
#endif
image_util.cpp
#include "image_util.h"
/**
 * Acquire all resources used by resize(): the DRM device, a staging buffer
 * large enough for a 1920x1080 image at 24 bits per pixel, and the RGA
 * binding.
 *
 * Failures are reported with printf() and leave the corresponding member in
 * its "not available" state (drm_fd < 0, drm_buf == NULL). In particular the
 * staging buffer is only requested when drm_init() succeeded, because
 * drm_buf_alloc() needs the ioctl function that drm_init() resolves.
 */
void ImageUtil::init(void) {
  // Capacity of the staging buffer: the largest source image supported.
  const int kMaxWidth = 1920;
  const int kMaxHeight = 1080;
  const int kBitsPerPixel = 24;

  memset(&rga_ctx, 0, sizeof(rga_context));
  memset(&drm_ctx, 0, sizeof(drm_context));

  drm_fd = drm_init(&drm_ctx);
  if (drm_fd < 0) {
    printf("ImageUtil: drm_init failed\n");
  } else {
    drm_buf = drm_buf_alloc(&drm_ctx, drm_fd, kMaxWidth, kMaxHeight,
                            kBitsPerPixel, &buf_fd, &handle, &actual_size);
    if (drm_buf == NULL) {
      printf("ImageUtil: drm_buf_alloc failed\n");
      // No usable mapping exists, so no capacity must be advertised.
      actual_size = 0;
    }
  }

  if (RGA_init(&rga_ctx) != 0) {
    printf("ImageUtil: RGA_init failed\n");
  }
}
/**
 * Release everything acquired by init(), in reverse order of dependency:
 * the staging buffer first (it needs the DRM device and ioctl function),
 * then the DRM binding, then the RGA binding.
 *
 * All members are reset afterwards so that no dangling pointer or closed
 * descriptor remains and a second call has nothing left to release.
 */
void ImageUtil::release(void) {
  // Skip the destroy call when no buffer was ever mapped; there is nothing
  // to unmap and the helper would only log an error.
  if (drm_buf != NULL) {
    drm_buf_destroy(&drm_ctx, drm_fd, buf_fd, handle, drm_buf, actual_size);
  }
  drm_deinit(&drm_ctx, drm_fd);
  RGA_deinit(&rga_ctx);

  drm_buf = NULL;
  drm_fd = -1;
  buf_fd = -1;
  handle = 0;
  actual_size = 0;
}
/**
 * Resize `src` to `size` using RGA and store the result at `dstPtr`.
 *
 * The source pixels are first copied into the DRM staging buffer and then
 * resized from there with img_resize_slow(). The call is rejected (with a
 * message, nothing written) when:
 *   - src is empty,
 *   - src does not use 3 bytes per pixel (the RGA call is RGB888),
 *   - dstPtr is NULL or the target size is not positive,
 *   - the staging buffer is missing or too small for src.
 *
 * @param src     Source image, 3 bytes per pixel; may be non-continuous.
 * @param size    Target width/height in pixels.
 * @param dstPtr  Destination buffer.
 *                NOTE(review): presumably must hold
 *                size.width * size.height * 3 bytes — confirm against the
 *                RGA documentation.
 */
void ImageUtil::resize(const cv::Mat& src, const cv::Size& size, void* dstPtr) {
  if (src.empty()) {
    printf("src is empty!\n");
    return;
  }
  if (src.elemSize() != 3) {
    printf("src must have 3 bytes per pixel, got %zu\n", src.elemSize());
    return;
  }
  if (dstPtr == NULL || size.width <= 0 || size.height <= 0) {
    printf("invalid resize destination\n");
    return;
  }
  if (drm_buf == NULL) {
    printf("drm buffer is not available\n");
    return;
  }

  const int img_width = src.cols;
  const int img_height = src.rows;
  const size_t row_bytes = static_cast<size_t>(img_width) * src.elemSize();
  const size_t total_bytes = row_bytes * static_cast<size_t>(img_height);

  // The staging buffer has a fixed capacity; never write past it.
  if (total_bytes > actual_size) {
    printf("src (%dx%d) does not fit into the drm buffer (%zu bytes)\n",
           img_width, img_height, actual_size);
    return;
  }

  if (src.isContinuous()) {
    memcpy(drm_buf, src.data, total_bytes);
  } else {
    // Rows of a ROI / padded Mat are not adjacent in memory: pack them.
    unsigned char *out = static_cast<unsigned char *>(drm_buf);
    for (int row = 0; row < img_height; ++row) {
      memcpy(out + static_cast<size_t>(row) * row_bytes, src.ptr(row),
             row_bytes);
    }
  }

  img_resize_slow(&rga_ctx, drm_buf, img_width, img_height, dstPtr, size.width,
                  size.height);
}
/**
 * Return the single ImageUtil instance.
 *
 * The instance is a function-local static, so it is constructed the first
 * time this function is called and the same object is returned on every
 * later call.
 */
ImageUtil& ImageUtil::getInstance() {
  static ImageUtil instance;
  return instance;
}
至此,前置准备工作已基本就绪,下一步将对大家感兴趣的各类代码进行移植与测试,欢迎各位粉丝积极留言与探讨。