[RV1109/RV1126系列]-4.RGA、DRM对图像Resize加速

[RV1109/RV1126系列]-4.RGA、DRM对图像Resize加速_第1张图片

RGA、DRM对图像Resize加速

   上文中,我们已经完成了RKNN基本的转换、部署工具的安装,下面我们对视觉算法中常用的 Resize 操作进行优化提速。因RV1126/1109采用Arm Cortex-A7处理器,我们在使用 opencv 进行resize操作时,发现会有较大的耗时。针对这一场景,瑞芯微为我们提供了硬件加速方案,通过RGA和DRM可以成倍地缩短resize操作的耗时。

1.测试结果

图像大小 resize_opencv resize_rga
640*480 22ms 3ms
1280*720 30ms 5ms

PS:测试硬件为RV1126,从上表分析可知,提速比可达到6-7倍左右!!!

2. RGA部分代码(使用瑞芯微官方源码)

rga_func.h

#ifndef __RGA_FUNC_H__
#define __RGA_FUNC_H__

#include <dlfcn.h>

#include "RgaApi.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Function-pointer types for the librga entry points that RGA_init resolves
 * with dlsym: "c_RkRgaInit", "c_RkRgaDeInit" and "c_RkRgaBlit". */
typedef int (*FUNC_RGA_INIT)();
typedef void (*FUNC_RGA_DEINIT)();
typedef int (*FUNC_RGA_BLIT)(rga_info_t *, rga_info_t *, rga_info_t *);

/* State for a dynamically loaded librga: the dlopen handle plus the entry
 * points looked up from it. Filled in by RGA_init. */
typedef struct _rga_context {
  void *rga_handle;             /* dlopen() handle; NULL when not loaded */
  FUNC_RGA_INIT init_func;      /* resolved from "c_RkRgaInit" */
  FUNC_RGA_DEINIT deinit_func;  /* resolved from "c_RkRgaDeInit" */
  FUNC_RGA_BLIT blit_func;      /* resolved from "c_RkRgaBlit" */
} rga_context;

/* Loads librga with dlopen, resolves its entry points into rga_ctx and calls
 * the library's init function. Returns 0 on success, -1 on failure. */
int RGA_init(rga_context *rga_ctx);

/* Resizes an RGB888 image of src_w x src_h to dst_w x dst_h through the RGA
 * blit entry point. The source is passed as a file descriptor (src_fd) and the
 * destination as a physical address (dst_phys). Does nothing when
 * rga_ctx->rga_handle is NULL; a blit error is only printed. */
void img_resize_fast(rga_context *rga_ctx, int src_fd, int src_w, int src_h,
                     uint64_t dst_phys, int dst_w, int dst_h);

/* Same resize as img_resize_fast, but both source and destination are passed
 * as virtual addresses (src_virt / dst_virt). Does nothing when
 * rga_ctx->rga_handle is NULL; a blit error is only printed. */
void img_resize_slow(rga_context *rga_ctx, void *src_virt, int src_w, int src_h,
                     void *dst_virt, int dst_w, int dst_h);

/* Closes the librga handle held by rga_ctx (if any) and clears it. */
int RGA_deinit(rga_context *rga_ctx);

#ifdef __cplusplus
}
#endif
#endif /*__RGA_FUNC_H__*/

rga_func.c

#include "rga_func.h"

// Adjust this to the actual location of the librga shared library.
#define LIBRGAFILE "../lib/librga.so"
/**
 * Loads librga at runtime and prepares rga_ctx for blitting.
 *
 * Opens LIBRGAFILE with dlopen, resolves "c_RkRgaInit", "c_RkRgaDeInit" and
 * "c_RkRgaBlit", then calls the library's init function.
 *
 * @param rga_ctx context to fill in; must not be NULL.
 * @return 0 on success; -1 if rga_ctx is NULL, the library cannot be opened,
 *         a symbol is missing, or the library's init function reports an
 *         error. On failure the context holds no handle and no function
 *         pointers, so the resize helpers become no-ops.
 */
int RGA_init(rga_context *rga_ctx) {
  if (!rga_ctx) {
    printf("RGA_init: rga context is NULL\n");
    return -1;
  }

  /* Start from a known state so a failed init never leaves stale pointers. */
  rga_ctx->init_func = NULL;
  rga_ctx->deinit_func = NULL;
  rga_ctx->blit_func = NULL;

  rga_ctx->rga_handle = dlopen(LIBRGAFILE, RTLD_LAZY);
  if (!rga_ctx->rga_handle) {
    printf("dlopen %s failed\n", LIBRGAFILE);
    return -1;
  }

  rga_ctx->init_func = (FUNC_RGA_INIT)dlsym(rga_ctx->rga_handle, "c_RkRgaInit");
  rga_ctx->deinit_func =
      (FUNC_RGA_DEINIT)dlsym(rga_ctx->rga_handle, "c_RkRgaDeInit");
  rga_ctx->blit_func = (FUNC_RGA_BLIT)dlsym(rga_ctx->rga_handle, "c_RkRgaBlit");

  /* A missing symbol would otherwise be called through a NULL pointer, either
   * right below (init_func) or later in the resize helpers (blit_func). */
  if (!rga_ctx->init_func || !rga_ctx->deinit_func || !rga_ctx->blit_func) {
    printf("dlsym failed for %s\n", LIBRGAFILE);
    goto fail;
  }

  if (rga_ctx->init_func() != 0) {
    printf("c_RkRgaInit failed\n");
    goto fail;
  }
  return 0;

fail:
  dlclose(rga_ctx->rga_handle);
  rga_ctx->rga_handle = NULL;
  rga_ctx->init_func = NULL;
  rga_ctx->deinit_func = NULL;
  rga_ctx->blit_func = NULL;
  return -1;
}

/**
 * Resizes an RGB888 image with RGA, reading the source through a file
 * descriptor and writing the result to a physical address.
 *
 * Does nothing when librga has not been loaded (rga_handle is NULL). A blit
 * failure is reported on stdout only.
 */
void img_resize_fast(rga_context *rga_ctx, int src_fd, int src_w, int src_h,
                     uint64_t dst_phys, int dst_w, int dst_h) {
  if (!rga_ctx->rga_handle) {
    return;
  }

  rga_info_t src_info;
  rga_info_t dst_info;
  memset(&src_info, 0, sizeof(src_info));
  memset(&dst_info, 0, sizeof(dst_info));

  /* Source: addressed by fd, with the MMU flag set. */
  src_info.fd = src_fd;
  src_info.mmuFlag = 1;
  rga_set_rect(&src_info.rect, 0, 0, src_w, src_h, src_w, src_h,
               RK_FORMAT_RGB_888);

  /* Destination: addressed by physical address, no fd, MMU flag cleared. */
  dst_info.fd = -1;
  dst_info.mmuFlag = 0;
#if defined(__arm__)
  /* Narrow to 32 bits before the pointer cast on 32-bit ARM builds. */
  dst_info.phyAddr = (void *)((uint32_t)dst_phys);
#else
  dst_info.phyAddr = (void *)dst_phys;
#endif
  dst_info.nn.nn_flag = 0;
  rga_set_rect(&dst_info.rect, 0, 0, dst_w, dst_h, dst_w, dst_h,
               RK_FORMAT_RGB_888);

  if (rga_ctx->blit_func(&src_info, &dst_info, NULL) != 0) {
    printf("c_RkRgaBlit error : %s\n", strerror(errno));
  }
}

/**
 * Resizes an RGB888 image with RGA, addressing both the source and the
 * destination by virtual address.
 *
 * Does nothing when librga has not been loaded (rga_handle is NULL). A blit
 * failure is reported on stdout only.
 */
void img_resize_slow(rga_context *rga_ctx, void *src_virt, int src_w, int src_h,
                     void *dst_virt, int dst_w, int dst_h) {
  if (!rga_ctx->rga_handle) {
    return;
  }

  rga_info_t src_info;
  rga_info_t dst_info;
  memset(&src_info, 0, sizeof(src_info));
  memset(&dst_info, 0, sizeof(dst_info));

  /* Both sides use virtual addresses: no fd, MMU flag set. */
  src_info.fd = -1;
  src_info.mmuFlag = 1;
  src_info.virAddr = (void *)src_virt;
  rga_set_rect(&src_info.rect, 0, 0, src_w, src_h, src_w, src_h,
               RK_FORMAT_RGB_888);

  dst_info.fd = -1;
  dst_info.mmuFlag = 1;
  dst_info.virAddr = dst_virt;
  dst_info.nn.nn_flag = 0;
  rga_set_rect(&dst_info.rect, 0, 0, dst_w, dst_h, dst_w, dst_h,
               RK_FORMAT_RGB_888);

  if (rga_ctx->blit_func(&src_info, &dst_info, NULL) != 0) {
    printf("c_RkRgaBlit error : %s\n", strerror(errno));
  }
}

/**
 * Unloads librga and clears the context.
 *
 * @param rga_ctx context previously passed to RGA_init.
 * @return 0 on success (including when nothing was loaded), -1 if rga_ctx is
 *         NULL.
 *
 * NOTE(review): deinit_func is resolved by RGA_init but is not invoked here;
 * confirm whether c_RkRgaDeInit should be called before dlclose.
 */
int RGA_deinit(rga_context *rga_ctx) {
  if (!rga_ctx) {
    return -1;
  }
  if (rga_ctx->rga_handle) {
    dlclose(rga_ctx->rga_handle);
    rga_ctx->rga_handle = NULL;
  }
  /* The resolved symbols point into the unloaded library; drop them. */
  rga_ctx->init_func = NULL;
  rga_ctx->deinit_func = NULL;
  rga_ctx->blit_func = NULL;
  /* The function is declared int; falling off the end without a value is
   * undefined behaviour in C++. */
  return 0;
}

3. DRM部分代码(使用瑞芯微官方源码)

drm_func.h

#ifndef __DRM_FUNC_H__
#define __DRM_FUNC_H__
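// NOTE(review): the header names on these lines were lost in extraction; the
// list below is reconstructed from the functions and types this code uses --
// confirm against the original Rockchip source.
#include <errno.h>     // errno
#include <fcntl.h>     // open function
#include <stddef.h>    // size_t
#include <stdint.h>
#include <stdio.h>     // printf
#include <string.h>    // memset, strerror
#include <sys/mman.h>  // mmap, munmap
#include <unistd.h>    // close function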

#include "libdrm/drm_fourcc.h"
#include "xf86drm.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Function-pointer type for the "drmIoctl" symbol that drm_init resolves from
 * libdrm with dlsym. */
typedef int (*FUNC_DRM_IOCTL)(int fd, unsigned long request, void *arg);

/* State for a dynamically loaded libdrm. Filled in by drm_init. */
typedef struct _drm_context {
  void *drm_handle;        /* dlopen() handle; NULL when not loaded */
  FUNC_DRM_IOCTL io_func;  /* resolved from "drmIoctl" */
} drm_context;

/* Opens /dev/dri/card0, loads libdrm and resolves "drmIoctl" into drm_ctx.
 * Returns the opened DRM file descriptor, or -1 on failure. */
int drm_init(drm_context *drm_ctx);

/* Creates a dumb buffer of TexWidth x TexHeight at bpp bits per pixel on
 * drm_fd, exports it as a file descriptor and maps it into the process.
 * On success returns the mapped address and, for each non-NULL out-parameter,
 * stores the exported descriptor in *fd, the buffer handle in *handle and the
 * allocated size in *actual_size. Returns NULL on failure. */
void *drm_buf_alloc(drm_context *drm_ctx, int drm_fd, int TexWidth,
                    int TexHeight, int bpp, int *fd, unsigned int *handle,
                    size_t *actual_size);

/* Unmaps drm_buf (size bytes), destroys the dumb buffer identified by handle
 * and closes buf_fd when it is greater than 0. Returns the result of the
 * destroy ioctl, or -1 when drm_buf is NULL. */
int drm_buf_destroy(drm_context *drm_ctx, int drm_fd, int buf_fd, int handle,
                    void *drm_buf, size_t size);

/* Closes the libdrm handle held by drm_ctx (if any) and closes drm_fd when it
 * is greater than 0. */
void drm_deinit(drm_context *drm_ctx, int drm_fd);

#ifdef __cplusplus
}
#endif
#endif /*__DRM_FUNC_H__*/

drm_func.cpp

#include "drm_func.h"

#include <dlfcn.h>

// Adjust this to the actual location of the libdrm shared library.
#define LIBDRMFILE "../lib/libdrm.so"

/**
 * Opens the DRM device node and loads libdrm's ioctl wrapper.
 *
 * @param drm_ctx receives the library handle and the resolved "drmIoctl".
 * @return the open descriptor for /dev/dri/card0, or -1 if the device cannot
 *         be opened, libdrm cannot be loaded, or "drmIoctl" is missing.
 */
int drm_init(drm_context *drm_ctx) {
  static const char *card = "/dev/dri/card0";

  const int drm_fd = open(card, O_RDWR);
  if (drm_fd < 0) {
    printf("failed to open %s\n", card);
    return -1;
  }

  drm_ctx->drm_handle = dlopen(LIBDRMFILE, RTLD_LAZY);
  if (drm_ctx->drm_handle == NULL) {
    printf("failed to dlopen %s \n", LIBDRMFILE);
    drm_deinit(drm_ctx, drm_fd);
    return -1;
  }

  drm_ctx->io_func = (FUNC_DRM_IOCTL)dlsym(drm_ctx->drm_handle, "drmIoctl");
  if (drm_ctx->io_func != NULL) {
    return drm_fd;
  }

  // Symbol lookup failed: unload the library, then release the device.
  dlclose(drm_ctx->drm_handle);
  drm_ctx->drm_handle = NULL;
  drm_deinit(drm_ctx, drm_fd);
  printf("failed to dlsym drmIoctl\n");
  return -1;
}

/**
 * Releases what drm_init acquired: unloads libdrm if it is loaded and closes
 * the device descriptor when it is greater than 0.
 */
void drm_deinit(drm_context *drm_ctx, int drm_fd) {
  if (drm_ctx->drm_handle != NULL) {
    dlclose(drm_ctx->drm_handle);
    drm_ctx->drm_handle = NULL;
  }

  if (drm_fd <= 0) {
    return;
  }
  close(drm_fd);
}

/**
 * Allocates a DRM dumb buffer, exports it as a file descriptor and maps it
 * into this process.
 *
 * @param drm_ctx     context initialised by drm_init (io_func must be set).
 * @param drm_fd      DRM device descriptor returned by drm_init.
 * @param TexWidth    buffer width in pixels.
 * @param TexHeight   buffer height in pixels.
 * @param bpp         bits per pixel.
 * @param fd          optional out: descriptor exported for the buffer.
 * @param handle      optional out: dumb-buffer handle.
 * @param actual_size optional out: size reported by the create ioctl.
 * @return mapped address on success, NULL on failure.
 *
 * The out-parameters are written only on success. On any failure after the
 * buffer was created, the exported descriptor (if any) is closed and the dumb
 * buffer is destroyed, so nothing is leaked.
 */
void *drm_buf_alloc(drm_context *drm_ctx, int drm_fd, int TexWidth,
                    int TexHeight, int bpp, int *fd, unsigned int *handle,
                    size_t *actual_size) {
  if (drm_ctx == NULL || drm_ctx->io_func == NULL) {
    printf("drm context is unvalid\n");
    return NULL;
  }

  /* All locals are declared before the first goto so the cleanup jump never
   * crosses an initialisation. */
  int ret;
  int prime_fd = -1;
  void *vir_addr = NULL;
  struct drm_mode_create_dumb alloc_arg;
  struct drm_prime_handle fd_args;
  struct drm_mode_map_dumb mmap_arg;
  struct drm_mode_destroy_dumb destory_arg;

  memset(&alloc_arg, 0, sizeof(alloc_arg));
  alloc_arg.bpp = bpp;
  alloc_arg.width = TexWidth;
  alloc_arg.height = TexHeight;
  // alloc_arg.flags = ROCKCHIP_BO_CONTIG;

  /* Create the buffer: yields its handle and size. */
  ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &alloc_arg);
  if (ret) {
    printf("failed to create dumb buffer: %s\n", strerror(errno));
    return NULL;
  }

  /* Export the handle as a file descriptor. */
  memset(&fd_args, 0, sizeof(fd_args));
  fd_args.fd = -1;
  fd_args.handle = alloc_arg.handle;
  fd_args.flags = 0;
  ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &fd_args);
  if (ret) {
    printf("rk-debug handle_to_fd failed ret=%d,err=%s, handle=%x \n", ret,
           strerror(errno), fd_args.handle);
    goto destory_dumb; /* the buffer exists already and must be destroyed */
  }
  prime_fd = fd_args.fd;

  /* Obtain the mmap offset, then map the buffer to a virtual address. */
  memset(&mmap_arg, 0, sizeof(mmap_arg));
  mmap_arg.handle = alloc_arg.handle;
  ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_MODE_MAP_DUMB, &mmap_arg);
  if (ret) {
    printf("failed to create map dumb: %s\n", strerror(errno));
    goto destory_dumb;
  }
  vir_addr = mmap(NULL, (size_t)alloc_arg.size, PROT_READ | PROT_WRITE,
                  MAP_SHARED, drm_fd, mmap_arg.offset);
  if (vir_addr == MAP_FAILED) {
    printf("failed to mmap buffer: %s\n", strerror(errno));
    goto destory_dumb;
  }

  /* Publish results only once everything has succeeded. */
  if (handle != NULL) {
    *handle = alloc_arg.handle;
  }
  if (actual_size != NULL) {
    *actual_size = (size_t)alloc_arg.size;
  }
  if (fd != NULL) {
    *fd = prime_fd;
  }
  return vir_addr;

destory_dumb:
  if (prime_fd >= 0) {
    close(prime_fd);
  }
  memset(&destory_arg, 0, sizeof(destory_arg));
  destory_arg.handle = alloc_arg.handle;
  ret = drm_ctx->io_func(drm_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destory_arg);
  if (ret) printf("failed to destory dumb %d\n", ret);
  return NULL;
}

/**
 * Tears down a buffer created by drm_buf_alloc: unmaps it, destroys the dumb
 * buffer and closes the exported descriptor.
 *
 * @return the result of the destroy ioctl, or -1 when drm_buf is NULL (in
 *         which case nothing is released).
 */
int drm_buf_destroy(drm_context *drm_ctx, int drm_fd, int buf_fd, int handle,
                    void *drm_buf, size_t size) {
  if (drm_buf == NULL) {
    printf("drm buffer is NULL\n");
    return -1;
  }

  munmap(drm_buf, size);

  struct drm_mode_destroy_dumb request;
  memset(&request, 0, sizeof(request));
  request.handle = handle;

  const int rc =
      drm_ctx->io_func(drm_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &request);
  if (rc != 0) {
    printf("failed to destory dumb %d, error=%s\n", rc, strerror(errno));
  }

  // The exported descriptor is closed even if the destroy ioctl failed.
  if (buf_fd > 0) {
    close(buf_fd);
  }
  return rc;
}

4. 封装接口代码

image_util.h

#ifndef _IMAGE_UTIL_H
#define _IMAGE_UTIL_H

#include "drm_func.h"
#include "opencv2/opencv.hpp"
#include "rga_func.h"

/// Singleton wrapper around the RGA resize helpers.
///
/// Construction calls init() and destruction calls release(); the single
/// instance is obtained through getInstance(). The class is neither copyable
/// nor movable.
class ImageUtil {
 private:
  ImageUtil() { init(); }
  virtual ~ImageUtil() { release(); }
  ImageUtil(const ImageUtil &) = delete;
  ImageUtil(ImageUtil &&) = delete;
  ImageUtil &operator=(const ImageUtil &) = delete;
  ImageUtil &operator=(ImageUtil &&) = delete;

  // Every member has an initializer so the object is in a defined state even
  // before init() runs or when a step of init() fails.
  void *drm_buf = nullptr;   // mapped address of the staging buffer
  int drm_fd = -1;           // DRM device descriptor
  int buf_fd = -1;           // converted from buffer handle
  unsigned int handle = 0;   // buffer handle
  size_t actual_size = 0;    // size of the staging buffer in bytes
  rga_context rga_ctx{};
  drm_context drm_ctx{};

  /// Acquires the DRM staging buffer and loads RGA.
  void init(void);
  /// Releases everything acquired by init().
  void release(void);

 public:
  /// Returns the process-wide instance.
  static ImageUtil &getInstance();

  /// Resizes `src` to `size` with RGA and writes the result to `dstPtr`.
  /// The image is passed to RGA as RGB888.
  void resize(const cv::Mat &src, const cv::Size &size, void *dstPtr);
};

#endif

image_util.cpp

#include "image_util.h"

// Opens the DRM device, allocates the staging buffer used by resize() and
// loads RGA. Each step is checked: on failure the remaining steps are skipped
// and drm_buf stays NULL, instead of calling into a half-initialised context.
void ImageUtil::init(void) {
  // Capacity of the staging buffer: the largest source image resize() accepts.
  const int kMaxWidth = 1920;
  const int kMaxHeight = 1080;
  const int kBitsPerPixel = 24;  // RGB888

  memset(&rga_ctx, 0, sizeof(rga_context));
  memset(&drm_ctx, 0, sizeof(drm_context));

  drm_fd = drm_init(&drm_ctx);
  if (drm_fd < 0) {
    // drm_ctx.io_func is not set; drm_buf_alloc must not be called.
    printf("ImageUtil: drm_init failed\n");
    drm_buf = nullptr;
    return;
  }

  drm_buf = drm_buf_alloc(&drm_ctx, drm_fd, kMaxWidth, kMaxHeight,
                          kBitsPerPixel, &buf_fd, &handle, &actual_size);
  if (drm_buf == nullptr) {
    printf("ImageUtil: failed to allocate the DRM buffer\n");
    // release() skips buffer teardown when drm_buf is NULL, so drop a
    // descriptor that the allocator may already have handed out.
    if (buf_fd >= 0) {
      close(buf_fd);
    }
    buf_fd = -1;
    handle = 0;
    actual_size = 0;
    return;
  }

  if (RGA_init(&rga_ctx) != 0) {
    printf("ImageUtil: RGA_init failed\n");
  }
}
// Releases the staging buffer, the DRM device and RGA, then resets the members
// so a repeated call cannot unmap or close stale handles.
void ImageUtil::release(void) {
  // Without a buffer there is nothing to tear down.
  if (drm_buf != nullptr) {
    drm_buf_destroy(&drm_ctx, drm_fd, buf_fd, handle, drm_buf, actual_size);
  }
  drm_buf = nullptr;
  buf_fd = -1;
  handle = 0;
  actual_size = 0;

  drm_deinit(&drm_ctx, drm_fd);
  drm_fd = -1;

  RGA_deinit(&rga_ctx);
}

// Resizes `src` to `size` with RGA and writes the RGB888 result to `dstPtr`.
//
// The source pixels are first copied into the DRM staging buffer, which is
// then handed to img_resize_slow. The call is rejected with a message, and
// nothing is written, when:
//   - src is empty, is not a 2-D 8-bit 3-channel image, or does not fit into
//     the staging buffer;
//   - the staging buffer was never allocated;
//   - dstPtr is NULL or the requested size is not positive.
void ImageUtil::resize(const cv::Mat& src, const cv::Size& size, void* dstPtr) {
  if (src.empty()) {
    printf("src is empty!\n");
    return;
  }
  if (dstPtr == nullptr || size.width <= 0 || size.height <= 0) {
    printf("resize: invalid destination\n");
    return;
  }
  if (drm_buf == nullptr) {
    printf("resize: drm buffer is not available\n");
    return;
  }
  // The copy below and the RGA format both assume 3 bytes per pixel.
  if (src.dims != 2 || src.type() != CV_8UC3) {
    printf("resize: src must be a 2-D 8-bit 3-channel image\n");
    return;
  }

  const int img_width = src.cols;
  const int img_height = src.rows;
  const size_t row_bytes = static_cast<size_t>(img_width) * 3;
  const size_t img_bytes = row_bytes * static_cast<size_t>(img_height);
  if (img_bytes > actual_size) {
    printf("resize: src (%dx%d) does not fit into the drm buffer (%zu bytes)\n",
           img_width, img_height, actual_size);
    return;
  }

  if (src.isContinuous()) {
    memcpy(drm_buf, src.data, img_bytes);
  } else {
    // Rows of an ROI/padded Mat are not adjacent in memory; pack them.
    unsigned char* out = static_cast<unsigned char*>(drm_buf);
    for (int row = 0; row < img_height; ++row) {
      memcpy(out + static_cast<size_t>(row) * row_bytes, src.ptr(row),
             row_bytes);
    }
  }

  img_resize_slow(&rga_ctx, drm_buf, img_width, img_height, dstPtr, size.width,
                  size.height);
}

// Returns the single ImageUtil instance, which is a function-local static
// constructed on the first call.
ImageUtil& ImageUtil::getInstance() {
  static ImageUtil instance;
  return instance;
}

至此,前置准备工作已基本就绪,下一步将对大家感兴趣的各类代码进行移植与测试,欢迎各位粉丝积极留言与探讨。


你可能感兴趣的:(arm,opencv,深度学习,rknn,rknnpu)