Rockchip平台的mpp硬件编码器:mpp enc 仅接受硬件buf作为输入。当然如果使用malloc开辟的内存,硬件编解码器则无法完成“Zero Copy”,从而大大降低编码性能。本文主要介绍如何组织DRM buf的像素格式,从而避免内存搬运,实现“0拷贝”。
官方提供的mpi_enc_test示例,其开辟drm buf大小是按照宽高16对齐后来计算的。比如:1920x1080的yuv420p图像,16对齐后的宽高为1920x1088,那么实际开辟的drm buf大小应为1920x1088x3/2字节。之所以开辟内存时宽高要按16像素对齐,是因为mpp模块是以16x16的宏块为最小单位进行处理的。
既然引入了对齐,那么像素排布就必须依照对齐后的分辨率去处理。比如mpi_enc_test中提供的默认组织方式:
/* 针对1080P的分辨率,16对齐后的内存大小:1920x1088。
*
* 像素排布,比如8x4的YUV420P的图像,
* 开辟DMA内存大小为:8x8:
* Y Y Y Y Y Y Y Y
* Y Y Y Y Y Y Y Y
* Y Y Y Y Y Y Y Y
* Y Y Y Y Y Y Y Y
* - - - - - - - - //虚高行空出
* - - - - - - - - //虚高行空出。
* - - - - - - - - //虚高行空出。
* - - - - - - - - //虚高行空出。
* U U U U
* U U U U
* - - - -
* - - - -
* V V V V
* V V V V
* - - - -
* - - - -
*
* 注意,上面的二维buffer图,本质对应一维8x8的buffer数组,
* 数据组织格式如下:
* Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y(紧挨着下一行)
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* U U U U U U U U - - - - - - - - V V V V V V V V - - - - - - - -
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
/**
 * Read one raw frame from a file into a frame buffer that uses the
 * stride-padded layout (chroma starts after hor_stride * ver_stride bytes).
 *
 * For every format the file is expected to contain tightly packed rows;
 * each row is read separately and stored at the row pitch of the buffer.
 *
 * @param buf         Destination frame buffer. The function performs no
 *                    bounds checking, so the caller must supply a buffer
 *                    large enough for the layout described by the strides.
 * @param fp          Input stream positioned at the start of a frame.
 * @param width       Number of pixels per row to read.
 * @param height      Number of rows to read.
 * @param hor_stride  Row pitch of the destination buffer.
 * @param ver_stride  Number of (padded) luma rows; locates the chroma data.
 * @param fmt         Pixel format of the frame.
 *
 * @return MPP_OK on success, MPP_NOK when a row could not be read
 *         completely (short read / end of file), MPP_ERR_VALUE when fmt is
 *         not supported.
 */
MPP_RET read_yuv_image(RK_U8 *buf, FILE *fp, RK_U32 width, RK_U32 height,
                       RK_U32 hor_stride, RK_U32 ver_stride, MppFrameFormat fmt)
{
    MPP_RET ret = MPP_OK;
    RK_U32 read_size;
    RK_U32 row = 0;
    RK_U8 *buf_y = buf;
    /* Chroma follows the luma plane INCLUDING its padding rows. */
    RK_U8 *buf_u = buf_y + hor_stride * ver_stride; // NOTE: diff from gen_yuv_image
    RK_U8 *buf_v = buf_u + hor_stride * ver_stride / 4; // NOTE: diff from gen_yuv_image

    switch (fmt) {
    case MPP_FMT_YUV420SP : {
        /* Luma plane: height rows of width bytes. */
        for (row = 0; row < height; row++) {
            read_size = fread(buf_y + row * hor_stride, 1, width, fp);
            if (read_size != width) {
                mpp_err_f("read ori yuv file luma failed");
                ret = MPP_NOK;
                goto err;
            }
        }

        /* Interleaved chroma plane: height / 2 rows of width bytes. */
        for (row = 0; row < height / 2; row++) {
            read_size = fread(buf_u + row * hor_stride, 1, width, fp);
            if (read_size != width) {
                mpp_err_f("read ori yuv file cb failed");
                ret = MPP_NOK;
                goto err;
            }
        }
    } break;
    case MPP_FMT_YUV420P : {
        /* Luma plane: height rows of width bytes. */
        for (row = 0; row < height; row++) {
            read_size = fread(buf_y + row * hor_stride, 1, width, fp);
            if (read_size != width) {
                mpp_err_f("read ori yuv file luma failed");
                ret = MPP_NOK;
                goto err;
            }
        }

        /* Cb plane: height / 2 rows of width / 2 bytes at half the pitch. */
        for (row = 0; row < height / 2; row++) {
            read_size = fread(buf_u + row * hor_stride / 2, 1, width / 2, fp);
            if (read_size != width / 2) {
                mpp_err_f("read ori yuv file cb failed");
                ret = MPP_NOK;
                goto err;
            }
        }

        /* Cr plane: same geometry as Cb. */
        for (row = 0; row < height / 2; row++) {
            read_size = fread(buf_v + row * hor_stride / 2, 1, width / 2, fp);
            if (read_size != width / 2) {
                mpp_err_f("read ori yuv file cr failed");
                ret = MPP_NOK;
                goto err;
            }
        }
    } break;
    case MPP_FMT_ARGB8888 : {
        /* 4 bytes per pixel; the row pitch is hor_stride * 4 bytes. */
        for (row = 0; row < height; row++) {
            read_size = fread(buf_y + row * hor_stride * 4, 1, width * 4, fp);
            /* A short read must not be reported as success. */
            if (read_size != width * 4) {
                mpp_err_f("read ori argb file failed");
                ret = MPP_NOK;
                goto err;
            }
        }
    } break;
    case MPP_FMT_YUV422_YUYV :
    case MPP_FMT_YUV422_UYVY : {
        /*
         * 2 bytes per pixel.
         * NOTE(review): the row pitch used here is hor_stride (not
         * hor_stride * 2), so hor_stride is presumably already given in
         * bytes for these formats - confirm against the caller.
         */
        for (row = 0; row < height; row++) {
            read_size = fread(buf_y + row * hor_stride, 1, width * 2, fp);
            /* A short read must not be reported as success. */
            if (read_size != width * 2) {
                mpp_err_f("read ori yuv422 file failed");
                ret = MPP_NOK;
                goto err;
            }
        }
    } break;
    default : {
        mpp_err_f("read image do not support fmt %d\n", fmt);
        ret = MPP_ERR_VALUE;
    } break;
    }

err:
    return ret;
}
我们注意到,上述代码的内存组织方式需要依照虚高(ver_stride)对齐。但前端摄像头采集到的数据虽然也存放在16对齐的DMA buf中,却不会依照MPP的要求组织像素内存,这时岂不是必须进行一次数据搬运?
本文就是讲述MPP的隐含特性:兼容8字节对齐。
经过试验证明,只要内存开辟是按照16对齐的,那么像素组织形式仍可采用如下方式(8对齐)。对比上文官方示例中的代码,此处的Y、U、V之间并没有空出的间隔行,而是紧凑地拼接在一起,这符合多数Camera的数据组织方式。
/* 针对1080P的分辨率,16对齐后的内存大小:1920x1088。
*
* 像素格式排布,比如8x4的图像,开辟DMA内存大小为:8x8:
* Y Y Y Y Y Y Y Y
* Y Y Y Y Y Y Y Y
* Y Y Y Y Y Y Y Y
* Y Y Y Y Y Y Y Y
* U U U U
* U U U U
* V V V V
* V V V V
* - - - -
* - - - -
*
* 注意,上面的二维buffer图,本质对应一维8x8的buffer数组,
* 数据组织格式如下:
* Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y(紧挨着下一行)
* U U U U U U U U V V V V V V V V - - - - - - - - - - - - - - - -
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
/**
 * Read one raw frame from a file into a frame buffer that uses the compact
 * layout: the chroma data starts directly after the `height` real luma
 * rows (hor_stride * height bytes), with no padding rows between planes.
 *
 * For every format the file is expected to contain tightly packed rows;
 * each row is read separately and stored at the row pitch of the buffer.
 *
 * As a debugging aid, the buffer contents are additionally dumped to
 * /tmp/debug_frame.yuv420p once, when the function-local static call
 * counter reaches 30 (4:2:0 formats only).
 *
 * @param buf         Destination frame buffer. The function performs no
 *                    bounds checking, so the caller must supply a buffer
 *                    large enough for the layout described above.
 * @param fp          Input stream positioned at the start of a frame.
 * @param width       Number of pixels per row to read.
 * @param height      Number of rows to read; also locates the chroma data.
 * @param hor_stride  Row pitch of the destination buffer.
 * @param ver_stride  Unused by this variant; kept so the signature stays
 *                    compatible with existing callers.
 * @param fmt         Pixel format of the frame.
 *
 * @return MPP_OK on success, MPP_NOK when a row could not be read
 *         completely (short read / end of file), MPP_ERR_VALUE when fmt is
 *         not supported.
 */
MPP_RET read_yuv_image(RK_U8 *buf, FILE *fp, RK_U32 width, RK_U32 height,
                       RK_U32 hor_stride, RK_U32 ver_stride, MppFrameFormat fmt)
{
    MPP_RET ret = MPP_OK;
    RK_U32 read_size;
    RK_U32 row = 0;
    RK_U8 *buf_y = buf;
    /* Chroma follows the real luma rows directly (height, not ver_stride). */
    RK_U8 *buf_u = buf_y + hor_stride * height;
    RK_U8 *buf_v = buf_u + hor_stride * height / 4;
    /* Counts calls that reach the dump check below; shared by all calls. */
    static int debug_save_frame_num = 0;

    (void)ver_stride; /* the compact layout does not depend on ver_stride */

    switch (fmt) {
    case MPP_FMT_YUV420SP : {
        /* Luma plane: height rows of width bytes. */
        for (row = 0; row < height; row++) {
            read_size = fread(buf_y + row * hor_stride, 1, width, fp);
            if (read_size != width) {
                mpp_err_f("read ori yuv file luma failed");
                ret = MPP_NOK;
                goto err;
            }
        }

        /* Interleaved chroma plane: height / 2 rows of width bytes. */
        for (row = 0; row < height / 2; row++) {
            read_size = fread(buf_u + row * hor_stride, 1, width, fp);
            if (read_size != width) {
                mpp_err_f("read ori yuv file cb failed");
                ret = MPP_NOK;
                goto err;
            }
        }
    } break;
    case MPP_FMT_YUV420P : {
        /* Luma plane: height rows of width bytes. */
        for (row = 0; row < height; row++) {
            read_size = fread(buf_y + row * hor_stride, 1, width, fp);
            if (read_size != width) {
                mpp_err_f("read ori yuv file luma failed");
                ret = MPP_NOK;
                goto err;
            }
        }

        /* Cb plane: height / 2 rows of width / 2 bytes at half the pitch. */
        for (row = 0; row < height / 2; row++) {
            read_size = fread(buf_u + row * hor_stride / 2, 1, width / 2, fp);
            if (read_size != width / 2) {
                mpp_err_f("read ori yuv file cb failed");
                ret = MPP_NOK;
                goto err;
            }
        }

        /* Cr plane: same geometry as Cb. */
        for (row = 0; row < height / 2; row++) {
            read_size = fread(buf_v + row * hor_stride / 2, 1, width / 2, fp);
            if (read_size != width / 2) {
                mpp_err_f("read ori yuv file cr failed");
                ret = MPP_NOK;
                goto err;
            }
        }
    } break;
    case MPP_FMT_ARGB8888 : {
        /* 4 bytes per pixel; the row pitch is hor_stride * 4 bytes. */
        for (row = 0; row < height; row++) {
            read_size = fread(buf_y + row * hor_stride * 4, 1, width * 4, fp);
            /* A short read must not be reported as success. */
            if (read_size != width * 4) {
                mpp_err_f("read ori argb file failed");
                ret = MPP_NOK;
                goto err;
            }
        }
    } break;
    case MPP_FMT_YUV422_YUYV :
    case MPP_FMT_YUV422_UYVY : {
        /*
         * 2 bytes per pixel.
         * NOTE(review): the row pitch used here is hor_stride (not
         * hor_stride * 2), so hor_stride is presumably already given in
         * bytes for these formats - confirm against the caller.
         */
        for (row = 0; row < height; row++) {
            read_size = fread(buf_y + row * hor_stride, 1, width * 2, fp);
            /* A short read must not be reported as success. */
            if (read_size != width * 2) {
                mpp_err_f("read ori yuv422 file failed");
                ret = MPP_NOK;
                goto err;
            }
        }
    } break;
    default : {
        mpp_err_f("read image do not support fmt %d\n", fmt);
        ret = MPP_ERR_VALUE;
    } break;
    }

    /*
     * Debug aid: dump the buffer when the counter equals 30, i.e. on the
     * 31st call that gets here (calls that fail with a short read jump to
     * err and are not counted). The dump size hor_stride * height * 3 / 2
     * is only meaningful for 4:2:0 data, so other formats are skipped.
     */
    if (debug_save_frame_num++ == 30 &&
        (fmt == MPP_FMT_YUV420SP || fmt == MPP_FMT_YUV420P)) {
        RK_U32 dump_size = hor_stride * height * 3 / 2;
        /* "wb": create/truncate in binary mode ("wa+" is not a valid mode). */
        FILE *debug_frame_fd = fopen("/tmp/debug_frame.yuv420p", "wb");

        if (debug_frame_fd) {
            if (fwrite(buf, 1, dump_size, debug_frame_fd) == dump_size)
                printf("### FLC-DBG: Saved /tmp/debug_frame.yuv420p\n");
            else
                printf("### FLC-DBG: write /tmp/debug_frame.yuv420p failed!\n");
            fclose(debug_frame_fd);
        } else {
            printf("### FLC-DBG: open /tmp/debug_frame.yuv420p failed!\n");
        }
    }

err:
    return ret;
}
注:帧(MppFrame)和编码器配置中的stride(尤其是ver_stride)也需要做相应修改,使其与上述紧凑排布保持一致。