HEVC解码器的CTU解码(CTU Decoder)部分在整个HEVC解码器中的位置如下图红框所示,在hls_coding_unit()之中。CTU解码(CTU Decoder)部分的函数调用关系如下图右边方框所示。(右键新窗口打开查看大图)
//解码入口函数 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) { HEVCContext *s = avctxt->priv_data; //CTB尺寸 int ctb_size = 1 << s->sps->log2_ctb_size; int more_data = 1; int x_ctb = 0; int y_ctb = 0; int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs]; if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) { av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n"); return AVERROR_INVALIDDATA; } if (s->sh.dependent_slice_segment_flag) { int prev_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1]; if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) { av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n"); return AVERROR_INVALIDDATA; } } while (more_data && ctb_addr_ts < s->sps->ctb_size) { int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts]; //CTB的位置x和y x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size; y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size; //初始化周围的参数 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts); //初始化CABAC ff_hevc_cabac_init(s, ctb_addr_ts); //样点自适应补偿参数 hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size); s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset; s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset; s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag; //解析四叉树结构,并且解码 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0); if (more_data < 0) { s->tab_slice_address[ctb_addr_rs] = -1; return more_data; } ctb_addr_ts++; //保存解码信息以供下次使用 ff_hevc_save_states(s, ctb_addr_ts); //去块效应滤波 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size); } if (x_ctb + ctb_size >= s->sps->width && y_ctb + ctb_size >= s->sps->height) ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size); return ctb_addr_ts; }
从源代码可以看出,hls_decode_entry()主要调用了以下5个函数进行解码工作:
(1)调用hls_decode_neighbour()初始化CTU周围的参数信息。
(2)调用ff_hevc_cabac_init()进行CABAC初始化。
(3)调用hls_sao_param()初始化样点自适应补偿参数。
(4)调用hls_coding_quadtree()解码CTU。其中包含了PU和TU的解码。本文分析第四步的PU和TU解码过程。
(5)调用ff_hevc_hls_filters()进行滤波。其中包含了去块效应滤波和SAO滤波。
/* * 解析四叉树结构,并且解码 * 注意该函数是递归调用 * * s:HEVCContext上下文结构体 * x_ctb:CB位置的x坐标 * y_ctb:CB位置的y坐标 * log2_cb_size:CB大小取log2之后的值 * cb_depth:深度 * */ static int hls_coding_quadtree(HEVCContext *s, int x0, int y0, int log2_cb_size, int cb_depth) { HEVCLocalContext *lc = s->HEVClc; //CB的大小,split flag=0 //log2_cb_size为CB大小取log之后的结果 const int cb_size = 1 << log2_cb_size; int ret; int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1; int split_cu; //确定CU是否还会划分 lc->ct_depth = cb_depth; if (x0 + cb_size <= s->sps->width && y0 + cb_size <= s->sps->height && log2_cb_size > s->sps->log2_min_cb_size) { split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0); } else { split_cu = (log2_cb_size > s->sps->log2_min_cb_size); } if (s->pps->cu_qp_delta_enabled_flag && log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) { lc->tu.is_cu_qp_delta_coded = 0; lc->tu.cu_qp_delta = 0; } if (s->sh.cu_chroma_qp_offset_enabled_flag && log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_chroma_qp_offset_depth) { lc->tu.is_cu_chroma_qp_offset_coded = 0; } if (split_cu) { //如果CU还可以继续划分,则继续解析划分后的CU //注意:这里是递归调用 //CB的大小,split flag=1 const int cb_size_split = cb_size >> 1; const int x1 = x0 + cb_size_split; const int y1 = y0 + cb_size_split; int more_data = 0; //注意: //CU大小减半,log2_cb_size-1 //深度d加1,cb_depth+1 more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1); if (more_data < 0) return more_data; if (more_data && x1 < s->sps->width) { more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1); if (more_data < 0) return more_data; } if (more_data && y1 < s->sps->height) { more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1); if (more_data < 0) return more_data; } if (more_data && x1 < s->sps->width && y1 < s->sps->height) { more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1); if (more_data < 0) return more_data; } if(((x0 + (1<<log2_cb_size)) & qp_block_mask) 
== 0 && ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) lc->qPy_pred = lc->qp_y; if (more_data) return ((x1 + cb_size_split) < s->sps->width || (y1 + cb_size_split) < s->sps->height); else return 0; } else { //注意处理的是不可划分的CU单元 //处理CU单元-真正的解码 ret = hls_coding_unit(s, x0, y0, log2_cb_size); if (ret < 0) return ret; if ((!((x0 + cb_size) % (1 << (s->sps->log2_ctb_size))) || (x0 + cb_size >= s->sps->width)) && (!((y0 + cb_size) % (1 << (s->sps->log2_ctb_size))) || (y0 + cb_size >= s->sps->height))) { int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s); return !end_of_slice_flag; } else { return 1; } } return 0; }从源代码可以看出,hls_coding_quadtree()首先调用ff_hevc_split_coding_unit_flag_decode()判断当前CU是否还需要划分。如果需要划分的话,就会递归调用4次hls_coding_quadtree()分别对4个子块继续进行四叉树解析;如果不需要划分,就会调用hls_coding_unit()对CU进行解码。
//处理CU单元-真正的解码 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size) { //CB大小 int cb_size = 1 << log2_cb_size; HEVCLocalContext *lc = s->HEVClc; int log2_min_cb_size = s->sps->log2_min_cb_size; int length = cb_size >> log2_min_cb_size; int min_cb_width = s->sps->min_cb_width; //以最小的CB为单位(例如4x4)的时候,当前CB的位置——x坐标和y坐标 int x_cb = x0 >> log2_min_cb_size; int y_cb = y0 >> log2_min_cb_size; int idx = log2_cb_size - 2; int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1; int x, y, ret; //设置CU的属性值 lc->cu.x = x0; lc->cu.y = y0; lc->cu.pred_mode = MODE_INTRA; lc->cu.part_mode = PART_2Nx2N; lc->cu.intra_split_flag = 0; SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0; for (x = 0; x < 4; x++) lc->pu.intra_pred_mode[x] = 1; if (s->pps->transquant_bypass_enable_flag) { lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s); if (lc->cu.cu_transquant_bypass_flag) set_deblocking_bypass(s, x0, y0, log2_cb_size); } else lc->cu.cu_transquant_bypass_flag = 0; if (s->sh.slice_type != I_SLICE) { //Skip类型 uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb); //设置到skip_flag缓存中 x = y_cb * min_cb_width + x_cb; for (y = 0; y < length; y++) { memset(&s->skip_flag[x], skip_flag, length); x += min_cb_width; } lc->cu.pred_mode = skip_flag ? 
MODE_SKIP : MODE_INTER; } else { x = y_cb * min_cb_width + x_cb; for (y = 0; y < length; y++) { memset(&s->skip_flag[x], 0, length); x += min_cb_width; } } if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) { hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx); intra_prediction_unit_default_value(s, x0, y0, log2_cb_size); if (!s->sh.disable_deblocking_filter_flag) ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size); } else { int pcm_flag = 0; //读取预测模式(非 I Slice) if (s->sh.slice_type != I_SLICE) lc->cu.pred_mode = ff_hevc_pred_mode_decode(s); //不是帧内预测模式的时候 //或者已经是最小CB的时候 if (lc->cu.pred_mode != MODE_INTRA || log2_cb_size == s->sps->log2_min_cb_size) { //读取CU分割模式 lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size); lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN && lc->cu.pred_mode == MODE_INTRA; } if (lc->cu.pred_mode == MODE_INTRA) { //帧内预测模式 //PCM方式编码,不常见 if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag && log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size && log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) { pcm_flag = ff_hevc_pcm_flag_decode(s); } if (pcm_flag) { intra_prediction_unit_default_value(s, x0, y0, log2_cb_size); ret = hls_pcm_sample(s, x0, y0, log2_cb_size); if (s->sps->pcm.loop_filter_disable_flag) set_deblocking_bypass(s, x0, y0, log2_cb_size); if (ret < 0) return ret; } else { //获取帧内预测模式 intra_prediction_unit(s, x0, y0, log2_cb_size); } } else { //帧间预测模式 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size); //帧间模式一共有8种划分模式 switch (lc->cu.part_mode) { case PART_2Nx2N: //处理PU单元-运动补偿 /* * hls_prediction_unit()参数: * x0 : PU左上角x坐标 * y0 : PU左上角y坐标 * nPbW : PU宽度 * nPbH : PU高度 * log2_cb_size : CB大小取log2()的值 * partIdx : PU的索引号-分成4个块的时候取0-3,分成两个块的时候取0和1 */ hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx); break; case PART_2NxN: hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0, idx); hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, 
log2_cb_size, 1, idx); break; case PART_Nx2N: hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1); hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1); break; case PART_2NxnU: hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0, idx); hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx); break; case PART_2NxnD: hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx); hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1, idx); break; case PART_nLx2N: hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0, idx - 2); hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2); break; case PART_nRx2N: hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2); hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1, idx - 2); break; case PART_NxN: hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1); hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1); hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1); hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1); break; } } if (!pcm_flag) { int rqt_root_cbf = 1; if (lc->cu.pred_mode != MODE_INTRA && !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) { rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s); } if (rqt_root_cbf) { const static int cbf[2] = { 0 }; lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ? 
s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag : s->sps->max_transform_hierarchy_depth_inter; //处理TU四叉树 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0, log2_cb_size, log2_cb_size, 0, 0, cbf, cbf); if (ret < 0) return ret; } else { if (!s->sh.disable_deblocking_filter_flag) ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size); } } } if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0) ff_hevc_set_qPy(s, x0, y0, log2_cb_size); x = y_cb * min_cb_width + x_cb; for (y = 0; y < length; y++) { memset(&s->qp_y_tab[x], lc->qp_y, length); x += min_cb_width; } if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 && ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) { lc->qPy_pred = lc->qp_y; } set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth); return 0; }从源代码可以看出,hls_coding_unit()主要进行了两个方面的处理:
(1)调用hls_prediction_unit()处理PU。
(2)调用hls_transform_tree()处理TU树。
其中,hls_prediction_unit()完成了以下两步工作:
(1)解析码流得到运动矢量。HEVC中包含了Merge和AMVP两种运动矢量预测技术。对于使用Merge的码流,调用ff_hevc_luma_mv_merge_mode();对于使用AMVP的码流,调用hevc_luma_mv_mpv_mode()。
(2)根据运动矢量进行运动补偿。对于单向预测亮度运动补偿,调用luma_mc_uni(),对于单向预测色度运动补偿,调用chroma_mc_uni();对于双向预测亮度运动补偿,调用luma_mc_bi(),对于双向预测色度运动补偿,调用chroma_mc_bi()。
而hls_transform_tree()首先调用ff_hevc_split_transform_flag_decode()判断当前TU是否还需要划分。
如果需要划分的话,就会递归调用4次hls_transform_tree()分别对4个子块继续进行四叉树解析;如果不需要划分,就会调用hls_transform_unit()对TU进行解码。和前面递归划分CTU至CU的思路是一致的。最终会对每一个TU逐一调用hls_transform_unit()进行解码。