这篇博客主要对AV1的帧内预测编码部分的代码进行分析。
与其他视频编码标准一样,AV1 中预测的目的是为当前块生成预测器(Predictor),再把当前块像素值与预测器之间的差值(残差)传递给编码器的下一个阶段。
在 AV1 的参考软件 libaom-av1 中,帧内预测从函数 av1_predict_intra_block_facade() 开始。
// Entry point of AV1 intra prediction for one transform block of one plane.
//
// Reads the block's mode information from xd->mi[0], derives the prediction
// parameters for `plane`, and writes the predictor in place into that plane's
// destination buffer (the same pointer is passed as both reference and output).
//
//   cm       - common codec state, forwarded to av1_predict_intra_block()
//   xd       - per-block decoder/encoder state (mode info, planes, CfL context)
//   plane    - plane index; AOM_PLANE_Y selects the luma parameters
//   blk_col,
//   blk_row  - block position inside the plane, scaled by MI_SIZE_LOG2 below
//   tx_size  - transform size of the block being predicted
void av1_predict_intra_block_facade(const AV1_COMMON* cm, MACROBLOCKD* xd, int plane, int blk_col, int blk_row, TX_SIZE tx_size)
{
    const MB_MODE_INFO* const mode_info = xd->mi[0];
    const int is_chroma = (plane != AOM_PLANE_Y);

    // Per-plane state (one of Y, U, V), including the destination pixel buffer.
    struct macroblockd_plane* const plane_ctx = &xd->plane[plane];
    const int stride = plane_ctx->dst.stride;
    uint8_t* const out = plane_ctx->dst.buf + ((blk_row * stride + blk_col) << MI_SIZE_LOG2);

    // Prediction mode for this plane. (Original author's note: on the encoder
    // side the caller loops over the candidate modes.)
    PREDICTION_MODE pred_mode;
    if (is_chroma)
    {
        pred_mode = get_uv_mode(mode_info->uv_mode);
    }
    else
    {
        pred_mode = mode_info->mode;
    }

    const int palette_on = mode_info->palette_mode_info.palette_size[plane != 0] > 0;

    // Filter-intra is only considered for luma; FILTER_INTRA_MODES means "not used".
    FILTER_INTRA_MODE fi_mode = FILTER_INTRA_MODES;
    if (!is_chroma && mode_info->filter_intra_mode_info.use_filter_intra)
    {
        fi_mode = mode_info->filter_intra_mode_info.filter_intra_mode;
    }

    // Signed angle offset for directional modes, in units of ANGLE_STEP.
    // (Original author's note: each of the 8 directions is refined by offsets of
    // -9, -6, -3, 0, +3, +6, +9 degrees, i.e. 8 * 7 = 56 directions.)
    const int delta = mode_info->angle_delta[is_chroma] * ANGLE_STEP;

    // Common case: anything that is not chroma-from-luma goes straight to the
    // per-component predictor.
    if (!(is_chroma && mode_info->uv_mode == UV_CFL_PRED))
    {
        av1_predict_intra_block(cm, xd, plane_ctx->width, plane_ctx->height, tx_size, pred_mode, delta, palette_on, fi_mode, out, stride, out, stride, blk_col, blk_row, plane);
        return;
    }

    // Chroma-from-luma (CfL): obtain the base prediction first (computed now,
    // or reloaded from the cache), then let cfl_predict_block() finish the block.
    CFL_CTX* const cfl_ctx = &xd->cfl;
    CFL_PRED_TYPE cfl_plane = get_cfl_pred_type(plane);
    if (cfl_ctx->dc_pred_is_cached[cfl_plane] != 0)
    {
        cfl_load_dc_pred(xd, out, stride, tx_size, cfl_plane);
    }
    else
    {
        av1_predict_intra_block(cm, xd, plane_ctx->width, plane_ctx->height, tx_size, pred_mode, delta, palette_on, fi_mode, out, stride, out, stride, blk_col, blk_row, plane);
        if (cfl_ctx->use_dc_pred_cache)
        {
            cfl_store_dc_pred(xd, out, cfl_plane, tx_size_wide[tx_size]);
            cfl_ctx->dc_pred_is_cached[cfl_plane] = 1;
        }
    }
    cfl_predict_block(xd, out, stride, tx_size, plane);
}
进入函数 av1_predict_intra_block() 之后,就开始构建帧内预测器(intra predictor)了。与其他标准一样,帧内预测首先要准备好相邻的参考像素(neighbors)。
// Builds the intra predictor for one transform block of one plane.
//
// 1. Palette mode: every output pixel is looked up in the palette through the
//    color index map, then the function returns.
// 2. Otherwise: determine which neighbouring reference pixels (top, top-right,
//    left, bottom-left) exist and how many of each are usable, and pass those
//    counts to build_intra_predictors() / build_intra_predictors_high(), which
//    pad whatever is missing.
//
//   wpx, hpx         - plane dimensions passed by the caller (pd->width / pd->height)
//   col_off, row_off - block offset, scaled by MI_SIZE_LOG2 to get pixels
//   ref, ref_stride  - reference pixel buffer and its stride
//   dst, dst_stride  - output buffer and its stride
void av1_predict_intra_block(const AV1_COMMON* cm, const MACROBLOCKD* xd, int wpx, int hpx, TX_SIZE tx_size, PREDICTION_MODE mode, int angle_delta, int use_palette,
                             FILTER_INTRA_MODE filter_intra_mode, const uint8_t* ref, int ref_stride, uint8_t* dst, int dst_stride, int col_off, int row_off, int plane)
{
    const MB_MODE_INFO* const cur_mi = xd->mi[0];
    const int tx_w_px = tx_size_wide[tx_size];
    const int tx_h_px = tx_size_high[tx_size];
    const int px_x = col_off << MI_SIZE_LOG2;
    const int px_y = row_off << MI_SIZE_LOG2;

    // Palette mode: the predictor comes directly from the palette map.
    if (use_palette)
    {
        const int map_plane = (plane != 0);
        const uint8_t* const index_map = xd->plane[map_plane].color_index_map + xd->color_index_map_offset[map_plane];
        const uint16_t* const colors = cur_mi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;

        if (!is_cur_buf_hbd(xd))
        {
            for (int row = 0; row < tx_h_px; ++row)
            {
                const int map_base = (row + px_y) * wpx + px_x;
                const int out_base = row * dst_stride;
                for (int col = 0; col < tx_w_px; ++col)
                {
                    dst[out_base + col] = (uint8_t)colors[index_map[map_base + col]];
                }
            }
        }
        else
        {
            // High-bit-depth buffer: write 16-bit samples.
            uint16_t* const dst_hbd = CONVERT_TO_SHORTPTR(dst);
            for (int row = 0; row < tx_h_px; ++row)
            {
                const int map_base = (row + px_y) * wpx + px_x;
                const int out_base = row * dst_stride;
                for (int col = 0; col < tx_w_px; ++col)
                {
                    dst_hbd[out_base + col] = colors[index_map[map_base + col]];
                }
            }
        }
        return;
    }

    const struct macroblockd_plane* const plane_ctx = &xd->plane[plane];
    const int ss_x = plane_ctx->subsampling_x;
    const int ss_y = plane_ctx->subsampling_y;
    const int tx_w_units = tx_size_wide_unit[tx_size];
    const int tx_h_units = tx_size_high_unit[tx_size];

    // A non-zero offset inside the block means the neighbour lies within the
    // block itself; otherwise consult the block-level availability flags.
    const int have_top = row_off || (ss_y ? xd->chroma_up_available : xd->up_available);
    const int have_left = col_off || (ss_x ? xd->chroma_left_available : xd->left_available);

    const int mi_row = (-xd->mb_to_top_edge) >> (3 + MI_SIZE_LOG2);
    const int mi_col = (-xd->mb_to_left_edge) >> (3 + MI_SIZE_LOG2);
    const int xr_chr_offset = 0;
    const int yd_chr_offset = 0;

    // Distance from the right edge of this prediction block to the frame's
    // right edge.
    const int xr = (xd->mb_to_right_edge >> (3 + ss_x)) + (wpx - px_x - tx_w_px) - xr_chr_offset;
    // Distance from the bottom edge of this prediction block to the frame's
    // bottom edge.
    const int yd = (xd->mb_to_bottom_edge >> (3 + ss_y)) + (hpx - px_y - tx_h_px) - yd_chr_offset;

    const int right_available = mi_col + ((col_off + tx_w_units) << ss_x) < xd->tile.mi_col_end;
    const int bottom_available = (yd > 0) && (mi_row + ((row_off + tx_h_units) << ss_y) < xd->tile.mi_row_end);

    const PARTITION_TYPE partition = cur_mi->partition;
    BLOCK_SIZE bsize = cur_mi->sb_type;
    // Force 4x4 chroma component block size.
    if (ss_x || ss_y)
    {
        bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
    }

    // Check whether the top-right and bottom-left neighbours exist.
    const int have_top_right = has_top_right(cm, bsize, mi_row, mi_col, have_top, right_available, partition, tx_size, row_off, col_off, ss_x, ss_y);
    const int have_bottom_left = has_bottom_left(cm, bsize, mi_row, mi_col, bottom_available, have_left, partition, tx_size, row_off, col_off, ss_x, ss_y);
    const int disable_edge_filter = !cm->seq_params.enable_intra_edge_filter;

    // Number of usable reference pixels on each side, clipped to what lies
    // inside the frame.
    const int n_top_px = have_top ? AOMMIN(tx_w_px, xr + tx_w_px) : 0;
    const int n_topright_px = have_top_right ? AOMMIN(tx_w_px, xr) : 0;
    const int n_left_px = have_left ? AOMMIN(tx_h_px, yd + tx_h_px) : 0;
    const int n_bottomleft_px = have_bottom_left ? AOMMIN(tx_h_px, yd) : 0;

    // High-bit-depth buffers use the dedicated builder.
#if CONFIG_AV1_HIGHBITDEPTH
    if (is_cur_buf_hbd(xd))
    {
        build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode, angle_delta, filter_intra_mode, tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px, n_bottomleft_px, plane);
        return;
    }
#endif
    // Neighbour availability is known; generate the predictor.
    build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, angle_delta, filter_intra_mode, tx_size, disable_edge_filter, n_top_px, n_topright_px, n_left_px, n_bottomleft_px, plane);
}
到该函数为止,已经知道当前块在上、右上、左、左下四个方向的邻居(neighbor)是否存在。接下来,根据预测模式的不同,决定这四个方向的邻居在生成预测器的计算过程中是否会被用到;如果需要用到但并不存在,就要通过填充(padding)把对应的参考像素生成出来。
// Builds the 8-bit intra predictor for one transform block.
//
// Copies the available reference pixels around the block into two local edge
// buffers (above_row / left_col), fills in whatever is missing by replication
// or with default values, optionally filters / upsamples the edges for
// directional modes, and finally calls the predictor matching `mode`.
//
//   ref, ref_stride     - reference pixels; the row above is read at
//                         ref - ref_stride and the left column at ref - 1
//   dst, dst_stride     - output block
//   mode, angle_delta   - prediction mode and, for directional modes, the
//                         offset added to the mode's base angle
//   filter_intra_mode   - FILTER_INTRA_MODES means filter-intra is not used
//   disable_edge_filter - when non-zero, skips edge filtering and upsampling
//   n_top_px, n_topright_px, n_left_px, n_bottomleft_px
//                       - number of reference pixels available on each side
//                         (0 when that neighbour is unavailable)
//   plane               - plane index, forwarded to get_filt_type()
static void build_intra_predictors(const MACROBLOCKD* xd, const uint8_t* ref,
int ref_stride, uint8_t* dst, int dst_stride, PREDICTION_MODE mode, int angle_delta, FILTER_INTRA_MODE filter_intra_mode, TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px, int n_left_px, int n_bottomleft_px, int plane)
{
int i;
// First pixel of the row directly above the block, and first pixel of the
// column directly to its left.
const uint8_t* above_ref = ref - ref_stride;
const uint8_t* left_ref = ref - 1;
// Local edge buffers. The working pointers start 16 bytes in, so index -1
// (the above-left corner) is a valid element.
DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
uint8_t* const above_row = above_data + 16;
uint8_t* const left_col = left_data + 16;
const int txwpx = tx_size_wide[tx_size];
const int txhpx = tx_size_high[tx_size];
// Which edges the mode needs, as listed in extend_modes[]; overridden below
// for directional and filter-intra prediction.
int need_left = extend_modes[mode] & NEED_LEFT;
int need_above = extend_modes[mode] & NEED_ABOVE;
int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
int p_angle = 0;
const int is_dr_mode = av1_is_directional_mode(mode);
const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
// The default values if ref pixels are not available:
// 128 127 127 .. 127 127 127 127 127 127
// 129 A B .. Y Z
// 129 C D .. W X
// 129 E F .. U V
// 129 G H .. S T T T T T
// ..
// For directional modes the required edges are derived from the final
// prediction angle (base angle of the mode plus angle_delta) instead of
// from extend_modes[].
if (is_dr_mode)
{
p_angle = mode_to_angle_map[mode] + angle_delta;
if (p_angle <= 90)
need_above = 1, need_left = 0, need_above_left = 1;
else if (p_angle < 180)
need_above = 1, need_left = 1, need_above_left = 1;
else
need_above = 0, need_left = 1, need_above_left = 1;
}
// Filter-intra prediction needs the above, left and above-left references.
if (use_filter_intra)
need_left = need_above = need_above_left = 1;
assert(n_top_px >= 0);
assert(n_topright_px >= 0);
assert(n_left_px >= 0);
assert(n_bottomleft_px >= 0);
// Shortcut: the above edge is not needed and no left pixels exist, or the
// left edge is not needed and no top pixels exist. The whole block is then
// filled with a single value: a pixel from the other side when it exists,
// otherwise the default 129 / 127.
if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0))
{
int val;
if (need_left) {
val = (n_top_px > 0) ? above_ref[0] : 129;
}
else {
val = (n_left_px > 0) ? left_ref[0] : 127;
}
for (i = 0; i < txhpx; ++i) {
memset(dst, val, txwpx);
dst += dst_stride;
}
return;
}
// NEED_LEFT: build the left column, extended into the bottom-left when needed.
if (need_left) {
int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
if (use_filter_intra) need_bottom = 0;
if (is_dr_mode) need_bottom = p_angle > 180;
// the avx2 dr_prediction_z2 may read at most 3 extra bytes,
// due to the avx2 mask load is with dword granularity.
// so we initialize 3 extra bytes to silence valgrind complain.
const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 3);
i = 0;
if (n_left_px > 0) {
// Copy the available left pixels (one per row of the reference buffer).
for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
if (need_bottom && n_bottomleft_px > 0) {
assert(i == txhpx);
for (; i < txhpx + n_bottomleft_px; i++)
left_col[i] = left_ref[i * ref_stride];
}
// Replicate the last copied pixel over the remaining entries.
if (i < num_left_pixels_needed)
memset(&left_col[i], left_col[i - 1], num_left_pixels_needed - i);
}
else {
// No left pixels: fall back to the first above pixel, or to 129.
if (n_top_px > 0) {
memset(left_col, above_ref[0], num_left_pixels_needed);
}
else {
memset(left_col, 129, num_left_pixels_needed);
}
}
}
// NEED_ABOVE: build the above row, extended into the top-right when needed.
if (need_above) {
int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
if (use_filter_intra) need_right = 0;
if (is_dr_mode) need_right = p_angle < 90;
const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
if (n_top_px > 0) {
memcpy(above_row, above_ref, n_top_px);
i = n_top_px;
if (need_right && n_topright_px > 0) {
assert(n_top_px == txwpx);
memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
i += n_topright_px;
}
// Replicate the last copied pixel over the remaining entries.
if (i < num_top_pixels_needed)
memset(&above_row[i], above_row[i - 1], num_top_pixels_needed - i);
}
else {
// No top pixels: fall back to the first left pixel, or to 127.
if (n_left_px > 0) {
memset(above_row, left_ref[0], num_top_pixels_needed);
}
else {
memset(above_row, 127, num_top_pixels_needed);
}
}
}
// Above-left corner: the real corner pixel when both sides exist, otherwise
// the nearest available pixel, otherwise 128. Both buffers share it at [-1].
if (need_above_left) {
if (n_top_px > 0 && n_left_px > 0) {
above_row[-1] = above_ref[-1];
}
else if (n_top_px > 0) {
above_row[-1] = above_ref[0];
}
else if (n_left_px > 0) {
above_row[-1] = left_ref[0];
}
else {
above_row[-1] = 128;
}
left_col[-1] = above_row[-1];
}
// Filter-intra prediction uses the prepared edges as they are.
if (use_filter_intra)
{
av1_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col, filter_intra_mode);
return;
}
// Directional prediction: optionally filter and upsample the edges first.
if (is_dr_mode) {
int upsample_above = 0;
int upsample_left = 0;
if (!disable_edge_filter) {
const int need_right = p_angle < 90;
const int need_bottom = p_angle > 180;
const int filt_type = get_filt_type(xd, plane);
// Edge filtering is skipped for the exact 90 and 180 degree angles.
if (p_angle != 90 && p_angle != 180) {
// 1 when the corner pixel at [-1] is included in the filtered span.
const int ab_le = need_above_left ? 1 : 0;
if (need_above && need_left && (txwpx + txhpx >= 24)) {
filter_intra_edge_corner(above_row, left_col);
}
if (need_above && n_top_px > 0) {
const int strength =
intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
av1_filter_intra_edge(above_row - ab_le, n_px, strength);
}
if (need_left && n_left_px > 0) {
const int strength = intra_edge_filter_strength(
txhpx, txwpx, p_angle - 180, filt_type);
const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
av1_filter_intra_edge(left_col - ab_le, n_px, strength);
}
}
// Upsampling decisions are made per edge; the flags are also passed on to
// dr_predictor().
upsample_above =
av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
if (need_above && upsample_above) {
const int n_px = txwpx + (need_right ? txhpx : 0);
av1_upsample_intra_edge(above_row, n_px);
}
upsample_left =
av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
if (need_left && upsample_left) {
const int n_px = txhpx + (need_bottom ? txwpx : 0);
av1_upsample_intra_edge(left_col, n_px);
}
}
dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above, upsample_left, p_angle);
return;
}
// DC mode: the predictor variant is selected by which of the left / top
// edges are available.
if (mode == DC_PRED) {
dc_pred[n_left_px > 0][n_top_px > 0][tx_size](dst, dst_stride, above_row, left_col);
}
// Remaining non-directional modes (everything except DC): dispatch through
// the pred[] table.
else {
pred[mode][tx_size](dst, dst_stride, above_row, left_col);
}
}