在分析源码之前,再简要讲一下macroblock-tree的原理。我们知道,x264为了降低码率,需要对每个宏块做量化,量化的粗细由量化参数qp决定:qp越大,该宏块中像素的精度就越低。为了提高质量,需要知道哪些宏块更“重要”,哪些比较“次要”:“重要”的宏块使用较小的qp,这样精度损失小;而“次要”的宏块可以使用大一些的qp,因为它的精度损失对画质影响不明显。那么如何判断一个宏块的重要程度呢?主要有两个方面:1.看宏块中图像的细节丰富程度,如果图像细节很丰富而量化值过高,图像很容易看起来不清晰,细节程度可以用帧内编码损耗(intra_costs)的大小来近似评判。 2.宏块被参考的程度,如果该宏块作为参考宏块,被别的宏块参考得越多,说明它的质量的影响范围就越大,也就是上两篇文章说的传播信息(propagate information)。
下面我们看下x264_macroblock_tree_propagate函数是如何计算的:
/*
 * Propagate macroblock-tree information from frame `b` back onto the
 * propagate-cost planes of its two reference frames.
 *
 *   h                 encoder context (supplies the mc function table, the
 *                     scratch buffer and the macroblock grid dimensions)
 *   frames            lookahead frame array, indexed by p0 / p1 / b
 *   average_duration  average frame duration, used to scale by frame duration
 *   p0, p1            indices of the list-0 and list-1 reference frames
 *   b                 index of the frame whose costs are propagated
 *   referenced        non-zero if frame `b` is itself used as a reference
 *
 * For every macroblock row the function
 *   1. computes the amount to propagate from the intra/inter costs, and
 *   2. distributes that amount onto the reference frames along the motion
 *      vectors.
 * Finally, under the condition checked at the end of the function, it
 *   3. converts the accumulated costs of frame `b` into per-macroblock QP
 *      offsets via x264_macroblock_tree_finish().
 *
 * Side effect: h->mb.i_mb_y is used as the row counter and is left equal to
 * h->mb.i_mb_height on return.
 */
static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, float average_duration, int p0, int p1, int b, int referenced )
{
    uint16_t *ref_costs[2] = {frames[p0]->i_propagate_cost,frames[p1]->i_propagate_cost};
    int dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0);
    int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
    /* When b == p1 there is no list-1 motion data and p1-b-1 would be -1,
     * i.e. an out-of-bounds read of lowres_mvs[1][]. The list-1 pointer is
     * never dereferenced in that case (see the b != p1 check in the loop),
     * so store NULL instead of indexing outside the array. */
    int16_t (*mvs[2])[2] = { frames[b]->lowres_mvs[0][b-p0-1],
                             b != p1 ? frames[b]->lowres_mvs[1][p1-b-1] : NULL };
    int bipred_weights[2] = {i_bipred_weight, 64 - i_bipred_weight};
    int16_t *buf = h->scratch_buffer;
    uint16_t *propagate_cost = frames[b]->i_propagate_cost;
    uint16_t *lowres_costs = frames[b]->lowres_costs[b-p0][p1-b];

    /* Must stay ahead of the floating-point math below
     * (presumably resets MMX state -- confirm against x264_emms). */
    x264_emms();
    float fps_factor = CLIP_DURATION(frames[b]->f_duration) / (CLIP_DURATION(average_duration) * 256.0f) * MBTREE_PRECISION;

    /* For non-reffed frames the source costs are always zero, so just memset one row and re-use it. */
    if( !referenced )
        memset( frames[b]->i_propagate_cost, 0, h->mb.i_mb_width * sizeof(uint16_t) );

    /* Walk the frame one macroblock row at a time. */
    for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->mb.i_mb_height; h->mb.i_mb_y++ )
    {
        int mb_index = h->mb.i_mb_y*h->mb.i_mb_stride;

        /* 1. From the intra and inter costs, compute the per-macroblock
         *    amount to propagate for this row into buf. */
        h->mc.mbtree_propagate_cost( buf, propagate_cost,
            frames[b]->i_intra_cost+mb_index, lowres_costs+mb_index,
            frames[b]->i_inv_qscale_factor+mb_index, &fps_factor, h->mb.i_mb_width );
        /* Only referenced frames have a full plane of incoming costs; for
         * unreferenced frames the single zeroed row is re-used. */
        if( referenced )
            propagate_cost += h->mb.i_mb_width;

        /* 2. Follow the motion vectors and accumulate the amounts in buf onto
         *    the macroblocks of the reference frame(s). */
        h->mc.mbtree_propagate_list( h, ref_costs[0], &mvs[0][mb_index], buf, &lowres_costs[mb_index],
                                     bipred_weights[0], h->mb.i_mb_y, h->mb.i_mb_width, 0 );
        if( b != p1 )
        {
            h->mc.mbtree_propagate_list( h, ref_costs[1], &mvs[1][mb_index], buf, &lowres_costs[mb_index],
                                         bipred_weights[1], h->mb.i_mb_y, h->mb.i_mb_width, 1 );
        }
    }

    /* 3. Turn the propagate costs of this frame into per-macroblock QP offsets. */
    if( h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead && referenced )
        x264_macroblock_tree_finish( h, frames[b], average_duration, b == p1 ? b - p0 : 0 );
}
从源码可知,主要分为三步:
1.调用 h->mc.mbtree_propagate_cost,实际上是调用mbtree_propagate_cost函数,如下:
/*
 * Compute, for one row of `len` macroblocks, how much information each
 * macroblock passes on to its reference(s).
 *
 *   dst           output: amount to propagate per macroblock, capped at 32767
 *   propagate_in  cost already propagated onto each macroblock
 *   intra_costs   intra cost per macroblock
 *   inter_costs   inter cost per macroblock; only the bits selected by
 *                 LOWRES_COST_MASK are used as the cost value
 *   inv_qscales   per-macroblock factor the intra cost is multiplied by
 *   fps_factor    pointer to the duration-based scale factor
 *   len           number of macroblocks in the row
 */
static void mbtree_propagate_cost( int16_t *dst, uint16_t *propagate_in, uint16_t *intra_costs,
                                   uint16_t *inter_costs, uint16_t *inv_qscales, float *fps_factor, int len )
{
    const float duration_scale = *fps_factor;
    int mb = 0;

    while( mb < len )
    {
        int intra = intra_costs[mb];
        /* The inter cost never counts as more than the intra cost. */
        int inter = X264_MIN(intra_costs[mb], inter_costs[mb] & LOWRES_COST_MASK);

        /* Total information held by this macroblock: what was propagated onto
         * it plus its own scaled intra cost. */
        float own_intra = intra * inv_qscales[mb];
        float total = propagate_in[mb] + own_intra*duration_scale;

        /* Fraction (intra - inter) / intra of that total is passed on. */
        float saved_cost = intra - inter;
        float full_cost = intra;
        int rounded = (int)(total * saved_cost / full_cost + 0.5f);

        dst[mb] = X264_MIN(rounded, 32767);
        mb++;
    }
}
下面解释一下里面的几个变量:
1.propagate_intra代表帧内损耗大小,对应上面简介的1。
2.propagate_in实际上代表propagate information,就是它作为参考帧被依赖的程度,对应2,在mbtree_propagate_list函数中计算得到,后面会讲到。
3.propagate_amount代表propagate_intra和propagate_in的总和。
4.propagate_num / propagate_denom,即(intra_cost - inter_cost) / intra_cost, 为什么propagate_amount最后需要乘以(intra_cost - inter_cost) / intra_cost呢,可以看下文档中的一段话:
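我们可以理解为:propagate_amount是当前宏块拥有的全部信息量(自身的帧内损耗加上别的宏块传播给它的信息),但其中只有一部分要归功于它的参考帧。这个比例可以用一个粗糙的方法估计:帧间损耗与帧内损耗的比值inter_cost / intra_cost近似代表无法从参考帧预测、属于当前宏块自己的那部分,剩下的(intra_cost - inter_cost) / intra_cost就是来自参考帧的比例。两者相乘得到的dst,就是当前宏块要继续传播给参考帧的信息量。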
继续看第2步,调用h->mc.mbtree_propagate_list,实际上是调用mbtree_propagate_list函数,如下:
/*
 * Distribute the propagate amounts of one macroblock row onto the
 * propagate-cost plane of one reference frame, following each macroblock's
 * motion vector.
 *
 *   h                 encoder context; only the macroblock grid stride, width
 *                     and height are read from it
 *   ref_costs         propagate-cost plane of the reference frame (updated)
 *   mvs               motion vectors for this row, one (x, y) pair per macroblock
 *   propagate_amount  amount to propagate for each macroblock of this row
 *   lowres_costs      per-macroblock costs; the bits above LOWRES_COST_SHIFT
 *                     are a bitmask of the reference lists the macroblock uses
 *   bipred_weight     weight out of 64 applied when both lists are used
 *   mb_y              row index of the macroblocks being processed
 *   len               number of macroblocks in the row
 *   list              which reference list (0 or 1) ref_costs belongs to
 */
static void mbtree_propagate_list( x264_t *h, uint16_t *ref_costs, int16_t (*mvs)[2],
int16_t *propagate_amount, uint16_t *lowres_costs,
int bipred_weight, int mb_y, int len, int list )
{
//ref_costs: propagate costs of the reference frame; propagate_amount: propagate amounts of the frame being processed
unsigned stride = h->mb.i_mb_stride;
unsigned width = h->mb.i_mb_width;
unsigned height = h->mb.i_mb_height;
for( unsigned i = 0; i < len; i++ )
{
/* Skip macroblocks that do not predict from this list. */
int lists_used = lowres_costs[i]>>LOWRES_COST_SHIFT;
if( !(lists_used & (1 << list)) )
continue;
int listamount = propagate_amount[i];
/* Apply bipred weighting. */
if( lists_used == 3 )
listamount = (listamount * bipred_weight + 32) >> 6;
/* Early termination for simple case of mv0. */
if( !M32( mvs[i] ) )
{
/* Zero motion: the whole amount goes to the co-located macroblock. */
MC_CLIP_ADD( ref_costs[mb_y*stride + i], listamount );
continue;
}
int x = mvs[i][0]; //motion vector
int y = mvs[i][1];
/* The vector is split into a whole-macroblock part (>>5) and a 5-bit
 * fractional part (&31), i.e. 32 units span one macroblock. */
unsigned mbx = (x>>5)+i; //coordinates of the top-left macroblock covered in the reference frame
unsigned mby = (y>>5)+mb_y;
unsigned idx0 = mbx + mby * stride;
unsigned idx2 = idx0 + stride;
x &= 31;
y &= 31;
/* Bilinear weights of the four macroblocks overlapped by the referenced
 * area; the four products sum to 32*32 = 1024. */
int idx0weight = (32-y)*(32-x);
int idx1weight = (32-y)*x;
int idx2weight = y*(32-x);
int idx3weight = y*x; //weights
/* Scale the amount by each weight, rounding, and normalise by 1024. */
idx0weight = (idx0weight * listamount + 512) >> 10;
idx1weight = (idx1weight * listamount + 512) >> 10;
idx2weight = (idx2weight * listamount + 512) >> 10;
idx3weight = (idx3weight * listamount + 512) >> 10;
/* Fast path: all four target macroblocks lie inside the frame. */
if( mbx < width-1 && mby < height-1 )
{
MC_CLIP_ADD( ref_costs[idx0+0], idx0weight ); //accumulate the propagate amount onto the four reference macroblocks
MC_CLIP_ADD( ref_costs[idx0+1], idx1weight );
MC_CLIP_ADD( ref_costs[idx2+0], idx2weight );
MC_CLIP_ADD( ref_costs[idx2+1], idx3weight );
}
else
{
/* Note: this takes advantage of unsigned representation to
* catch negative mbx/mby. */
/* Slow path: update only the targets that fall inside the frame. */
if( mby < height )
{
if( mbx < width )
MC_CLIP_ADD( ref_costs[idx0+0], idx0weight );
if( mbx+1 < width )
MC_CLIP_ADD( ref_costs[idx0+1], idx1weight );
}
if( mby+1 < height )
{
if( mbx < width )
MC_CLIP_ADD( ref_costs[idx2+0], idx2weight );
if( mbx+1 < width )
MC_CLIP_ADD( ref_costs[idx2+1], idx3weight );
}
}
}
}
它实际上是根据待编码帧与参考帧之间的运动向量,计算参考帧被依赖的程度,也就是上面说的变量propagate_in。要注意一点的是,它不仅对待编码帧对应的参考宏块做了累加,同时也对参考宏块附近的3个宏块做了累加。这是因为运动向量一般不会恰好指向宏块边界,被参考的区域会同时覆盖最多4个相邻宏块,所以代码按重叠面积(即idx0weight~idx3weight这四个权重)把传播信息分摊到这4个宏块上。画了个图辅助理解:
最后第3步,调用x264_macroblock_tree_finish函数,它根据propagate information大小,决定量化偏移值,详见代码:
/*
 * Convert the accumulated propagate costs of `frame` into per-macroblock QP
 * offsets, written to frame->f_qp_offset.
 *
 *   h                 encoder context (macroblock count and rc.f_qcompress)
 *   frame             frame whose QP offsets are computed
 *   average_duration  average frame duration, used to scale by frame duration
 *   ref0_distance     distance to the list-0 reference, or 0 to skip the
 *                     weighted-cost adjustment
 *
 * Macroblocks whose scaled intra cost is zero keep their existing offset.
 */
static void x264_macroblock_tree_finish( x264_t *h, x264_frame_t *frame, float average_duration, int ref0_distance )
{
    int duration_scale = round( CLIP_DURATION(average_duration) / CLIP_DURATION(frame->f_duration) * 256 / MBTREE_PRECISION );

    /* Extra term added to the log ratio when a positive weighted cost delta
     * is recorded for this reference distance. */
    float weight_bias = 0.0f;
    if( ref0_distance )
    {
        if( frame->f_weighted_cost_delta[ref0_distance-1] > 0 )
            weight_bias = (1.0 - frame->f_weighted_cost_delta[ref0_distance-1]);
    }

    /* Allow the strength to be adjusted via qcompress, since the two
     * concepts are very similar. */
    float tree_strength = 5.0f * (1.0f - h->param.rc.f_qcompress);

    for( int mb = 0; mb < h->mb.i_mb_count; mb++ )
    {
        int scaled_intra = (frame->i_intra_cost[mb] * frame->i_inv_qscale_factor[mb] + 128) >> 8;
        if( !scaled_intra )
            continue;

        int scaled_propagate = (frame->i_propagate_cost[mb] * duration_scale + 128) >> 8;
        /* log2( (intra + propagate) / intra ): the more a macroblock is
         * depended upon relative to its own cost, the larger the ratio and
         * the lower the resulting QP offset. */
        float ratio_log2 = x264_log2(scaled_intra + scaled_propagate) - x264_log2(scaled_intra) + weight_bias;
        frame->f_qp_offset[mb] = frame->f_qp_offset_aq[mb] - tree_strength * ratio_log2;
    }
}