x264 码率控制:前一篇中讲到,当前帧的 qp 值是通过 rcc->last_satd 计算出来的。那么 last_satd 是如何得到的呢?
last_satd = x264_rc_analyse_slice( h ); // 函数原型:int x264_rc_analyse_slice( x264_t *h );
函数中把每一行计算的row_satd 累加起来。
每一行中的row_satd如何得到的呢?
lookahead线程启动:
lookahead_thread()->lookahead_thread_internal()//注意:这里的 internal 指的是 lookahead 的内部实现,而不是帧内(intra)预测
-->lookahead_slicetype_decide()// lookahead内部决定了最终的编码帧类型
-->x264_slicetype_analyse()
// Excerpt: the mb-tree pass is only run when h->param.rc.b_mb_tree is set.
if( h->param.rc.b_mb_tree )
macroblock_tree( h, &a, frames, X264_MIN(num_frames, h->param.i_keyint_max), keyframe );
// NOTE(review): the loop below passes `a` (not `&a`) on, so it presumably lives
// inside macroblock_tree() rather than in its caller — confirm against the x264 source.
// Walk i downwards for as long as it stays above idx.
while( i-- > idx )
{
cur_nonb = i;
while( IS_X264_TYPE_B( frames[cur_nonb]->i_type ) && cur_nonb > 0 )
cur_nonb--; // Step back over B-frames until a non-B frame (or index 0) is reached
if( cur_nonb < idx )
break;
// Cost the frame at last_nonb against cur_nonb: last_nonb is passed twice (as p1 and as b).
slicetype_frame_cost( h, a, frames, cur_nonb, last_nonb, last_nonb );
}
// Core function: slicetype_frame_cost(h,a,frames,cur_nonb,last_nonb,last_nonb);
// Estimates the cost of frames[b] using frames[p0] and frames[p1] as references.
// NOTE(review): this is an abbreviated excerpt. The names s, output_inter, output_intra,
// dist_scale_factor, NUM_INTS, NUM_ROWS and PAD_SIZE are used below but declared outside
// the lines shown, and the function's closing brace is not part of the excerpt.
static int slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
x264_frame_t **frames, int p0, int p1, int b )
{// The caller runs this from a loop and calls it repeatedly, so first check whether the frame was already evaluated to avoid recomputing it
int i_score = 0;
int do_search[2];
const x264_weight_t *w = x264_weight_none;
x264_frame_t *fenc = frames[b];
/* Check whether we already evaluated this frame
* If we have tried this frame as P, then we have also tried
* the preceding frames as B. (is this still true?) */
/* Also check that we already calculated the row SATDs for the current frame. */
if( fenc->i_cost_est[b-p0][p1-b] >= 0 &&
(!h->param.rc.i_vbv_buffer_size || fenc->i_row_satds[b-p0][p1-b][0] != -1) )
{// Already computed earlier
i_score = fenc->i_cost_est[b-p0][p1-b];// Cached estimate: when already computed, i_score is taken straight from the cache
}
else
{
// Dispatch the cost computation to the lookahead threads
for( int i = 0; i < h->param.i_lookahead_threads; i++ )
{
x264_t *t = h->lookahead_thread[i];
/* FIXME move this somewhere else */
t->mb.i_me_method = h->mb.i_me_method;
t->mb.i_subpel_refine = h->mb.i_subpel_refine;
t->mb.b_chroma_me = h->mb.b_chroma_me;
// Bundle the arguments for this thread's slicetype_slice_cost() call
s[i] = (x264_slicetype_slice_t){ t, a, frames, p0, p1, b, dist_scale_factor, do_search, w,
output_inter[i], output_intra[i] };
// Split the macroblock rows across the threads; adding i_lookahead_threads/2 before
// the division rounds each boundary to the nearest row.
t->i_threadslice_start = ((h->mb.i_mb_height * i + h->param.i_lookahead_threads/2) / h->param.i_lookahead_threads);
t->i_threadslice_end = ((h->mb.i_mb_height * (i+1) + h->param.i_lookahead_threads/2) / h->param.i_lookahead_threads);
int thread_height = t->i_threadslice_end - t->i_threadslice_start;
int thread_output_size = thread_height + NUM_INTS;
// Zero this thread's output buffers and record its row count at index NUM_ROWS
memset( output_inter[i], 0, thread_output_size * sizeof(int) );
memset( output_intra[i], 0, thread_output_size * sizeof(int) );
output_inter[i][NUM_ROWS] = output_intra[i][NUM_ROWS] = thread_height;
// The next thread's buffers start right after this thread's, plus PAD_SIZE ints
output_inter[i+1] = output_inter[i] + thread_output_size + PAD_SIZE;
output_intra[i+1] = output_intra[i] + thread_output_size + PAD_SIZE;
x264_threadpool_run( h->lookaheadpool, (void*)slicetype_slice_cost, &s[i] );
}
for( int i = 0; i < h->param.i_lookahead_threads; i++ )
x264_threadpool_wait( h->lookaheadpool, &s[i] );
// Wait for the threaded computation to finish
}
/* Slice cost computation: evaluates every macroblock that belongs to this
 * lookahead slice by calling slicetype_mb_cost() once per macroblock.
 * The current position is kept in h->mb.i_mb_x / h->mb.i_mb_y. */
static void slicetype_slice_cost( x264_slicetype_slice_t *s )
{
    x264_t *h = s->h;

    /* Edge macroblocks are included when mb-tree or VBV is enabled, or when the
     * frame is at most 2 macroblocks wide or tall. (Upstream note: the edge mbs
     * seem to reduce the predictive quality of the whole frame's score, but are
     * needed for a spatial distribution.) */
    int with_edges = 0;
    if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size ||
        h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2 )
        with_edges = 1;

    /* Number of border macroblocks skipped on each side: 0 with edges, 1 without. */
    int border = 1 - with_edges;

    /* Row range is clamped to this thread's slice; column range spans the frame. */
    int row_hi = X264_MIN( h->i_threadslice_end - 1, h->mb.i_mb_height - 2 + with_edges );
    int row_lo = X264_MAX( h->i_threadslice_start, border );
    int col_hi = h->mb.i_mb_width - 2 + with_edges;
    int col_lo = border;

    /* Iterate from bottom-right to top-left. (Upstream note: lowres lookahead goes
     * backwards because the MVs are used as predictors in the main encode, which
     * considerably improves MV prediction overall.) */
    h->mb.i_mb_y = row_hi;
    while( h->mb.i_mb_y >= row_lo )
    {
        h->mb.i_mb_x = col_hi;
        while( h->mb.i_mb_x >= col_lo )
        {
            slicetype_mb_cost( h, s->a, s->frames, s->p0, s->p1, s->b, s->dist_scale_factor,
                               s->do_search, s->w, s->output_inter, s->output_intra );
            h->mb.i_mb_x--;
        }
        h->mb.i_mb_y--;
    }
}
/* The mbcmp family of functions: picks the comparison functions (SATD/SA8D or SAD)
 * used for macroblock comparison and for full-pel search, and stores the choice
 * in h->pixf. */
static void mbcmp_init( x264_t *h )
{
    /* SATD-based metrics are used unless encoding is lossless or subpel refinement is <= 1. */
    const int use_satd = !h->mb.b_lossless && h->param.analyse.i_subpel_refine > 1;
    /* The x9 intra helpers are left NULL for cpu-independent or lossless encoding. */
    const int skip_x9 = h->param.b_cpu_independent || h->mb.b_lossless;
    /* Full-pel comparison uses SATD only when SATD is enabled and the ME method is TESA. */
    const int fpel_satd = use_satd && h->param.analyse.i_me_method == X264_ME_TESA;

    if( use_satd )
    {
        memcpy( h->pixf.mbcmp, h->pixf.satd, sizeof(h->pixf.mbcmp) );
        memcpy( h->pixf.mbcmp_unaligned, h->pixf.satd, sizeof(h->pixf.mbcmp_unaligned) );
        h->pixf.intra_mbcmp_x3_16x16 = h->pixf.intra_satd_x3_16x16;
        h->pixf.intra_mbcmp_x3_8x16c = h->pixf.intra_satd_x3_8x16c;
        h->pixf.intra_mbcmp_x3_8x8c = h->pixf.intra_satd_x3_8x8c;
        h->pixf.intra_mbcmp_x3_8x8 = h->pixf.intra_sa8d_x3_8x8;
        h->pixf.intra_mbcmp_x3_4x4 = h->pixf.intra_satd_x3_4x4;
    }
    else
    {
        memcpy( h->pixf.mbcmp, h->pixf.sad_aligned, sizeof(h->pixf.mbcmp) );
        memcpy( h->pixf.mbcmp_unaligned, h->pixf.sad, sizeof(h->pixf.mbcmp_unaligned) );
        h->pixf.intra_mbcmp_x3_16x16 = h->pixf.intra_sad_x3_16x16;
        h->pixf.intra_mbcmp_x3_8x16c = h->pixf.intra_sad_x3_8x16c;
        h->pixf.intra_mbcmp_x3_8x8c = h->pixf.intra_sad_x3_8x8c;
        h->pixf.intra_mbcmp_x3_8x8 = h->pixf.intra_sad_x3_8x8;
        h->pixf.intra_mbcmp_x3_4x4 = h->pixf.intra_sad_x3_4x4;
    }

    if( skip_x9 )
    {
        h->pixf.intra_mbcmp_x9_4x4 = NULL;
        h->pixf.intra_mbcmp_x9_8x8 = NULL;
    }
    else if( use_satd )
    {
        h->pixf.intra_mbcmp_x9_4x4 = h->pixf.intra_satd_x9_4x4;
        h->pixf.intra_mbcmp_x9_8x8 = h->pixf.intra_sa8d_x9_8x8;
    }
    else
    {
        h->pixf.intra_mbcmp_x9_4x4 = h->pixf.intra_sad_x9_4x4;
        h->pixf.intra_mbcmp_x9_8x8 = h->pixf.intra_sad_x9_8x8;
    }

    if( fpel_satd )
    {
        memcpy( h->pixf.fpelcmp, h->pixf.satd, sizeof(h->pixf.fpelcmp) );
        memcpy( h->pixf.fpelcmp_x3, h->pixf.satd_x3, sizeof(h->pixf.fpelcmp_x3) );
        memcpy( h->pixf.fpelcmp_x4, h->pixf.satd_x4, sizeof(h->pixf.fpelcmp_x4) );
    }
    else
    {
        memcpy( h->pixf.fpelcmp, h->pixf.sad, sizeof(h->pixf.fpelcmp) );
        memcpy( h->pixf.fpelcmp_x3, h->pixf.sad_x3, sizeof(h->pixf.fpelcmp_x3) );
        memcpy( h->pixf.fpelcmp_x4, h->pixf.sad_x4, sizeof(h->pixf.fpelcmp_x4) );
    }
}
// Per-macroblock cost for the macroblock at (h->mb.i_mb_x, h->mb.i_mb_y) of frames[b],
// with frames[p0] and frames[p1] as references.
// NOTE(review): abbreviated excerpt — only the local setup is shown; the actual cost
// computation is omitted.
static void slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
x264_frame_t **frames, int p0, int p1, int b,
int dist_scale_factor, int do_search[2], const x264_weight_t *w,
int *output_inter, int *output_intra )
{// The results are ultimately written to output_inter and output_intra; the computation is fairly involved and is left for a later walkthrough
x264_frame_t *fref0 = frames[p0];
x264_frame_t *fref1 = frames[p1];
x264_frame_t *fenc = frames[b];
// Above: fetch the frame being encoded and its two reference frames (p0 and p1)
// Bidirectional case: b lies before p1
const int b_bidir = (b < p1);
const int i_mb_x = h->mb.i_mb_x;
const int i_mb_y = h->mb.i_mb_y;
const int i_mb_stride = h->mb.i_mb_width;
const int i_mb_xy = i_mb_x + i_mb_y * i_mb_stride;
const int i_stride = fenc->i_stride_lowres;
// Offset into the lowres plane: each macroblock step is 8 units horizontally and 8 rows vertically
const int i_pel_offset = 8 * (i_mb_x + i_mb_y * i_stride);
// Bi-prediction weight: derived from dist_scale_factor when weighted bipred is enabled, otherwise 32
const int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
// This macroblock's lowres MV and MV-cost entries: index [0] uses distance b-p0, index [1] uses distance p1-b
int16_t (*fenc_mvs[2])[2] = { &fenc->lowres_mvs[0][b-p0-1][i_mb_xy], &fenc->lowres_mvs[1][p1-b-1][i_mb_xy] };
int (*fenc_costs[2]) = { &fenc->lowres_mv_costs[0][b-p0-1][i_mb_xy], &fenc->lowres_mv_costs[1][p1-b-1][i_mb_xy] };
// Half-resolution cost initialisation.
// b_frame_score_mb is set for macroblocks that are not on the frame border, or for
// every macroblock when the frame is at most 2 macroblocks wide or tall.
int b_frame_score_mb = (i_mb_x > 0 && i_mb_x < h->mb.i_mb_width - 1 &&
i_mb_y > 0 && i_mb_y < h->mb.i_mb_height - 1) ||
h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2;
// Compute the cost (omitted in this excerpt)
}
// Next, see how x264_rc_analyse_slice() reads the result back
// Start from the frame cost estimate cached for this (b-p0, p1-b) pair.
cost = frames[b]->i_cost_est[b-p0][p1-b];
assert( cost >= 0 );
// With mb-tree enabled and no stats file being read, the cost is recalculated instead.
if( h->param.rc.b_mb_tree && !h->param.rc.b_stat_read )
{
cost = slicetype_frame_cost_recalculate( h, frames, p0, p1, b );
// With VBV and b != 0, also recalculate for (b, b, b); the return value is discarded here.
if( b && h->param.rc.i_vbv_buffer_size )
slicetype_frame_cost_recalculate( h, frames, b, b, b );
}
/* In AQ, use the weighted score instead. */
else if( h->param.rc.i_aq_mode )
cost = frames[b]->i_cost_est_aq[b-p0][p1-b];
// NOTE(review): this excerpt places the return before the intra-refresh adjustment quoted
// further down, which still does `cost += diff`; presumably the real return follows that
// block — verify against the x264 source.
return cost;// Return the cost computed in the lookahead
// The switch below is usually off. It spreads the I-frame cost over the P-frames; it is not
// walked through here (the author read it but did not fully understand it).
// Runs only for P-frames when both intra refresh and VBV are enabled.
if( h->param.b_intra_refresh && h->param.rc.i_vbv_buffer_size && h->fenc->i_type == X264_TYPE_P )
{
int ip_factor = 256 * h->param.rc.f_ip_factor; /* fix8 */
for( int y = 0; y < h->mb.i_mb_height; y++ )
{
// Visit only the macroblock columns i_pir_start_col..i_pir_end_col (inclusive) of each row
int mb_xy = y * h->mb.i_mb_stride + h->fdec->i_pir_start_col;
for( int x = h->fdec->i_pir_start_col; x <= h->fdec->i_pir_end_col; x++, mb_xy++ )
{
// Intra cost scaled by ip_factor (8-bit fixed point, rounded)
int intra_cost = (h->fenc->i_intra_cost[mb_xy] * ip_factor + 128) >> 8;
int inter_cost = h->fenc->lowres_costs[b-p0][p1-b][mb_xy] & LOWRES_COST_MASK;
int diff = intra_cost - inter_cost; // Intra cost minus inter cost; why it is computed this way still needs investigation.
// With AQ the per-row delta is scaled by the macroblock's i_inv_qscale_factor (8-bit fixed point)
if( h->param.rc.i_aq_mode )
h->fdec->i_row_satd[y] += (diff * frames[b]->i_inv_qscale_factor[mb_xy] + 128) >> 8;
else
h->fdec->i_row_satd[y] += diff;
// The frame cost always receives the unscaled difference
cost += diff;
}
}
}