我们知道x264源码从主观上分为两大块,一是解析函数parse(),另一个是编码函数encode()。解析函数parse()较简单,并且不涉及H.264编码算法,故不是重点,所以我们要学习的重点自然而然地就是编码函数encode()了。
encode()函数是x264的主干函数,主要包括x264_encoder_open()函数、x264_encoder_headers()函数、x264_encoder_encode()函数与x264_encoder_close()函数四大部分,如下图所示,其中x264_encoder_encode()函数是其核心部分,具体的H.264视频编码算法均在此模块。
本文主要分析x264_encoder_open()函数,x264_encoder_open()用于打开编码器,其中初始化了libx264编码所需要的各种变量,如下图所示。它调用了下面的函数:
x264_validate_parameters():检查输入参数(例如输入图像的宽高是否为正数)。
x264_sps_init():根据输入参数初始化SPS(Sequence Parameter Set,序列参数集)。
x264_pps_init():根据输入参数初始化PPS(Picture Parameter Set,图像参数集)。
x264_predict_16x16_init():初始化Intra16x16帧内预测汇编函数。
x264_predict_4x4_init():初始化Intra4x4帧内预测汇编函数。
x264_pixel_init():初始化像素值计算相关的汇编函数(包括SAD、SATD、SSD等)。
x264_dct_init():初始化DCT变换和DCT反变换相关的汇编函数。
x264_mc_init():初始化运动补偿相关的汇编函数。
x264_quant_init():初始化量化和反量化相关的汇编函数。
x264_deblock_init():初始化去块效应滤波器相关的汇编函数。
x264_lookahead_init():初始化Lookahead相关的变量。
x264_ratecontrol_new():初始化码率控制相关的变量。
对应的代码如下:
/******************************************************************/
/******************************************************************/
/*
======Analysed by RuiDong Fang
======Csdn Blog:http://blog.csdn.net/frd2009041510
======Date:2016.03.08
*/
/******************************************************************/
/******************************************************************/
/****************************************************************************
 * x264_encoder_open:
 *
 * Opens an encoder instance.
 *
 * Allocates a zeroed x264_t, copies *param into it, validates the parameters,
 * initializes the SPS/PPS, the CPU-specific function tables, the per-thread
 * contexts, the lookahead and the rate control, and finally logs the selected
 * profile and level.
 *
 * param: encoder parameters. They are copied into the new handle; if
 *        param->param_free is set it is invoked on param right after the
 *        copy, so the caller should not rely on param afterwards.
 *
 * Returns the new encoder handle, or NULL if any initialization step fails.
 ****************************************************************************/
x264_t *x264_encoder_open( x264_param_t *param )
{
    x264_t *h;
    char buf[1000], *p;
    int i_slicetype_length;

    CHECKED_MALLOCZERO( h, sizeof(x264_t) );

    /* Create a copy of param */
    memcpy( &h->param, param, sizeof(x264_param_t) ); // copy the caller's parameters into the handle

    if( param->param_free )
        param->param_free( param );

#if HAVE_INTEL_DISPATCHER
    x264_intel_dispatcher_override();
#endif

    if( x264_threading_init() )
    {
        x264_log( h, X264_LOG_ERROR, "unable to initialize threading\n" );
        goto fail;
    }

    /* Validate the input parameters (e.g. that the picture width/height are positive). */
    if( x264_validate_parameters( h, 1 ) < 0 )
        goto fail;

    if( h->param.psz_cqm_file )
        if( x264_cqm_parse_file( h, h->param.psz_cqm_file ) < 0 )
            goto fail;

    /* Take private copies of the stats file names.
     * NOTE(review): the strdup() results are not checked for NULL here -- confirm
     * that downstream users tolerate a failed duplication. */
    if( h->param.rc.psz_stat_out )
        h->param.rc.psz_stat_out = strdup( h->param.rc.psz_stat_out );
    if( h->param.rc.psz_stat_in )
        h->param.rc.psz_stat_in = strdup( h->param.rc.psz_stat_in );

    x264_reduce_fraction( &h->param.i_fps_num, &h->param.i_fps_den );
    x264_reduce_fraction( &h->param.i_timebase_num, &h->param.i_timebase_den );

    /* Init x264_t */
    h->i_frame = -1;
    h->i_frame_num = 0;

    if( h->param.i_avcintra_class )
        h->i_idr_pic_id = 5;
    else
        h->i_idr_pic_id = 0;

    if( (uint64_t)h->param.i_timebase_den * 2 > UINT32_MAX )
    {
        x264_log( h, X264_LOG_ERROR, "Effective timebase denominator %u exceeds H.264 maximum\n", h->param.i_timebase_den );
        goto fail;
    }

    x264_set_aspect_ratio( h, &h->param, 1 );

    /* Build the SPS (Sequence Parameter Set) of the H.264 stream from the input parameters. */
    x264_sps_init( h->sps, h->param.i_sps_id, &h->param );
    /* Build the PPS (Picture Parameter Set) of the H.264 stream from the input parameters. */
    x264_pps_init( h->pps, h->param.i_sps_id, &h->param, h->sps );

    x264_validate_levels( h, 1 ); // check the Level constraints (macroblock count and so on)

    h->chroma_qp_table = i_chroma_qp_table + 12 + h->pps->i_chroma_qp_index_offset;

    if( x264_cqm_init( h ) < 0 )
        goto fail;

    /* Mirror the macroblock dimensions from the SPS into the macroblock context. */
    h->mb.i_mb_width = h->sps->i_mb_width;
    h->mb.i_mb_height = h->sps->i_mb_height;
    h->mb.i_mb_count = h->mb.i_mb_width * h->mb.i_mb_height;

    h->mb.chroma_h_shift = CHROMA_FORMAT == CHROMA_420 || CHROMA_FORMAT == CHROMA_422;
    h->mb.chroma_v_shift = CHROMA_FORMAT == CHROMA_420;

    /* Adaptive MBAFF and subme 0 are not supported as we require halving motion
     * vectors during prediction, resulting in hpel mvs.
     * The chosen solution is to make MBAFF non-adaptive in this case. */
    h->mb.b_adaptive_mbaff = PARAM_INTERLACED && h->param.analyse.i_subpel_refine;

    /* Init frames. */
    if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS && !h->param.rc.b_stat_read )
        h->frames.i_delay = X264_MAX(h->param.i_bframe,3)*4;
    else
        h->frames.i_delay = h->param.i_bframe;
    if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size )
        h->frames.i_delay = X264_MAX( h->frames.i_delay, h->param.rc.i_lookahead );
    /* Remember the delay before the threading/sync/VFR additions; it is passed
     * to x264_lookahead_init() further down. */
    i_slicetype_length = h->frames.i_delay;
    h->frames.i_delay += h->i_thread_frames - 1;
    h->frames.i_delay += h->param.i_sync_lookahead;
    h->frames.i_delay += h->param.b_vfr_input;
    h->frames.i_bframe_delay = h->param.i_bframe ? (h->param.i_bframe_pyramid ? 2 : 1) : 0;

    h->frames.i_max_ref0 = h->param.i_frame_reference;
    h->frames.i_max_ref1 = X264_MIN( h->sps->vui.i_num_reorder_frames, h->param.i_frame_reference );
    h->frames.i_max_dpb = h->sps->vui.i_max_dec_frame_buffering;
    h->frames.b_have_lowres = !h->param.rc.b_stat_read
        && ( h->param.rc.i_rc_method == X264_RC_ABR
          || h->param.rc.i_rc_method == X264_RC_CRF
          || h->param.i_bframe_adaptive
          || h->param.i_scenecut_threshold
          || h->param.rc.b_mb_tree
          || h->param.analyse.i_weighted_pred );
    h->frames.b_have_lowres |= h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0;
    h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);

    h->frames.i_last_idr =
    h->frames.i_last_keyframe = - h->param.i_keyint_max;
    h->frames.i_input = 0;
    h->frames.i_largest_pts = h->frames.i_second_largest_pts = -1;
    h->frames.i_poc_last_open_gop = -1;

    /* CHECKED_MALLOCZERO( var, size ): allocate memory and zero it
     * (presumably jumping to `fail` on allocation failure -- macro not visible here). */
    CHECKED_MALLOCZERO( h->frames.unused[0], (h->frames.i_delay + 3) * sizeof(x264_frame_t *) );
    /* Allocate room for max refs plus a few extra just in case. */
    CHECKED_MALLOCZERO( h->frames.unused[1], (h->i_thread_frames + X264_REF_MAX + 4) * sizeof(x264_frame_t *) );
    CHECKED_MALLOCZERO( h->frames.current, (h->param.i_sync_lookahead + h->param.i_bframe
                        + h->i_thread_frames + 3) * sizeof(x264_frame_t *) );
    if( h->param.analyse.i_weighted_pred > 0 )
        CHECKED_MALLOCZERO( h->frames.blank_unused, h->i_thread_frames * 4 * sizeof(x264_frame_t *) );
    h->i_ref[0] = h->i_ref[1] = 0;
    h->i_cpb_delay = h->i_coded_fields = h->i_disp_fields = 0;
    h->i_prev_duration = ((uint64_t)h->param.i_fps_den * h->sps->vui.i_time_scale) / ((uint64_t)h->param.i_fps_num * h->sps->vui.i_num_units_in_tick);
    h->i_disp_fields_last_frame = -1;

    x264_rdo_init(); // RDO initialization

    /* init CPU functions */
    /* Initialize the function tables that have assembly-optimized variants. */
    /* Intra prediction */
    x264_predict_16x16_init( h->param.cpu, h->predict_16x16 ); // Intra16x16 prediction functions (defined in common/predict.c)
    x264_predict_8x8c_init( h->param.cpu, h->predict_8x8c );
    x264_predict_8x16c_init( h->param.cpu, h->predict_8x16c );
    x264_predict_8x8_init( h->param.cpu, h->predict_8x8, &h->predict_8x8_filter );
    x264_predict_4x4_init( h->param.cpu, h->predict_4x4 ); // Intra4x4 prediction functions

    x264_pixel_init( h->param.cpu, &h->pixf ); // pixel metric functions: SAD, SATD, SSD, ... (defined in common/pixel.c)
    x264_dct_init( h->param.cpu, &h->dctf ); // forward and inverse DCT functions (defined in common/dct.c)
    x264_zigzag_init( h->param.cpu, &h->zigzagf_progressive, &h->zigzagf_interlaced ); // zigzag scan
    memcpy( &h->zigzagf, PARAM_INTERLACED ? &h->zigzagf_interlaced : &h->zigzagf_progressive, sizeof(h->zigzagf) );
    x264_mc_init( h->param.cpu, &h->mc, h->param.b_cpu_independent ); // motion compensation functions (defined in common/mc.c)
    x264_quant_init( h, h->param.cpu, &h->quantf ); // quantization and dequantization functions (defined in common/quant.c)
    x264_deblock_init( h->param.cpu, &h->loopf, PARAM_INTERLACED ); // deblocking filter functions (defined in common/deblock.c)
    x264_bitstream_init( h->param.cpu, &h->bsf );

    /* Initialize the entropy coder: CABAC or CAVLC. */
    if( h->param.b_cabac )
        x264_cabac_init( h );
    else
        x264_stack_align( x264_cavlc_init, h );

    /* Selects whether pixel comparisons use SAD or SATD. */
    mbcmp_init( h );
    chroma_dsp_init( h );

    /* Build and log the list of CPU capabilities in use.
     * NOTE(review): sprintf() into buf[1000] is unbounded; this relies on the
     * x264_cpu_names table staying small -- confirm. */
    p = buf + sprintf( buf, "using cpu capabilities:" );
    for( int i = 0; x264_cpu_names[i].flags; i++ )
    {
        /* Skip names that are implied by a stronger capability that is also set. */
        if( !strcmp(x264_cpu_names[i].name, "SSE")
            && h->param.cpu & (X264_CPU_SSE2) )
            continue;
        if( !strcmp(x264_cpu_names[i].name, "SSE2")
            && h->param.cpu & (X264_CPU_SSE2_IS_FAST|X264_CPU_SSE2_IS_SLOW) )
            continue;
        if( !strcmp(x264_cpu_names[i].name, "SSE3")
            && (h->param.cpu & X264_CPU_SSSE3 || !(h->param.cpu & X264_CPU_CACHELINE_64)) )
            continue;
        if( !strcmp(x264_cpu_names[i].name, "SSE4.1")
            && (h->param.cpu & X264_CPU_SSE42) )
            continue;
        if( !strcmp(x264_cpu_names[i].name, "BMI1")
            && (h->param.cpu & X264_CPU_BMI2) )
            continue;
        /* Print a name only if all of its flags are set and it is not a
         * duplicate of the previous table entry's flags. */
        if( (h->param.cpu & x264_cpu_names[i].flags) == x264_cpu_names[i].flags
            && (!i || x264_cpu_names[i].flags != x264_cpu_names[i-1].flags) )
            p += sprintf( p, " %s", x264_cpu_names[i].name );
    }
    if( !h->param.cpu )
        p += sprintf( p, " none!" );
    x264_log( h, X264_LOG_INFO, "%s\n", buf );

    if( x264_analyse_init_costs( h ) )
        goto fail;

    static const uint16_t cost_mv_correct[7] = { 24, 47, 95, 189, 379, 757, 1515 };
    /* Checks for known miscompilation issues. */
    if( h->cost_mv[X264_LOOKAHEAD_QP][2013] != cost_mv_correct[BIT_DEPTH-8] )
    {
        x264_log( h, X264_LOG_ERROR, "MV cost test failed: x264 has been miscompiled!\n" );
        goto fail;
    }

    /* Must be volatile or else GCC will optimize it out. */
    volatile int temp = 392;
    if( x264_clz( temp ) != 23 )
    {
        x264_log( h, X264_LOG_ERROR, "CLZ test failed: x264 has been miscompiled!\n" );
#if ARCH_X86 || ARCH_X86_64
        x264_log( h, X264_LOG_ERROR, "Are you attempting to run an SSE4a/LZCNT-targeted build on a CPU that\n" );
        x264_log( h, X264_LOG_ERROR, "doesn't support it?\n" );
#endif
        goto fail;
    }

    /* Size the output bitstream buffer: at least 1000000 bytes, otherwise scaled
     * by the picture area and a 0.95^qp factor. */
    h->out.i_nal = 0;
    h->out.i_bitstream = X264_MAX( 1000000, h->param.i_width * h->param.i_height * 4
        * ( h->param.rc.i_rc_method == X264_RC_ABR ? pow( 0.95, h->param.rc.i_qp_min )
          : pow( 0.95, h->param.rc.i_qp_constant ) * X264_MAX( 1, h->param.rc.f_ip_factor )));

    h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4 + 64; /* +4 for startcode, +64 for nal_escape assembly padding */
    CHECKED_MALLOC( h->nal_buffer, h->nal_buffer_size );

    CHECKED_MALLOC( h->reconfig_h, sizeof(x264_t) );

    if( h->param.i_threads > 1 &&
        x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
        goto fail;
    if( h->param.i_lookahead_threads > 1 &&
        x264_threadpool_init( &h->lookaheadpool, h->param.i_lookahead_threads, NULL, NULL ) )
        goto fail;

#if HAVE_OPENCL
    if( h->param.b_opencl )
    {
        h->opencl.ocl = x264_opencl_load_library();
        if( !h->opencl.ocl )
        {
            /* Failing to load OpenCL is not fatal: just disable it. */
            x264_log( h, X264_LOG_WARNING, "failed to load OpenCL\n" );
            h->param.b_opencl = 0;
        }
    }
#endif

    /* Thread 0 is the main handle itself; the others get their own x264_t. */
    h->thread[0] = h;
    for( int i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
        CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );
    if( h->param.i_lookahead_threads > 1 )
        for( int i = 0; i < h->param.i_lookahead_threads; i++ )
        {
            CHECKED_MALLOC( h->lookahead_thread[i], sizeof(x264_t) );
            *h->lookahead_thread[i] = *h;
        }
    *h->reconfig_h = *h;

    for( int i = 0; i < h->param.i_threads; i++ )
    {
        int init_nal_count = h->param.i_slice_count + 3;
        /* With sliced threads only thread 0 owns the frame and macroblock cache;
         * the other threads share thread 0's fdec. */
        int allocate_threadlocal_data = !h->param.b_sliced_threads || !i;
        if( i > 0 )
            *h->thread[i] = *h;

        if( x264_pthread_mutex_init( &h->thread[i]->mutex, NULL ) )
            goto fail;
        if( x264_pthread_cond_init( &h->thread[i]->cv, NULL ) )
            goto fail;

        if( allocate_threadlocal_data )
        {
            h->thread[i]->fdec = x264_frame_pop_unused( h, 1 );
            if( !h->thread[i]->fdec )
                goto fail;
        }
        else
            h->thread[i]->fdec = h->thread[0]->fdec;

        CHECKED_MALLOC( h->thread[i]->out.p_bitstream, h->out.i_bitstream );
        /* Start each thread with room for init_nal_count NAL units; it'll realloc later if needed. */
        CHECKED_MALLOC( h->thread[i]->out.nal, init_nal_count*sizeof(x264_nal_t) );
        h->thread[i]->out.i_nals_allocated = init_nal_count;

        if( allocate_threadlocal_data && x264_macroblock_cache_allocate( h->thread[i] ) < 0 )
            goto fail;
    }

#if HAVE_OPENCL
    if( h->param.b_opencl && x264_opencl_lookahead_init( h ) < 0 )
        h->param.b_opencl = 0;
#endif

    if( x264_lookahead_init( h, i_slicetype_length ) ) // initialize the lookahead
        goto fail;

    for( int i = 0; i < h->param.i_threads; i++ )
        if( x264_macroblock_thread_allocate( h->thread[i], 0 ) < 0 )
            goto fail;

    if( x264_ratecontrol_new( h ) < 0 ) // create the rate control state
        goto fail;

    if( h->param.i_nal_hrd )
    {
        x264_log( h, X264_LOG_DEBUG, "HRD bitrate: %i bits/sec\n", h->sps->vui.hrd.i_bit_rate_unscaled );
        x264_log( h, X264_LOG_DEBUG, "CPB size: %i bits\n", h->sps->vui.hrd.i_cpb_size_unscaled );
    }

    if( h->param.psz_dump_yuv )
    {
        /* create or truncate the reconstructed video file */
        FILE *f = x264_fopen( h->param.psz_dump_yuv, "w" );
        if( !f )
        {
            x264_log( h, X264_LOG_ERROR, "dump_yuv: can't write to %s\n", h->param.psz_dump_yuv );
            goto fail;
        }
        else if( !x264_is_regular_file( f ) )
        {
            x264_log( h, X264_LOG_ERROR, "dump_yuv: incompatible with non-regular file %s\n", h->param.psz_dump_yuv );
            fclose( f );
            goto fail;
        }
        fclose( f );
    }

    /* Human-readable profile name derived from profile_idc / constraint_set3. */
    const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Constrained Baseline" :
                          h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
                          h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
                          h->sps->i_profile_idc == PROFILE_HIGH10 ? (h->sps->b_constraint_set3 == 1 ? "High 10 Intra" : "High 10") :
                          h->sps->i_profile_idc == PROFILE_HIGH422 ? (h->sps->b_constraint_set3 == 1 ? "High 4:2:2 Intra" : "High 4:2:2") :
                          h->sps->b_constraint_set3 == 1 ? "High 4:4:4 Intra" : "High 4:4:4 Predictive";
    /* "M.m" plus the terminator fits in 4 bytes; snprintf truncates anything longer. */
    char level[4];
    snprintf( level, sizeof(level), "%d.%d", h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
    /* level_idc 9, or 11 with constraint_set3 on Baseline/Main, is reported as "1b". */
    if( h->sps->i_level_idc == 9 || ( h->sps->i_level_idc == 11 && h->sps->b_constraint_set3 &&
        (h->sps->i_profile_idc == PROFILE_BASELINE || h->sps->i_profile_idc == PROFILE_MAIN) ) )
        strcpy( level, "1b" );

    if( h->sps->i_profile_idc < PROFILE_HIGH10 ) // log the profile and level
    {
        x264_log( h, X264_LOG_INFO, "profile %s, level %s\n",
            profile, level );
    }
    else
    {
        static const char * const subsampling[4] = { "4:0:0", "4:2:0", "4:2:2", "4:4:4" };
        x264_log( h, X264_LOG_INFO, "profile %s, level %s, %s %d-bit\n",
            profile, level, subsampling[CHROMA_FORMAT], BIT_DEPTH );
    }

    return h;
fail:
    /* NOTE(review): only the top-level handle is released here; buffers, thread
     * contexts and duplicated strings allocated above are not freed on this
     * path -- confirm whether that is acceptable. */
    x264_free( h );
    return NULL;
}