x264代码的主入口:main()函数
/*
 * Entry point of the x264 command-line tool.
 *
 * Parses the command line into `param` / `opt`, runs encode() when parsing
 * succeeded, and finally releases whatever handles are still open in `opt`.
 *
 * Returns -1 when parse() fails, otherwise the value returned by encode().
 * NOTE(review): FAIL_IF_ERROR is defined elsewhere; it presumably logs the
 * message and returns early on failure -- confirm against its definition.
 */
int main( int argc, char **argv )
{
    x264_param_t param;   /* encoder parameters, filled in by parse() */
    cli_opt_t opt = {0};  /* CLI-side state: input/output handles, qpfile, timecode file, ... */
    int ret = 0;

    FAIL_IF_ERROR( x264_threading_init(), "unable to initialize threading\n" );

#ifdef _WIN32
    FAIL_IF_ERROR( !get_argv_utf8( &argc, &argv ), "unable to convert command line to UTF-8\n" );

    /* Remember the console title so it can be restored later. */
    GetConsoleTitleW( org_console_title, CONSOLE_TITLE_SIZE );
    _setmode( _fileno( stdin ), _O_BINARY );
    _setmode( _fileno( stdout ), _O_BINARY );
    _setmode( _fileno( stderr ), _O_BINARY );
#endif

    /* Parse command line: fills in both the encoder parameters and the CLI options. */
    if( parse( argc, argv, &param, &opt ) < 0 )
        ret = -1;

#ifdef _WIN32
    /* Restore title; it can be changed by input modules */
    SetConsoleTitleW( org_console_title );
#endif

    /* Control-C handler */
    signal( SIGINT, sigint_handler );

    /* Only encode when the command line was parsed successfully. */
    if( !ret )
        ret = encode( &param, &opt );

    /* clean up handles */
    if( filter.free )
        filter.free( opt.hin );            /* release the input through the filter chain */
    else if( opt.hin )
        cli_input.close_file( opt.hin );   /* no filter hook: close the input handle directly */
    if( opt.hout )
        cli_output.close_file( opt.hout, 0, 0 );  /* still open only if encode() did not close it */
    if( opt.tcfile_out )
        fclose( opt.tcfile_out );
    if( opt.qpfile )
        fclose( opt.qpfile );

#ifdef _WIN32
    SetConsoleTitleW( org_console_title );
    free( argv );   /* argv was replaced by get_argv_utf8() above */
#endif

    return ret;
}
encode()函数:
/*
 * Run one complete encode with the already-parsed settings.
 *
 * Opens the encoder, optionally writes the stream headers, pulls frames from
 * the input filter chain one at a time, assigns each a pts, hands it to
 * encode_frame(), then flushes the frames still delayed inside the encoder.
 * Before returning it closes the encoder and the output file (opt->hout is
 * set to NULL) and prints a summary line to stderr.
 *
 * param: encoder parameters; the pulldown/timebase fields may be modified here.
 * opt:   CLI state (input/output handles, qpfile, timecode file, seek, ...).
 *
 * Returns 0 on success, -1 on failure.
 * NOTE(review): FAIL_IF_ERROR2 is defined elsewhere; since the only way to
 * reach the `fail:` label on error is through it, it presumably sets retval
 * and jumps there -- confirm against its definition.
 */
static int encode( x264_param_t *param, cli_opt_t *opt )
{
x264_t *h = NULL; // encoder handle returned by x264_encoder_open()
x264_picture_t pic;
cli_pic_t cli_pic;
const cli_pulldown_t *pulldown = NULL; // shut up gcc
int i_frame = 0; // input frame index
int i_frame_output = 0; // number of frames output by the encoder so far
int64_t i_end, i_previous = 0, i_start = 0; // end time, value returned by the last print_status() call, start time
int64_t i_file = 0; // running total of bytes written (headers + frames)
int i_frame_size; // size returned by encode_frame() for the current call
int64_t last_dts = 0; // dts reported by the most recent encode_frame() call
int64_t prev_dts = 0; // value of last_dts before the most recent call
int64_t first_dts = 0; // dts of the first output frame
# define MAX_PTS_WARNING 3 /* arbitrary */
int pts_warning_cnt = 0;
int64_t largest_pts = -1;
int64_t second_largest_pts = -1;
int64_t ticks_per_frame;
double duration;
double pulldown_pts = 0;
int retval = 0; // function return value
// keep the progress display only when the log level is below DEBUG
opt->b_progress &= param->i_log_level < X264_LOG_DEBUG;
/* set up pulldown (frame-rate conversion, e.g. presenting film-rate material at a broadcast rate) */
if( opt->i_pulldown && !param->b_vfr_input ) // pulldown is only applied when the input is not flagged as variable frame rate
{
param->b_pulldown = 1; // 1 = pulldown in use, 0 = not in use
param->b_pic_struct = 1;
pulldown = &pulldown_values[opt->i_pulldown];
param->i_timebase_num = param->i_fps_den; // timebase numerator := frame rate denominator
// fps_num * fps_factor must be a whole number, otherwise the timebase cannot be expressed
FAIL_IF_ERROR2( fmod( param->i_fps_num * pulldown->fps_factor, 1 ),
"unsupported framerate for chosen pulldown\n" );
param->i_timebase_den = param->i_fps_num * pulldown->fps_factor;
}
h = x264_encoder_open( param ); // open the x264 encoder
FAIL_IF_ERROR2( !h, "x264_encoder_open failed\n" );
x264_encoder_parameters( h, param ); // NOTE(review): presumably reads back the parameters actually in use by the encoder -- confirm in x264.h
FAIL_IF_ERROR2( cli_output.set_param( opt->hout, param ), "can't set outfile param\n" );
i_start = x264_mdate(); // record the start time
/* ticks/frame = ticks/second / frames/second */
ticks_per_frame = (int64_t)param->i_timebase_den * param->i_fps_den / param->i_timebase_num / param->i_fps_num;
FAIL_IF_ERROR2( ticks_per_frame < 1 && !param->b_vfr_input, "ticks_per_frame invalid: %"PRId64"\n", ticks_per_frame );
ticks_per_frame = X264_MAX( ticks_per_frame, 1 );
if( !param->b_repeat_headers )
{
// Write SPS/PPS/SEI
x264_nal_t *headers;
int i_nal;
FAIL_IF_ERROR2( x264_encoder_headers( h, &headers, &i_nal ) < 0, "x264_encoder_headers failed\n" ); // fetch the header NAL units
FAIL_IF_ERROR2( (i_file = cli_output.write_headers( opt->hout, headers )) < 0, "error writing headers to output file\n" );
}
if( opt->tcfile_out )
fprintf( opt->tcfile_out, "# timecode format v2\n" );
/* Encode frames: loop until Ctrl-C, the requested frame count is reached (0 = unlimited), or the input ends */
for( ; !b_ctrl_c && (i_frame < param->i_frame_total || !param->i_frame_total); i_frame++ )
{
if( filter.get_frame( opt->hin, &cli_pic, i_frame + opt->i_seek ) )
break;
x264_picture_init( &pic ); // initialise pic
convert_cli_to_lib_pic( &pic, &cli_pic );
if( !param->b_vfr_input ) // constant frame rate input: pts is simply the frame index
pic.i_pts = i_frame;
if( opt->i_pulldown && !param->b_vfr_input )
{
pic.i_pic_struct = pulldown->pattern[ i_frame % pulldown->mod ];
pic.i_pts = (int64_t)( pulldown_pts + 0.5 );
pulldown_pts += pulldown_frame_duration[pic.i_pic_struct];
}
else if( opt->timebase_convert_multiplier )
pic.i_pts = (int64_t)( pic.i_pts * opt->timebase_convert_multiplier + 0.5 );
if( pic.i_pts <= largest_pts ) // pts must be strictly increasing; warn and patch it up if not
{
if( cli_log_level >= X264_LOG_DEBUG || pts_warning_cnt < MAX_PTS_WARNING )
x264_cli_log( "x264", X264_LOG_WARNING, "non-strictly-monotonic pts at frame %d (%"PRId64" <= %"PRId64")\n",
i_frame, pic.i_pts, largest_pts );
else if( pts_warning_cnt == MAX_PTS_WARNING )
x264_cli_log( "x264", X264_LOG_WARNING, "too many nonmonotonic pts warnings, suppressing further ones\n" );
pts_warning_cnt++;
pic.i_pts = largest_pts + ticks_per_frame;
}
second_largest_pts = largest_pts; // remember the previous maximum as the second-largest pts
largest_pts = pic.i_pts; // update the largest pts
if( opt->tcfile_out )
fprintf( opt->tcfile_out, "%.6f\n", pic.i_pts * ((double)param->i_timebase_num / param->i_timebase_den) * 1e3 );
if( opt->qpfile )
parse_qpfile( opt, &pic, i_frame + opt->i_seek ); // apply the qpfile entry for this frame
prev_dts = last_dts; // save the current dts before it is overwritten
i_frame_size = encode_frame( h, opt->hout, &pic, &last_dts ); // encode one frame; updates last_dts and returns the frame size (negative is treated as an error below)
if( i_frame_size < 0 )
{
b_ctrl_c = 1; /* lie to exit the loop */
retval = -1;
}
else if( i_frame_size )
{
i_file += i_frame_size;
i_frame_output++; // one more frame output
if( i_frame_output == 1 )
first_dts = prev_dts = last_dts;
}
if( filter.release_frame( opt->hin, &cli_pic, i_frame + opt->i_seek ) )
break;
/* update status line (up to 1000 times per input file) */
if( opt->b_progress && i_frame_output )
i_previous = print_status( i_start, i_previous, i_frame_output, param->i_frame_total, i_file, param, 2 * last_dts - prev_dts - first_dts );
}
/* Flush delayed frames */
while( !b_ctrl_c && x264_encoder_delayed_frames( h ) )
{
prev_dts = last_dts;
i_frame_size = encode_frame( h, opt->hout, NULL, &last_dts );
if( i_frame_size < 0 )
{
b_ctrl_c = 1; /* lie to exit the loop */
retval = -1;
}
else if( i_frame_size )
{
i_file += i_frame_size;
i_frame_output++;
if( i_frame_output == 1 )
first_dts = prev_dts = last_dts;
}
if( opt->b_progress && i_frame_output )
i_previous = print_status( i_start, i_previous, i_frame_output, param->i_frame_total, i_file, param, 2 * last_dts - prev_dts - first_dts );
}
fail:
if( pts_warning_cnt >= MAX_PTS_WARNING && cli_log_level < X264_LOG_DEBUG )
x264_cli_log( "x264", X264_LOG_WARNING, "%d suppressed nonmonotonic pts warnings\n", pts_warning_cnt-MAX_PTS_WARNING );
/* duration algorithm fails when only 1 frame is output */
if( i_frame_output == 1 )
duration = (double)param->i_fps_den / param->i_fps_num;
else if( b_ctrl_c )
duration = (double)(2 * last_dts - prev_dts - first_dts) * param->i_timebase_num / param->i_timebase_den;
else
duration = (double)(2 * largest_pts - second_largest_pts) * param->i_timebase_num / param->i_timebase_den;
i_end = x264_mdate(); // record the end time
/* Erase progress indicator before printing encoding stats. */
if( opt->b_progress )
fprintf( stderr, " \r" );
if( h )
x264_encoder_close( h ); // close the encoder
fprintf( stderr, "\n" );
if( b_ctrl_c )
fprintf( stderr, "aborted at input frame %d, output frame %d\n", opt->i_seek + i_frame, i_frame_output );
cli_output.close_file( opt->hout, largest_pts, second_largest_pts );
opt->hout = NULL;
if( i_frame_output > 0 )
{
double fps = (double)i_frame_output * (double)1000000 /
(double)( i_end - i_start ); // frames per second; the 1000000 factor suggests x264_mdate() is in microseconds -- TODO confirm
fprintf( stderr, "encoded %d frames, %.2f fps, %.2f kb/s\n", i_frame_output, fps,
(double) i_file * 8 / ( 1000 * duration ) );
}
return retval;
}
其中涉及到非常重要的几个结构体分别为 x264_param_t、x264_t 和 x264_picture_t。
x264_param_t 结构体,此结构体基本涵盖了命令行所涉及到的参数:
/* Encoder configuration. One instance is passed to x264_encoder_open();
 * the fields are grouped by area (threading, video properties, VUI,
 * bitstream, logging, analysis, rate control, cropping, muxing, slicing,
 * callbacks). */
typedef struct x264_param_t
{
/* CPU flags */
unsigned int cpu;
int i_threads; /* encode multiple frames in parallel */
int i_lookahead_threads; /* multiple threads for lookahead analysis */
int b_sliced_threads; /* Whether to use slice-based threading. */
int b_deterministic; /* whether to allow non-deterministic optimizations when threaded */
int b_cpu_independent; /* force canonical behavior rather than cpu-dependent optimal algorithms */
int i_sync_lookahead; /* threaded lookahead buffer */
/* Video Properties */
int i_width; // picture width (resolution)
int i_height; // picture height (resolution)
int i_csp; /* CSP of encoded bitstream (colorspace type) */
int i_level_idc; // level
int i_frame_total; /* number of frames to encode if known, else 0 */
/* NAL HRD
* Uses Buffering and Picture Timing SEIs to signal HRD
* The HRD in H.264 was not designed with VFR in mind.
* It is therefore not recommended to use NAL HRD with VFR.
* Furthermore, reconfiguring the VBV (via x264_encoder_reconfig)
* will currently generate invalid HRD. */
int i_nal_hrd; // HRD = Hypothetical Reference Decoder signalling (see note above); reportedly required by Blu-ray and broadcast use cases
struct
{
/* they will be reduced to be 0 < x <= 65535 and prime */
int i_sar_height; // SAR height; NOTE(review): H.264 VUI presumably means "sample aspect ratio" here -- confirm
int i_sar_width;
int i_overscan; /* 0=undef, 1=no overscan, 2=overscan */
/* see h264 annex E for the values of the following */
int i_vidformat;
int b_fullrange; // NOTE(review): presumably maps to the full-range flag of H.264 Annex E VUI -- confirm
int i_colorprim;
int i_transfer;
int i_colmatrix;
int i_chroma_loc; /* both top & bottom */
} vui; // VUI: video usability information (side information for the decoder rather than for the coding process itself)
/* Bitstream parameters */
int i_frame_reference; /* Maximum number of reference frames */
int i_dpb_size; /* Force a DPB size larger than that implied by B-frames and reference frames.
* Useful in combination with interactive error resilience. */
int i_keyint_max; /* Force an IDR keyframe at this interval */
int i_keyint_min; /* Scenecuts closer together than this are coded as I, not IDR. */
int i_scenecut_threshold; /* how aggressively to insert extra I frames */
int b_intra_refresh; /* Whether or not to use periodic intra refresh instead of IDR frames. */
int i_bframe; /* how many b-frame between 2 references pictures */
int i_bframe_adaptive; // adaptive B-frame decision mode
int i_bframe_bias; // biases the choice between B-frames and P-frames
int i_bframe_pyramid; /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
int b_open_gop; // open GOP: whether prediction may reference across GOP boundaries
int b_bluray_compat; // Blu-ray compatibility
int i_avcintra_class;
int b_deblocking_filter; // whether the deblocking filter is enabled
int i_deblocking_filter_alphac0; /* [-6, 6] -6 light filter, 6 strong */
int i_deblocking_filter_beta; /* [-6, 6] idem */
int b_cabac; // whether to use CABAC entropy coding
int i_cabac_init_idc;
int b_interlaced; // interlaced coding
int b_constrained_intra;
int i_cqm_preset; // CQM: custom quantization matrices
char *psz_cqm_file; /* filename (in UTF-8) of CQM file, JM format */
uint8_t cqm_4iy[16]; /* used only if i_cqm_preset == X264_CQM_CUSTOM */
uint8_t cqm_4py[16];
uint8_t cqm_4ic[16];
uint8_t cqm_4pc[16];
uint8_t cqm_8iy[64];
uint8_t cqm_8py[64];
uint8_t cqm_8ic[64];
uint8_t cqm_8pc[64];
/* Log */
void (*pf_log)( void *, int i_level, const char *psz, va_list );
void *p_log_private;
int i_log_level;
int b_full_recon; /* fully reconstruct frames, even when not necessary for encoding. Implied by psz_dump_yuv */
char *psz_dump_yuv; /* filename (in UTF-8) for reconstructed frames */
/* Encoder analyser parameters */
struct
{
unsigned int intra; /* intra partitions */
unsigned int inter; /* inter partitions */
int b_transform_8x8; // 8x8 transform
int i_weighted_pred; /* weighting for P-frames */
int b_weighted_bipred; /* implicit weighting for B-frames */
int i_direct_mv_pred; /* spatial vs temporal mv prediction */
int i_chroma_qp_offset;
int i_me_method; /* motion estimation algorithm to use (X264_ME_*) */
int i_me_range; /* integer pixel motion estimation search range (from predicted mv) */
int i_mv_range; /* maximum length of a mv (in pixels). -1 = auto, based on level */
int i_mv_range_thread; /* minimum space between threads. -1 = auto, based on number of threads. */
int i_subpel_refine; /* subpixel motion estimation quality */
int b_chroma_me; /* chroma ME for subpel and mode decision in P-frames */
int b_mixed_references; /* allow each mb partition to have its own reference number */
int i_trellis; /* trellis RD quantization */
int b_fast_pskip; /* early SKIP detection on P-frames */
int b_dct_decimate; /* transform coefficient thresholding on P-frames */
int i_noise_reduction; /* adaptive pseudo-deadzone */
float f_psy_rd; /* Psy RD strength */
float f_psy_trellis; /* Psy trellis strength */
int b_psy; /* Toggle all psy optimizations */
int b_mb_info; /* Use input mb_info data in x264_picture_t */
int b_mb_info_update; /* Update the values in mb_info according to the results of encoding. */
/* the deadzone size that will be used in luma quantization */
int i_luma_deadzone[2]; /* {inter, intra} */
int b_psnr; /* compute and print PSNR stats */
int b_ssim; /* compute and print SSIM stats */
} analyse;
/* Rate control parameters */
struct
{
int i_rc_method; /* X264_RC_* (rate control method) */
int i_qp_constant; /* 0 to (51 + 6*(x264_bit_depth-8)). 0=lossless */
int i_qp_min; /* min allowed QP value */
int i_qp_max; /* max allowed QP value */
int i_qp_step; /* max QP step between frames */
int i_bitrate;
float f_rf_constant; /* 1pass VBR, nominal QP */
float f_rf_constant_max; /* In CRF mode, maximum CRF as caused by VBV */
float f_rate_tolerance;
int i_vbv_max_bitrate;
int i_vbv_buffer_size;
float f_vbv_buffer_init; /* <=1: fraction of buffer_size. >1: kbit */
float f_ip_factor;
float f_pb_factor;
/* VBV filler: force CBR VBV and use filler bytes to ensure hard-CBR.
* Implied by NAL-HRD CBR. */
int b_filler;
int i_aq_mode; /* psy adaptive QP. (X264_AQ_*) */
float f_aq_strength;
int b_mb_tree; /* Macroblock-tree ratecontrol (on/off flag). */
int i_lookahead;
/* 2pass */
int b_stat_write; /* Enable stat writing in psz_stat_out */
char *psz_stat_out; /* output filename (in UTF-8) of the 2pass stats file */
int b_stat_read; /* Read stat from psz_stat_in and use it */
char *psz_stat_in; /* input filename (in UTF-8) of the 2pass stats file */
/* 2pass params (same as ffmpeg ones) */
float f_qcompress; /* 0.0 => cbr, 1.0 => constant qp */
float f_qblur; /* temporally blur quants */
float f_complexity_blur; /* temporally blur complexity */
x264_zone_t *zones; /* ratecontrol overrides */
int i_zones; /* number of zone_t's */
char *psz_zones; /* alternate method of specifying zones */
} rc;
/* Cropping Rectangle parameters: added to those implicitly defined by
non-mod16 video resolutions. */
struct
{
unsigned int i_left; // left component of the crop rectangle
unsigned int i_top; // top component of the crop rectangle
unsigned int i_right; // right component of the crop rectangle
unsigned int i_bottom;// bottom component of the crop rectangle
} crop_rect; // cropping rectangle
/* frame packing arrangement flag */
int i_frame_packing;
/* Muxing parameters */
int b_aud; /* generate access unit delimiters */
int b_repeat_headers; /* put SPS/PPS before each keyframe */
int b_annexb; /* if set, place start codes (4 bytes) before NAL units,
* otherwise place size (4 bytes) before NAL units. */
int i_sps_id; /* SPS and PPS id number */
int b_vfr_input; /* VFR input. If 1, use timebase and timestamps for ratecontrol purposes.
* If 0, use fps only. */
int b_pulldown; /* use explicitly set timebase for CFR */
uint32_t i_fps_num; // frame rate numerator (the rate is expressed as numerator/denominator for precision)
uint32_t i_fps_den; // frame rate denominator
uint32_t i_timebase_num; /* Timebase numerator */
uint32_t i_timebase_den; /* Timebase denominator */
int b_tff;
/* Pulldown:
* The correct pic_struct must be passed with each input frame.
* The input timebase should be the timebase corresponding to the output framerate. This should be constant.
* e.g. for 3:2 pulldown timebase should be 1001/30000
* The PTS passed with each frame must be the PTS of the frame after pulldown is applied.
* Frame doubling and tripling require b_vfr_input set to zero (see H.264 Table D-1)
*
* Pulldown changes are not clearly defined in H.264. Therefore, it is the calling app's responsibility to manage this.
*/
int b_pic_struct;
/* Fake Interlaced.
*
* Used only when b_interlaced=0. Setting this flag makes it possible to flag the stream as PAFF interlaced yet
* encode all frames progressively. It is useful for encoding 25p and 30p Blu-Ray streams.
*/
int b_fake_interlaced;
/* Don't optimize header parameters based on video content, e.g. ensure that splitting an input video, compressing
* each part, and stitching them back together will result in identical SPS/PPS. This is necessary for stitching
* with container formats that don't allow multiple SPS/PPS. */
int b_stitchable;
int b_opencl; /* use OpenCL when available */
int i_opencl_device; /* specify count of GPU devices to skip, for CLI users */
void *opencl_device_id; /* pass explicit cl_device_id as void*, for API users */
char *psz_clbin_file; /* filename (in UTF-8) of the compiled OpenCL kernel cache file */
/* Slicing parameters */
int i_slice_max_size; /* Max size per slice in bytes; includes estimated NAL overhead. */
int i_slice_max_mbs; /* Max number of MBs per slice; overrides i_slice_count. */
int i_slice_min_mbs; /* Min number of MBs per slice */
int i_slice_count; /* Number of slices per frame: forces rectangular slices. */
int i_slice_count_max; /* Absolute cap on slices per frame; stops applying slice-max-size
* and slice-max-mbs if this is reached. */
/* Optional callback for freeing this x264_param_t when it is done being used.
* Only used when the x264_param_t sits in memory for an indefinite period of time,
* i.e. when an x264_param_t is passed to x264_t in an x264_picture_t or in zones.
* Not used when x264_encoder_reconfig is called directly. */
void (*param_free)( void* );
/* Optional low-level callback for low-latency encoding. Called for each output NAL unit
* immediately after the NAL unit is finished encoding. This allows the calling application
* to begin processing video data (e.g. by sending packets over a network) before the frame
* is done encoding.
*
* This callback MUST do the following in order to work correctly:
* 1) Have available an output buffer of at least size nal->i_payload*3/2 + 5 + 64.
* 2) Call x264_nal_encode( h, dst, nal ), where dst is the output buffer.
* After these steps, the content of nal is valid and can be used in the same way as if
* the NAL unit were output by x264_encoder_encode.
*
* This does not need to be synchronous with the encoding process: the data pointed to
* by nal (both before and after x264_nal_encode) will remain valid until the next
* x264_encoder_encode call. The callback must be re-entrant.
*
* This callback does not work with frame-based threads; threads must be disabled
* or sliced-threads enabled. This callback also does not work as one would expect
* with HRD -- since the buffering period SEI cannot be calculated until the frame
* is finished encoding, it will not be sent via this callback.
*
* Note also that the NALs are not necessarily returned in order when sliced threads is
* enabled. Accordingly, the variable i_first_mb and i_last_mb are available in
* x264_nal_t to help the calling application reorder the slices if necessary.
*
* When this callback is enabled, x264_encoder_encode does not return valid NALs;
* the calling application is expected to acquire all output NALs through the callback.
*
* It is generally sensible to combine this callback with a use of slice-max-mbs or
* slice-max-size.
*
* The opaque pointer is the opaque pointer from the input frame associated with this
* NAL unit. This helps distinguish between nalu_process calls from different sources,
* e.g. if doing multiple encodes in one process.
*/
void (*nalu_process)( x264_t *h, x264_nal_t *nal, void *opaque );
} x264_param_t;
x264_t结构体(最为重要),此结构体包含了编码所需的全部参数,需重点阅读分析:
struct x264_t
{
/* encoder parameters */
x264_param_t param;
x264_t *thread[X264_THREAD_MAX+1];
x264_t *lookahead_thread[X264_LOOKAHEAD_THREAD_MAX];
int b_thread_active; // 表示线程是否为激活状态,为0表示未激活,1表示处于激活状态
int i_thread_phase; /* which thread to use for the next frame 指示下一帧编码将用到的线程 */
int i_thread_idx; /* which thread this is 即当前帧编号 */
int i_threadslice_start; /* first row in this thread slice */
int i_threadslice_end; /* row after the end of this thread slice */
int i_threadslice_pass; /* which pass of encoding we are on 即当前线程正在编码第几遍 */
x264_threadpool_t *threadpool; // 线程池
x264_threadpool_t *lookaheadpool; // 超前线程池
x264_pthread_mutex_t mutex;
x264_pthread_cond_t cv;
/* bitstream output */
struct
{
int i_nal;
int i_nals_allocated; // 需要分配nal的个数
x264_nal_t *nal; // NAL的相关信息
int i_bitstream; /* size of p_bitstream ,即NAL数据bit数 */
uint8_t *p_bitstream; /* will hold data for all nal , NAL总数据 */
bs_t bs;
} out;
uint8_t *nal_buffer;
int nal_buffer_size;
x264_t *reconfig_h;
int reconfig;
/**** thread synchronization starts here ****/
/* frame number/poc */
int i_frame; // 帧
int i_frame_num;
int i_thread_frames; /* Number of different frames being encoded by threads;
* 1 when sliced-threads is on. */
int i_nal_type;
int i_nal_ref_idc;
int64_t i_disp_fields; /* Number of displayed fields (both coded and implied via pic_struct) */
int i_disp_fields_last_frame;
int64_t i_prev_duration; /* Duration of previous frame */
int64_t i_coded_fields; /* Number of coded fields (both coded and implied via pic_struct) */
int64_t i_cpb_delay; /* Equal to number of fields preceding this field
* since last buffering_period SEI */
int64_t i_coded_fields_lookahead; /* Use separate counters for lookahead */
int64_t i_cpb_delay_lookahead;
int64_t i_cpb_delay_pir_offset;
int64_t i_cpb_delay_pir_offset_next;
int b_queued_intra_refresh;
int64_t i_last_idr_pts;
int i_idr_pic_id;
/* quantization matrix for decoding, [cqm][qp%6][coef] */
int (*dequant4_mf[4])[16]; /* [4][6][16] */
int (*dequant8_mf[4])[64]; /* [4][6][64] */
/* quantization matrix for trellis, [cqm][qp][coef] */
int (*unquant4_mf[4])[16]; /* [4][QP_MAX_SPEC+1][16] */
int (*unquant8_mf[4])[64]; /* [4][QP_MAX_SPEC+1][64] */
/* quantization matrix for deadzone */
udctcoef (*quant4_mf[4])[16]; /* [4][QP_MAX_SPEC+1][16] */
udctcoef (*quant8_mf[4])[64]; /* [4][QP_MAX_SPEC+1][64] */
udctcoef (*quant4_bias[4])[16]; /* [4][QP_MAX_SPEC+1][16] */
udctcoef (*quant8_bias[4])[64]; /* [4][QP_MAX_SPEC+1][64] */
udctcoef (*quant4_bias0[4])[16]; /* [4][QP_MAX_SPEC+1][16] */
udctcoef (*quant8_bias0[4])[64]; /* [4][QP_MAX_SPEC+1][64] */
udctcoef (*nr_offset_emergency)[4][64];
/* mv/ref cost arrays. */
uint16_t *cost_mv[QP_MAX+1];
uint16_t *cost_mv_fpel[QP_MAX+1][4];
const uint8_t *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */
/* Slice header */
x264_slice_header_t sh;
/* SPS / PPS */
x264_sps_t sps[1];
x264_pps_t pps[1];
/* Slice header backup, for SEI_DEC_REF_PIC_MARKING */
int b_sh_backup;
x264_slice_header_t sh_backup;
/* cabac context */
x264_cabac_t cabac;
struct
{
/* Frames to be encoded (whose types have been decided) */
x264_frame_t **current;
/* Unused frames: 0 = fenc, 1 = fdec */
x264_frame_t **unused[2];
/* Unused blank frames (for duplicates) */
x264_frame_t **blank_unused;
/* frames used for reference + sentinels */
x264_frame_t *reference[X264_REF_MAX+2];
int i_last_keyframe; /* Frame number of the last keyframe */
int i_last_idr; /* Frame number of the last IDR (not RP)*/
int i_poc_last_open_gop; /* Poc of the I frame of the last open-gop. The value
* is only assigned during the period between that
* I frame and the next P or I frame, else -1 */
int i_input; /* Number of input frames already accepted */
int i_max_dpb; /* Number of frames allocated in the decoded picture buffer */
int i_max_ref0;
int i_max_ref1;
int i_delay; /* Number of frames buffered for B reordering */
int i_bframe_delay;
int64_t i_bframe_delay_time;
int64_t i_first_pts;
int64_t i_prev_reordered_pts[2];
int64_t i_largest_pts;
int64_t i_second_largest_pts;
int b_have_lowres; /* Whether 1/2 resolution luma planes are being used */
int b_have_sub8x8_esa;
} frames;
/* current frame being encoded */
x264_frame_t *fenc;
/* frame being reconstructed */
x264_frame_t *fdec;
/* references lists */
int i_ref[2];
x264_frame_t *fref[2][X264_REF_MAX+3];
x264_frame_t *fref_nearest[2];
int b_ref_reorder[2];
/* hrd */
int initial_cpb_removal_delay;
int initial_cpb_removal_delay_offset;
int64_t i_reordered_pts_delay;
/* Current MB DCT coeffs */
struct
{
ALIGNED_32( dctcoef luma16x16_dc[3][16] );
ALIGNED_16( dctcoef chroma_dc[2][8] );
// FIXME share memory?
ALIGNED_32( dctcoef luma8x8[12][64] );
ALIGNED_32( dctcoef luma4x4[16*3][16] );
} dct;
/* MB table and cache for current frame/mb */
struct
{
int i_mb_width;
int i_mb_height;
int i_mb_count; /* number of mbs in a frame */
/* Chroma subsampling */
int chroma_h_shift;
int chroma_v_shift;
/* Strides */
int i_mb_stride;
int i_b8_stride;
int i_b4_stride;
int left_b8[2];
int left_b4[2];
/* Current index */
int i_mb_x;
int i_mb_y;
int i_mb_xy;
int i_b8_xy;
int i_b4_xy;
/* Search parameters */
int i_me_method;
int i_subpel_refine;
int b_chroma_me;
int b_trellis;
int b_noise_reduction;
int b_dct_decimate;
int i_psy_rd; /* Psy RD strength--fixed point value*/
int i_psy_trellis; /* Psy trellis strength--fixed point value*/
int b_interlaced;
int b_adaptive_mbaff; /* MBAFF+subme 0 requires non-adaptive MBAFF i.e. all field mbs */
/* Allowed qpel MV range to stay within the picture + emulated edge pixels */
int mv_min[2];
int mv_max[2];
int mv_miny_row[3]; /* 0 == top progressive, 1 == bot progressive, 2 == interlaced */
int mv_maxy_row[3];
/* Subpel MV range for motion search.
* same mv_min/max but includes levels' i_mv_range. */
int mv_min_spel[2];
int mv_max_spel[2];
int mv_miny_spel_row[3];
int mv_maxy_spel_row[3];
/* Fullpel MV range for motion search */
ALIGNED_8( int16_t mv_limit_fpel[2][2] ); /* min_x, min_y, max_x, max_y */
int mv_miny_fpel_row[3];
int mv_maxy_fpel_row[3];
/* neighboring MBs */
unsigned int i_neighbour;
unsigned int i_neighbour8[4]; /* neighbours of each 8x8 or 4x4 block that are available */
unsigned int i_neighbour4[16]; /* at the time the block is coded */
unsigned int i_neighbour_intra; /* for constrained intra pred */
unsigned int i_neighbour_frame; /* ignoring slice boundaries */
int i_mb_type_top;
int i_mb_type_left[2];
int i_mb_type_topleft;
int i_mb_type_topright;
int i_mb_prev_xy;
int i_mb_left_xy[2];
int i_mb_top_xy;
int i_mb_topleft_xy;
int i_mb_topright_xy;
int i_mb_top_y;
int i_mb_topleft_y;
int i_mb_topright_y;
const x264_left_table_t *left_index_table;
int i_mb_top_mbpair_xy;
int topleft_partition;
int b_allow_skip;
int field_decoding_flag;
/**** thread synchronization ends here ****/
/* subsequent variables are either thread-local or constant,
* and won't be copied from one thread to another */
/* mb table */
uint8_t *base; /* base pointer for all malloced data in this mb */
int8_t *type; /* mb type */
uint8_t *partition; /* mb partition */
int8_t *qp; /* mb qp */
int16_t *cbp; /* mb cbp: 0x0?: luma, 0x?0: chroma, 0x100: luma dc, 0x0200 and 0x0400: chroma dc (all set for PCM)*/
int8_t (*intra4x4_pred_mode)[8]; /* intra4x4 pred mode. for non I4x4 set to I_PRED_4x4_DC(2) */
/* actually has only 7 entries; set to 8 for write-combining optimizations */
uint8_t (*non_zero_count)[16*3]; /* nzc. for I_PCM set to 16 */
int8_t *chroma_pred_mode; /* chroma_pred_mode. cabac only. for non intra I_PRED_CHROMA_DC(0) */
int16_t (*mv[2])[2]; /* mb mv. set to 0 for intra mb */
uint8_t (*mvd[2])[8][2]; /* absolute value of mb mv difference with predict, clipped to [0,33]. set to 0 if intra. cabac only */
int8_t *ref[2]; /* mb ref. set to -1 if non used (intra or Lx only) */
int16_t (*mvr[2][X264_REF_MAX*2])[2];/* 16x16 mv for each possible ref */
int8_t *skipbp; /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */
int8_t *mb_transform_size; /* transform_size_8x8_flag of each mb */
uint16_t *slice_table; /* sh->first_mb of the slice that the indexed mb is part of
* NOTE: this will fail on resolutions above 2^16 MBs... */
uint8_t *field;
/* buffer for weighted versions of the reference frames */
pixel *p_weight_buf[X264_REF_MAX];
/* current value */
int i_type;
int i_partition;
ALIGNED_4( uint8_t i_sub_partition[4] );
int b_transform_8x8;
int i_cbp_luma;
int i_cbp_chroma;
int i_intra16x16_pred_mode;
int i_chroma_pred_mode;
/* skip flags for i4x4 and i8x8
* 0 = encode as normal.
* 1 (non-RD only) = the DCT is still in h->dct, restore fdec and skip reconstruction.
* 2 (RD only) = the DCT has since been overwritten by RD; restore that too. */
int i_skip_intra;
/* skip flag for motion compensation */
/* if we've already done MC, we don't need to do it again */
int b_skip_mc;
/* set to true if we are re-encoding a macroblock. */
int b_reencode_mb;
int ip_offset; /* Used by PIR to offset the quantizer of intra-refresh blocks. */
int b_deblock_rdo;
int b_overflow; /* If CAVLC had a level code overflow during bitstream writing. */
struct
{
/* space for p_fenc and p_fdec */
#define FENC_STRIDE 16
#define FDEC_STRIDE 32
ALIGNED_32( pixel fenc_buf[48*FENC_STRIDE] );
ALIGNED_32( pixel fdec_buf[52*FDEC_STRIDE] );
/* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
ALIGNED_16( pixel i4x4_fdec_buf[16*16] );
ALIGNED_16( pixel i8x8_fdec_buf[16*16] );
ALIGNED_16( dctcoef i8x8_dct_buf[3][64] );
ALIGNED_16( dctcoef i4x4_dct_buf[15][16] );
uint32_t i4x4_nnz_buf[4];
uint32_t i8x8_nnz_buf[4];
int i4x4_cbp;
int i8x8_cbp;
/* Psy trellis DCT data */
ALIGNED_16( dctcoef fenc_dct8[4][64] );
ALIGNED_16( dctcoef fenc_dct4[16][16] );
/* Psy RD SATD/SA8D scores cache */
ALIGNED_32( uint64_t fenc_hadamard_cache[9] );
ALIGNED_32( uint32_t fenc_satd_cache[32] );
/* pointer over mb of the frame to be compressed */
pixel *p_fenc[3]; /* y,u,v */
/* pointer to the actual source frame, not a block copy */
pixel *p_fenc_plane[3];
/* pointer over mb of the frame to be reconstructed */
pixel *p_fdec[3];
/* pointer over mb of the references */
int i_fref[2];
/* [12]: yN, yH, yV, yHV, (NV12 ? uv : I444 ? (uN, uH, uV, uHV, vN, ...)) */
pixel *p_fref[2][X264_REF_MAX*2][12];
pixel *p_fref_w[X264_REF_MAX*2]; /* weighted fullpel luma */
uint16_t *p_integral[2][X264_REF_MAX];
/* fref stride */
int i_stride[3];
} pic;
/* cache */
struct
{
/* real intra4x4_pred_mode if I_4X4 or I_8X8, I_PRED_4x4_DC if mb available, -1 if not */
ALIGNED_8( int8_t intra4x4_pred_mode[X264_SCAN8_LUMA_SIZE] );
/* i_non_zero_count if available else 0x80 */
ALIGNED_16( uint8_t non_zero_count[X264_SCAN8_SIZE] );
/* -1 if unused, -2 if unavailable */
ALIGNED_4( int8_t ref[2][X264_SCAN8_LUMA_SIZE] );
/* 0 if not available */
ALIGNED_16( int16_t mv[2][X264_SCAN8_LUMA_SIZE][2] );
ALIGNED_8( uint8_t mvd[2][X264_SCAN8_LUMA_SIZE][2] );
/* 1 if SKIP or DIRECT. set only for B-frames + CABAC */
ALIGNED_4( int8_t skip[X264_SCAN8_LUMA_SIZE] );
ALIGNED_4( int16_t direct_mv[2][4][2] );
ALIGNED_4( int8_t direct_ref[2][4] );
int direct_partition;
ALIGNED_4( int16_t pskip_mv[2] );
/* number of neighbors (top and left) that used 8x8 dct */
int i_neighbour_transform_size;
int i_neighbour_skip;
/* neighbor CBPs */
int i_cbp_top;
int i_cbp_left;
/* extra data required for mbaff in mv prediction */
int16_t topright_mv[2][3][2];
int8_t topright_ref[2][3];
/* current mb deblock strength */
uint8_t (*deblock_strength)[8][4];
} cache;
/* */
int i_qp; /* current qp */
int i_chroma_qp;
int i_last_qp; /* last qp */
int i_last_dqp; /* last delta qp */
int b_variable_qp; /* whether qp is allowed to vary per macroblock */
int b_lossless;
int b_direct_auto_read; /* take stats for --direct auto from the 2pass log */
int b_direct_auto_write; /* analyse direct modes, to use and/or save */
/* lambda values */
int i_trellis_lambda2[2][2]; /* [luma,chroma][inter,intra] */
int i_psy_rd_lambda;
int i_chroma_lambda2_offset;
/* B_direct and weighted prediction */
int16_t dist_scale_factor_buf[2][2][X264_REF_MAX*2][4];
int16_t (*dist_scale_factor)[4];
int8_t bipred_weight_buf[2][2][X264_REF_MAX*2][4];
int8_t (*bipred_weight)[4];
/* maps fref1[0]'s ref indices into the current list0 */
#define map_col_to_list0(col) h->mb.map_col_to_list0[(col)+2]
int8_t map_col_to_list0[X264_REF_MAX+2];
int ref_blind_dupe; /* The index of the blind reference frame duplicate. */
int8_t deblock_ref_table[X264_REF_MAX*2+2];
#define deblock_ref_table(x) h->mb.deblock_ref_table[(x)+2]
} mb;
/* rate control encoding only */
x264_ratecontrol_t *rc;
/* stats: accumulated encoding statistics plus the statistics of the current frame.
 * NOTE(review): the [3] dimensions are presumably indexed by slice type and the
 * [2] dimensions by a subset of slice types / reference lists -- confirm against
 * the code that updates and prints these counters. */
struct
{
/* Cumulated stats */
/* per slice info */
int i_frame_count[3];
int64_t i_frame_size[3];
double f_frame_qp[3];
int i_consecutive_bframes[X264_BFRAME_MAX+1];
/* quality metric accumulators (SSD, PSNR, SSIM) and frame durations */
double f_ssd_global[3];
double f_psnr_average[3];
double f_psnr_mean_y[3];
double f_psnr_mean_u[3];
double f_psnr_mean_v[3];
double f_ssim_mean_y[3];
double f_frame_duration[3];
/* macroblock-level counters (64-bit) */
int64_t i_mb_count[3][19];
int64_t i_mb_partition[2][17];
int64_t i_mb_count_8x8dct[2];
int64_t i_mb_count_ref[2][2][X264_REF_MAX*2];
int64_t i_mb_cbp[6];
int64_t i_mb_pred_mode[4][13];
int64_t i_mb_field[3];
/* direct-mode statistics
 * NOTE(review): presumably one entry per direct prediction mode -- confirm */
int i_direct_score[2];
int i_direct_frames[2];
/* num p-frames weighted */
int i_wpred[2];
/* Current frame stats */
x264_frame_stat_t frame;
} stat;
/* 0 = luma 4x4, 1 = luma 8x8, 2 = chroma 4x4, 3 = chroma 8x8 */
udctcoef (*nr_offset)[64];
uint32_t (*nr_residual_sum)[64];
uint32_t *nr_count;
ALIGNED_32( udctcoef nr_offset_denoise[4][64] );
ALIGNED_32( uint32_t nr_residual_sum_buf[2][4][64] );
uint32_t nr_count_buf[2][4];
uint8_t luma2chroma_pixel[7]; /* Subsampled pixel size */
/* Buffers that are allocated per-thread even in sliced threads. */
void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
void *scratch_buffer2; /* if the first one's already in use */
pixel *intra_border_backup[5][3]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
/* Deblock strength values are stored for each 4x4 partition. In MBAFF
* there are four extra values that need to be stored, located in [4][i]. */
uint8_t (*deblock_strength[2])[2][8][4];
/* CPU functions dependents */
x264_predict_t predict_16x16[4+3];
x264_predict8x8_t predict_8x8[9+3];
x264_predict_t predict_4x4[9+3];
x264_predict_t predict_chroma[4+3];
x264_predict_t predict_8x8c[4+3];
x264_predict_t predict_8x16c[4+3];
x264_predict_8x8_filter_t predict_8x8_filter;
x264_pixel_function_t pixf;
x264_mc_functions_t mc;
x264_dct_function_t dctf;
x264_zigzag_function_t zigzagf;
x264_zigzag_function_t zigzagf_interlaced;
x264_zigzag_function_t zigzagf_progressive;
x264_quant_function_t quantf;
x264_deblock_function_t loopf;
x264_bitstream_function_t bsf;
x264_lookahead_t *lookahead;
#if HAVE_OPENCL
x264_opencl_t opencl;
#endif
};
x264_picture_t 结构体,此结构体用于存储编码前后的视频数据及相关信息:
/* Picture descriptor exchanged with the encoder.
 * Fields marked "In" are supplied by the caller with the picture to encode;
 * fields marked "Out" describe the picture the encoder produced. */
typedef struct x264_picture_t
{
/* In: force picture type (if not auto)
 * If x264 encoding parameters are violated in the forcing of picture types,
 * x264 will correct the input picture type and log a warning.
 * Out: type of the picture encoded */
int i_type; // In: forced picture type (corrected by x264 if it violates the encoding parameters); Out: type of the encoded picture
/* In: force quantizer for != X264_QP_AUTO */
int i_qpplus1; // In: forced quantizer, stored as QP value + 1
/* In: pic_struct, for pulldown/doubling/etc...used only if b_pic_struct=1.
 * use pic_struct_e for pic_struct inputs
 * Out: pic_struct element associated with frame */
int i_pic_struct;
/* Out: whether this frame is a keyframe. Important when using modes that result in
 * SEI recovery points being used instead of IDR frames. */
int b_keyframe; // Out: nonzero if this frame is a keyframe
/* In: user pts, Out: pts of encoded picture (user)*/
int64_t i_pts; // pts: presentation timestamp
/* Out: frame dts. When the pts of the first frame is close to zero,
 * initial frames may have a negative dts which must be dealt with by any muxer */
int64_t i_dts; // dts: decoding timestamp
/* In: custom encoding parameters to be set from this frame forwards
(in coded order, not display order). If NULL, continue using
parameters from the previous frame. Some parameters, such as
aspect ratio, can only be changed per-GOP due to the limitations
of H.264 itself; in this case, the caller must force an IDR frame
if it needs the changed parameter to apply immediately. */
x264_param_t *param;
/* In: raw image data */
/* Out: reconstructed image data. x264 may skip part of the reconstruction process,
e.g. deblocking, in frames where it isn't necessary. To force complete
reconstruction, at a small speed cost, set b_full_recon. */
x264_image_t img; // In: raw image data; Out: reconstructed image data
/* In: optional information to modify encoder decisions for this frame
 * Out: information about the encoded frame */
x264_image_properties_t prop;
/* Out: HRD timing information. Output only when i_nal_hrd is set. */
x264_hrd_t hrd_timing; // Out: HRD timing information; only produced when i_nal_hrd is set
/* In: arbitrary user SEI (e.g subtitles, AFDs) */
x264_sei_t extra_sei; // In: user-supplied SEI payloads
/* private user data. copied from input to output frames. */
void *opaque; // private user data; copied from the input frame to the output frame
} x264_picture_t;