x264代码学习笔记(一):main函数和encode函数

x264代码的主输入口:main()函数

  1. 对软件进行多线程初始化
  2. 对命令行字符串进行编码格式检测,若不为UTF-8编码格式,则转换
  3. 解析命令行参数,并赋值给 x264_param_t 结构体和 cli_opt_t 结构体,以备后续使用
  4. 使用encode() 函数对视频数据进行编码压缩
  5. 关闭文件等内存清理操作
int main( int argc, char **argv )
{
    x264_param_t param;   // 定义编码器参数结构体
    cli_opt_t opt = {0};  // 此结构体是记录一些与编码关系较小的设置信息
    int ret = 0;

    FAIL_IF_ERROR( x264_threading_init(), "unable to initialize threading\n" );

#ifdef _WIN32
    FAIL_IF_ERROR( !get_argv_utf8( &argc, &argv ), "unable to convert command line to UTF-8\n" );

    GetConsoleTitleW( org_console_title, CONSOLE_TITLE_SIZE );
    _setmode( _fileno( stdin ),  _O_BINARY );
    _setmode( _fileno( stdout ), _O_BINARY );
    _setmode( _fileno( stderr ), _O_BINARY );
#endif

    /* Parse command line 解析命令行*/
    if( parse( argc, argv, ¶m, &opt ) < 0 )  // 参数赋值以及初始化参数赋值
        ret = -1;

#ifdef _WIN32
    /* Restore title; it can be changed by input modules */
    SetConsoleTitleW( org_console_title );
#endif

    /* Control-C handler */
    signal( SIGINT, sigint_handler ); // 捕获Control-C 按键信号

    if( !ret )
        ret = encode( ¶m, &opt );  // 依据所传输的参数,开始编码

    /* clean up handles */
    if( filter.free )
        filter.free( opt.hin );  // 释放输入yuv文件
    else if( opt.hin )
        cli_input.close_file( opt.hin );  // 关闭输入yuv文件句柄
    if( opt.hout )
        cli_output.close_file( opt.hout, 0, 0 );  // 关闭编码后输出文件句柄
    if( opt.tcfile_out )
        fclose( opt.tcfile_out );
    if( opt.qpfile )
        fclose( opt.qpfile );  // 关闭qp文件句柄

#ifdef _WIN32
    SetConsoleTitleW( org_console_title );
    free( argv );
#endif

    return ret;
}

encode()函数:

  1. 打开编码器,即初始化编码一帧所需的参数
  2. NAL包头信息初始化
  3. 逐帧编码视频帧
  4. 清空帧缓存并编码
  5. 关闭编码器,即清理释放内存
static int encode( x264_param_t *param, cli_opt_t *opt )
{
    x264_t *h = NULL;  // 编码所包含的所有参数结构体信息
    x264_picture_t pic;
    cli_pic_t cli_pic;
    const cli_pulldown_t *pulldown = NULL; // shut up gcc

    int     i_frame = 0;  // 帧索引
    int     i_frame_output = 0; // 输出编码帧统计
    int64_t i_end, i_previous = 0, i_start = 0;  // 结束时间、状态统计后时间、开始时间
    int64_t i_file = 0;   // 记录文件的尺寸大小
    int     i_frame_size; // 帧数据尺寸
    int64_t last_dts = 0;    // 最新编码帧的dts
    int64_t prev_dts = 0;    // 最新编码帧前一帧的dts
    int64_t first_dts = 0;   // 第一帧编码帧的dts
#   define  MAX_PTS_WARNING 3 /* arbitrary */
    int     pts_warning_cnt = 0;
    int64_t largest_pts = -1;
    int64_t second_largest_pts = -1;
    int64_t ticks_per_frame;
    double  duration;
    double  pulldown_pts = 0;
    int     retval = 0;  // 函数返回值

    opt->b_progress &= param->i_log_level < X264_LOG_DEBUG;

    /* set up pulldown ,一般电影为24fps,而电视为50fps或者60fps,这就需要将24fps转换到50或60fps,这个过程就叫做pulldown 模式 */
    if( opt->i_pulldown && !param->b_vfr_input )  // 由上面的解释可以得出,帧率必须恒定才可使用pulldown模式,试想变帧率如何进行pulldown模式呢
    {
        param->b_pulldown = 1;  // 1表示使用pulldown模式,而0表示不使用pulldown模式
        param->b_pic_struct = 1;
        pulldown = &pulldown_values[opt->i_pulldown];
        param->i_timebase_num = param->i_fps_den;  //帧率分母为时间基分子
        FAIL_IF_ERROR2( fmod( param->i_fps_num * pulldown->fps_factor, 1 ),
                        "unsupported framerate for chosen pulldown\n" );
        param->i_timebase_den = param->i_fps_num * pulldown->fps_factor;
    }

    h = x264_encoder_open( param );   // 打开X264编码器
    FAIL_IF_ERROR2( !h, "x264_encoder_open failed\n" );

    x264_encoder_parameters( h, param );  // 编码器参数设置

    FAIL_IF_ERROR2( cli_output.set_param( opt->hout, param ), "can't set outfile param\n" );

    i_start = x264_mdate();  // 记录开始时间

    /* ticks/frame = ticks/second / frames/second */
    ticks_per_frame = (int64_t)param->i_timebase_den * param->i_fps_den / param->i_timebase_num / param->i_fps_num;
    FAIL_IF_ERROR2( ticks_per_frame < 1 && !param->b_vfr_input, "ticks_per_frame invalid: %"PRId64"\n", ticks_per_frame );
    ticks_per_frame = X264_MAX( ticks_per_frame, 1 );

    if( !param->b_repeat_headers )
    {
        // Write SPS/PPS/SEI
        x264_nal_t *headers;
        int i_nal;

        FAIL_IF_ERROR2( x264_encoder_headers( h, &headers, &i_nal ) < 0, "x264_encoder_headers failed\n" );  // 写入NAL头信息
        FAIL_IF_ERROR2( (i_file = cli_output.write_headers( opt->hout, headers )) < 0, "error writing headers to output file\n" );
    }

    if( opt->tcfile_out )
        fprintf( opt->tcfile_out, "# timecode format v2\n" );

    /* Encode frames 对每帧图像进行编码 */
    for( ; !b_ctrl_c && (i_frame < param->i_frame_total || !param->i_frame_total); i_frame++ )
    {
        if( filter.get_frame( opt->hin, &cli_pic, i_frame + opt->i_seek ) )
            break;
        x264_picture_init( &pic );   // 初始化pic变量
        convert_cli_to_lib_pic( &pic, &cli_pic );

        if( !param->b_vfr_input )  // 判断是否为可变帧率
            pic.i_pts = i_frame;

        if( opt->i_pulldown && !param->b_vfr_input )
        {
            pic.i_pic_struct = pulldown->pattern[ i_frame % pulldown->mod ];
            pic.i_pts = (int64_t)( pulldown_pts + 0.5 );
            pulldown_pts += pulldown_frame_duration[pic.i_pic_struct];
        }
        else if( opt->timebase_convert_multiplier )
            pic.i_pts = (int64_t)( pic.i_pts * opt->timebase_convert_multiplier + 0.5 );

        if( pic.i_pts <= largest_pts )  // 时刻检验pts是否严格递增的
        {
            if( cli_log_level >= X264_LOG_DEBUG || pts_warning_cnt < MAX_PTS_WARNING )
                x264_cli_log( "x264", X264_LOG_WARNING, "non-strictly-monotonic pts at frame %d (%"PRId64" <= %"PRId64")\n",
                             i_frame, pic.i_pts, largest_pts );
            else if( pts_warning_cnt == MAX_PTS_WARNING )
                x264_cli_log( "x264", X264_LOG_WARNING, "too many nonmonotonic pts warnings, suppressing further ones\n" );
            pts_warning_cnt++;
            pic.i_pts = largest_pts + ticks_per_frame;
        }

        second_largest_pts = largest_pts;  // 更新第二大pts数值
        largest_pts = pic.i_pts;   // 更新pts最大值
        if( opt->tcfile_out )
            fprintf( opt->tcfile_out, "%.6f\n", pic.i_pts * ((double)param->i_timebase_num / param->i_timebase_den) * 1e3 );

        if( opt->qpfile )
            parse_qpfile( opt, &pic, i_frame + opt->i_seek );  // 解析qp参数文件

        prev_dts = last_dts;   // dts值赋值给prev_dts保存
        i_frame_size = encode_frame( h, opt->hout, &pic, &last_dts );  // 编码单帧,得到最新的dts值,返回帧数据
        if( i_frame_size < 0 )
        {
            b_ctrl_c = 1; /* lie to exit the loop */
            retval = -1;
        }
        else if( i_frame_size )
        {
            i_file += i_frame_size;
            i_frame_output++;   // 编码输出帧数
            if( i_frame_output == 1 )
                first_dts = prev_dts = last_dts;
        }

        if( filter.release_frame( opt->hin, &cli_pic, i_frame + opt->i_seek ) )
            break;

        /* update status line (up to 1000 times per input file) */
        if( opt->b_progress && i_frame_output )
            i_previous = print_status( i_start, i_previous, i_frame_output, param->i_frame_total, i_file, param, 2 * last_dts - prev_dts - first_dts );
    }
    /* Flush delayed frames */
    while( !b_ctrl_c && x264_encoder_delayed_frames( h ) )
    {
        prev_dts = last_dts;
        i_frame_size = encode_frame( h, opt->hout, NULL, &last_dts );
        if( i_frame_size < 0 )
        {
            b_ctrl_c = 1; /* lie to exit the loop */
            retval = -1;
        }
        else if( i_frame_size )
        {
            i_file += i_frame_size;
            i_frame_output++;
            if( i_frame_output == 1 )
                first_dts = prev_dts = last_dts;
        }
        if( opt->b_progress && i_frame_output )
            i_previous = print_status( i_start, i_previous, i_frame_output, param->i_frame_total, i_file, param, 2 * last_dts - prev_dts - first_dts );
    }
fail:
    if( pts_warning_cnt >= MAX_PTS_WARNING && cli_log_level < X264_LOG_DEBUG )
        x264_cli_log( "x264", X264_LOG_WARNING, "%d suppressed nonmonotonic pts warnings\n", pts_warning_cnt-MAX_PTS_WARNING );

    /* duration algorithm fails when only 1 frame is output */
    if( i_frame_output == 1 )
        duration = (double)param->i_fps_den / param->i_fps_num;
    else if( b_ctrl_c )
        duration = (double)(2 * last_dts - prev_dts - first_dts) * param->i_timebase_num / param->i_timebase_den;
    else
        duration = (double)(2 * largest_pts - second_largest_pts) * param->i_timebase_num / param->i_timebase_den;

    i_end = x264_mdate();    // 编码结束时间
    /* Erase progress indicator before printing encoding stats. */
    if( opt->b_progress )
        fprintf( stderr, "                                                                               \r" );
    if( h )
        x264_encoder_close( h );   // 关闭解码器
    fprintf( stderr, "\n" );

    if( b_ctrl_c )
        fprintf( stderr, "aborted at input frame %d, output frame %d\n", opt->i_seek + i_frame, i_frame_output );

    cli_output.close_file( opt->hout, largest_pts, second_largest_pts );
    opt->hout = NULL;

    if( i_frame_output > 0 )
    {
        double fps = (double)i_frame_output * (double)1000000 /
                     (double)( i_end - i_start );    // 求帧率

        fprintf( stderr, "encoded %d frames, %.2f fps, %.2f kb/s\n", i_frame_output, fps,
                 (double) i_file * 8 / ( 1000 * duration ) );
    }

    return retval;
}

其中涉及到非常重要的几个结构体分别为 x264_param_t、x264_t 和 x264_picture_t。

x264_param_t 结构体,此结构体基本涵盖了命令行所涉及到的参数:

typedef struct x264_param_t
{
    /* CPU flags */
    unsigned int cpu;
    int         i_threads;           /* encode multiple frames in parallel,多线程并行编码多帧 */
    int         i_lookahead_threads; /* multiple threads for lookahead analysis,提前分析线程个数 */
    int         b_sliced_threads;  /* Whether to use slice-based threading. 标识是否使用基于slice的线程处理 */
    int         b_deterministic; /* whether to allow non-deterministic optimizations when threaded */
    int         b_cpu_independent; /* force canonical behavior rather than cpu-dependent optimal algorithms 是否依赖CPU内部优化算法*/
    int         i_sync_lookahead; /* threaded lookahead buffer 线程化帧分析缓存*/

    /* Video Properties */
    int         i_width;   // 视频分辨率宽度
    int         i_height;  // 视频分辨率高度
    int         i_csp;         /* CSP of encoded bitstream 色彩空间类型 calorspace type*/
    int         i_level_idc;   // level
    int         i_frame_total; /* number of frames to encode if known, else 0 */

    /* NAL HRD
     * Uses Buffering and Picture Timing SEIs to signal HRD
     * The HRD in H.264 was not designed with VFR in mind.
     * It is therefore not recommendeded to use NAL HRD with VFR.
     * Furthermore, reconfiguring the VBV (via x264_encoder_reconfig)
     * will currently generate invalid HRD. */
    int         i_nal_hrd; //Hypothetical Reference Decoder检验编码器产生的符合该标准的NAL单元流。蓝光视频、电视广播及其它特殊领域有此要求

    struct
    {
        /* they will be reduced to be 0 < x <= 65535 and prime */
        int         i_sar_height;  // SAR: storage aspect ratio  存储像素比例
        int         i_sar_width;

        int         i_overscan;    /* 0=undef, 1=no overscan, 2=overscan */

        /* see h264 annex E for the values of the following */
        int         i_vidformat;
        int         b_fullrange;   // 是否为全幅显示,1为全画面显示,0为截图(抠图)显示
        int         i_colorprim;
        int         i_transfer;
        int         i_colmatrix;
        int         i_chroma_loc;    /* both top & bottom */
    } vui;   // VUI:video usability information 视频可用性信息,不是为编码使用的,而是为解码提供信息

    /* Bitstream parameters */
    int         i_frame_reference;  /* Maximum number of reference frames 参考帧最大帧数 */
    int         i_dpb_size;         /* Force a DPB size larger than that implied by B-frames and reference frames.
                                     * Useful in combination with interactive error resilience. */
    int         i_keyint_max;       /* Force an IDR keyframe at this interval ,在这个间隔内强制插入一个IDR帧*/
    int         i_keyint_min;       /* Scenecuts closer together than this are coded as I, not IDR. 发生场景切换足够近的位置插入I帧*/
    int         i_scenecut_threshold; /* how aggressively to insert extra I frames 场景切换阈值,大于此阈值则强制插入I帧*/
    int         b_intra_refresh;    /* Whether or not to use periodic intra refresh instead of IDR frames. 是否使用周期帧内刷新代替IDR帧*/

    int         i_bframe;   /* how many b-frame between 2 references pictures 在两个参考帧之间的B帧的个数*/
    int         i_bframe_adaptive;  // 算法自适应调整b帧个数
    int         i_bframe_bias;    // 控制B帧被P帧替换的概率
    int         i_bframe_pyramid;   /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal 可使用B帧作为参考帧*/
    int         b_open_gop;   // 是否可跨GOP进行参考帧预测
    int         b_bluray_compat;  // 是否兼容蓝光压缩格式
    int         i_avcintra_class;

    int         b_deblocking_filter;     // 去块滤波器是否开启
    int         i_deblocking_filter_alphac0;    /* [-6, 6] -6 light filter, 6 strong */
    int         i_deblocking_filter_beta;       /* [-6, 6]  idem */

    int         b_cabac;        // 是否使用cabac熵编码格式
    int         i_cabac_init_idc;

    int         b_interlaced;    // 是否为隔行扫描
    int         b_constrained_intra;

    int         i_cqm_preset;   // CQM: custom quantization matrices 用户自定义量化矩阵
    char        *psz_cqm_file;      /* filename (in UTF-8) of CQM file, JM format 量化参数矩阵文件 */
    uint8_t     cqm_4iy[16];        /* used only if i_cqm_preset == X264_CQM_CUSTOM */
    uint8_t     cqm_4py[16];
    uint8_t     cqm_4ic[16];
    uint8_t     cqm_4pc[16];
    uint8_t     cqm_8iy[64];
    uint8_t     cqm_8py[64];
    uint8_t     cqm_8ic[64];
    uint8_t     cqm_8pc[64];

    /* Log */
    void        (*pf_log)( void *, int i_level, const char *psz, va_list );
    void        *p_log_private;
    int         i_log_level;
    int         b_full_recon;   /* fully reconstruct frames, even when not necessary for encoding.  Implied by psz_dump_yuv */
    char        *psz_dump_yuv;  /* filename (in UTF-8) for reconstructed frames */

    /* Encoder analyser parameters */
    struct
    {
        unsigned int intra;     /* intra partitions 帧内分块*/
        unsigned int inter;     /* inter partitions 帧间分块*/

        int          b_transform_8x8; // 8*8宏块子块 变换
        int          i_weighted_pred; /* weighting for P-frames P帧的预测权重 */
        int          b_weighted_bipred; /* implicit weighting for B-frames B帧的预测权重 */
        int          i_direct_mv_pred; /* spatial vs temporal mv prediction */
        int          i_chroma_qp_offset;

        int          i_me_method; /* motion estimation algorithm to use (X264_ME_*) 运动估计算法 */
        int          i_me_range; /* integer pixel motion estimation search range (from predicted mv) 运动估计搜索范围 */
        int          i_mv_range; /* maximum length of a mv (in pixels). -1 = auto, based on level 运动向量长度范围 */
        int          i_mv_range_thread; /* minimum space between threads. -1 = auto, based on number of threads. */
        int          i_subpel_refine; /* subpixel motion estimation quality 亚像素运动估计质量 */
        int          b_chroma_me; /* chroma ME for subpel and mode decision in P-frames */
        int          b_mixed_references; /* allow each mb partition to have its own reference number */
        int          i_trellis;  /* trellis RD quantization */
        int          b_fast_pskip; /* early SKIP detection on P-frames */
        int          b_dct_decimate; /* transform coefficient thresholding on P-frames */
        int          i_noise_reduction; /* adaptive pseudo-deadzone */
        float        f_psy_rd; /* Psy RD strength */
        float        f_psy_trellis; /* Psy trellis strength */
        int          b_psy; /* Toggle all psy optimizations */

        int          b_mb_info;            /* Use input mb_info data in x264_picture_t */
        int          b_mb_info_update; /* Update the values in mb_info according to the results of encoding. */

        /* the deadzone size that will be used in luma quantization */
        int          i_luma_deadzone[2]; /* {inter, intra} */

        int          b_psnr;    /* compute and print PSNR stats */
        int          b_ssim;    /* compute and print SSIM stats */
    } analyse;

    /* Rate control parameters */
    struct
    {
        int         i_rc_method;    /* X264_RC_* 码率控制算法类型 */

        int         i_qp_constant;  /* 0 to (51 + 6*(x264_bit_depth-8)). 0=lossless qp常量 */
        int         i_qp_min;       /* min allowed QP value 允许的最小qp值 */
        int         i_qp_max;       /* max allowed QP value 允许的最大qp值 */
        int         i_qp_step;      /* max QP step between frames 帧之间的qp最大步长 */

        int         i_bitrate;
        float       f_rf_constant;  /* 1pass VBR, nominal QP */
        float       f_rf_constant_max;  /* In CRF mode, maximum CRF as caused by VBV */
        float       f_rate_tolerance;
        int         i_vbv_max_bitrate;
        int         i_vbv_buffer_size;
        float       f_vbv_buffer_init; /* <=1: fraction of buffer_size. >1: kbit */
        float       f_ip_factor;
        float       f_pb_factor;

        /* VBV filler: force CBR VBV and use filler bytes to ensure hard-CBR.
         * Implied by NAL-HRD CBR. */
        int         b_filler;

        int         i_aq_mode;      /* psy adaptive QP. (X264_AQ_*) */
        float       f_aq_strength;
        int         b_mb_tree;      /* Macroblock-tree ratecontrol. mb_tree算法是否开启标志位 */
        int         i_lookahead;

        /* 2pass */
        int         b_stat_write;   /* Enable stat writing in psz_stat_out */
        char        *psz_stat_out;  /* output filename (in UTF-8) of the 2pass stats file */
        int         b_stat_read;    /* Read stat from psz_stat_in and use it */
        char        *psz_stat_in;   /* input filename (in UTF-8) of the 2pass stats file */

        /* 2pass params (same as ffmpeg ones) */
        float       f_qcompress;    /* 0.0 => cbr, 1.0 => constant qp */
        float       f_qblur;        /* temporally blur quants */
        float       f_complexity_blur; /* temporally blur complexity */
        x264_zone_t *zones;         /* ratecontrol overrides */
        int         i_zones;        /* number of zone_t's */
        char        *psz_zones;     /* alternate method of specifying zones */
    } rc;

    /* Cropping Rectangle parameters: added to those implicitly defined by
       non-mod16 video resolutions. */
    struct
    {
        unsigned int i_left;  // 矩形框左边坐标
        unsigned int i_top;   // 矩形框上边坐标
        unsigned int i_right; // 矩形框右边坐标
        unsigned int i_bottom;// 矩形框下边坐标
    } crop_rect;   // 裁剪矩形框

    /* frame packing arrangement flag */
    int i_frame_packing;

    /* Muxing parameters */
    int b_aud;                  /* generate access unit delimiters */
    int b_repeat_headers;       /* put SPS/PPS before each keyframe */
    int b_annexb;               /* if set, place start codes (4 bytes) before NAL units,
                                 * otherwise place size (4 bytes) before NAL units. */
    int i_sps_id;               /* SPS and PPS id number */
    int b_vfr_input;            /* VFR input.  If 1, use timebase and timestamps for ratecontrol purposes.
                                 * If 0, use fps only. */
    int b_pulldown;             /* use explicity set timebase for CFR */
    uint32_t i_fps_num;         // 帧率分子,为精确表达帧率,使用分子/分母两个变量表示
    uint32_t i_fps_den;         // 帧率分母
    uint32_t i_timebase_num;    /* Timebase numerator */
    uint32_t i_timebase_den;    /* Timebase denominator */

    int b_tff;

    /* Pulldown:
     * The correct pic_struct must be passed with each input frame.
     * The input timebase should be the timebase corresponding to the output framerate. This should be constant.
     * e.g. for 3:2 pulldown timebase should be 1001/30000
     * The PTS passed with each frame must be the PTS of the frame after pulldown is applied.
     * Frame doubling and tripling require b_vfr_input set to zero (see H.264 Table D-1)
     *
     * Pulldown changes are not clearly defined in H.264. Therefore, it is the calling app's responsibility to manage this.
     */

    int b_pic_struct;

    /* Fake Interlaced.
     *
     * Used only when b_interlaced=0. Setting this flag makes it possible to flag the stream as PAFF interlaced yet
     * encode all frames progessively. It is useful for encoding 25p and 30p Blu-Ray streams.
     */

    int b_fake_interlaced;

    /* Don't optimize header parameters based on video content, e.g. ensure that splitting an input video, compressing
     * each part, and stitching them back together will result in identical SPS/PPS. This is necessary for stitching
     * with container formats that don't allow multiple SPS/PPS. */
    int b_stitchable;

    int b_opencl;            /* use OpenCL when available */
    int i_opencl_device;     /* specify count of GPU devices to skip, for CLI users */
    void *opencl_device_id;  /* pass explicit cl_device_id as void*, for API users */
    char *psz_clbin_file;    /* filename (in UTF-8) of the compiled OpenCL kernel cache file */

    /* Slicing parameters */
    int i_slice_max_size;    /* Max size per slice in bytes; includes estimated NAL overhead. */
    int i_slice_max_mbs;     /* Max number of MBs per slice; overrides i_slice_count. */
    int i_slice_min_mbs;     /* Min number of MBs per slice */
    int i_slice_count;       /* Number of slices per frame: forces rectangular slices. */
    int i_slice_count_max;   /* Absolute cap on slices per frame; stops applying slice-max-size
                              * and slice-max-mbs if this is reached. */

    /* Optional callback for freeing this x264_param_t when it is done being used.
     * Only used when the x264_param_t sits in memory for an indefinite period of time,
     * i.e. when an x264_param_t is passed to x264_t in an x264_picture_t or in zones.
     * Not used when x264_encoder_reconfig is called directly. */
    void (*param_free)( void* );

    /* Optional low-level callback for low-latency encoding.  Called for each output NAL unit
     * immediately after the NAL unit is finished encoding.  This allows the calling application
     * to begin processing video data (e.g. by sending packets over a network) before the frame
     * is done encoding.
     *
     * This callback MUST do the following in order to work correctly:
     * 1) Have available an output buffer of at least size nal->i_payload*3/2 + 5 + 64.
     * 2) Call x264_nal_encode( h, dst, nal ), where dst is the output buffer.
     * After these steps, the content of nal is valid and can be used in the same way as if
     * the NAL unit were output by x264_encoder_encode.
     *
     * This does not need to be synchronous with the encoding process: the data pointed to
     * by nal (both before and after x264_nal_encode) will remain valid until the next
     * x264_encoder_encode call.  The callback must be re-entrant.
     *
     * This callback does not work with frame-based threads; threads must be disabled
     * or sliced-threads enabled.  This callback also does not work as one would expect
     * with HRD -- since the buffering period SEI cannot be calculated until the frame
     * is finished encoding, it will not be sent via this callback.
     *
     * Note also that the NALs are not necessarily returned in order when sliced threads is
     * enabled.  Accordingly, the variable i_first_mb and i_last_mb are available in
     * x264_nal_t to help the calling application reorder the slices if necessary.
     *
     * When this callback is enabled, x264_encoder_encode does not return valid NALs;
     * the calling application is expected to acquire all output NALs through the callback.
     *
     * It is generally sensible to combine this callback with a use of slice-max-mbs or
     * slice-max-size.
     *
     * The opaque pointer is the opaque pointer from the input frame associated with this
     * NAL unit. This helps distinguish between nalu_process calls from different sources,
     * e.g. if doing multiple encodes in one process.
     */
    void (*nalu_process)( x264_t *h, x264_nal_t *nal, void *opaque );
} x264_param_t;

x264_t结构体(最为重要),此结构体包含了编码所需的全部参数,需重点阅读分析:

struct x264_t
{
    /* encoder parameters */
    x264_param_t    param;

    x264_t          *thread[X264_THREAD_MAX+1];
    x264_t          *lookahead_thread[X264_LOOKAHEAD_THREAD_MAX];
    int             b_thread_active;  // 表示线程是否为激活状态,为0表示未激活,1表示处于激活状态
    int             i_thread_phase; /* which thread to use for the next frame 指示下一帧编码将用到的线程 */
    int             i_thread_idx;   /* which thread this is 即当前帧编号 */
    int             i_threadslice_start; /* first row in this thread slice */
    int             i_threadslice_end; /* row after the end of this thread slice */
    int             i_threadslice_pass; /* which pass of encoding we are on 即当前线程正在编码第几遍 */
    x264_threadpool_t *threadpool;  // 线程池
    x264_threadpool_t *lookaheadpool; // 超前线程池
    x264_pthread_mutex_t mutex;
    x264_pthread_cond_t cv;

    /* bitstream output */
    struct
    {
        int         i_nal;
        int         i_nals_allocated;  // 需要分配nal的个数
        x264_nal_t  *nal;  // NAL的相关信息
        int         i_bitstream;    /* size of p_bitstream ,即NAL数据bit数 */
        uint8_t     *p_bitstream;   /* will hold data for all nal , NAL总数据 */
        bs_t        bs;
    } out;

    uint8_t *nal_buffer;
    int      nal_buffer_size;

    x264_t          *reconfig_h;
    int             reconfig;

    /**** thread synchronization starts here ****/

    /* frame number/poc */
    int             i_frame;  // 帧
    int             i_frame_num;

    int             i_thread_frames; /* Number of different frames being encoded by threads;
                                      * 1 when sliced-threads is on. */
    int             i_nal_type;
    int             i_nal_ref_idc;

    int64_t         i_disp_fields;  /* Number of displayed fields (both coded and implied via pic_struct) */
    int             i_disp_fields_last_frame;
    int64_t         i_prev_duration; /* Duration of previous frame */
    int64_t         i_coded_fields; /* Number of coded fields (both coded and implied via pic_struct) */
    int64_t         i_cpb_delay;    /* Equal to number of fields preceding this field
                                     * since last buffering_period SEI */
    int64_t         i_coded_fields_lookahead; /* Use separate counters for lookahead */
    int64_t         i_cpb_delay_lookahead;

    int64_t         i_cpb_delay_pir_offset;
    int64_t         i_cpb_delay_pir_offset_next;

    int             b_queued_intra_refresh;
    int64_t         i_last_idr_pts;

    int             i_idr_pic_id;

    /* quantization matrix for decoding, [cqm][qp%6][coef] */
    int             (*dequant4_mf[4])[16];   /* [4][6][16] */
    int             (*dequant8_mf[4])[64];   /* [4][6][64] */
    /* quantization matrix for trellis, [cqm][qp][coef] */
    int             (*unquant4_mf[4])[16];   /* [4][QP_MAX_SPEC+1][16] */
    int             (*unquant8_mf[4])[64];   /* [4][QP_MAX_SPEC+1][64] */
    /* quantization matrix for deadzone */
    udctcoef        (*quant4_mf[4])[16];     /* [4][QP_MAX_SPEC+1][16] */
    udctcoef        (*quant8_mf[4])[64];     /* [4][QP_MAX_SPEC+1][64] */
    udctcoef        (*quant4_bias[4])[16];   /* [4][QP_MAX_SPEC+1][16] */
    udctcoef        (*quant8_bias[4])[64];   /* [4][QP_MAX_SPEC+1][64] */
    udctcoef        (*quant4_bias0[4])[16];  /* [4][QP_MAX_SPEC+1][16] */
    udctcoef        (*quant8_bias0[4])[64];  /* [4][QP_MAX_SPEC+1][64] */
    udctcoef        (*nr_offset_emergency)[4][64];

    /* mv/ref cost arrays. */
    uint16_t *cost_mv[QP_MAX+1];
    uint16_t *cost_mv_fpel[QP_MAX+1][4];

    const uint8_t   *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */

    /* Slice header */
    x264_slice_header_t sh;

    /* SPS / PPS */
    x264_sps_t      sps[1];
    x264_pps_t      pps[1];

    /* Slice header backup, for SEI_DEC_REF_PIC_MARKING */
    int b_sh_backup;
    x264_slice_header_t sh_backup;

    /* cabac context */
    x264_cabac_t    cabac;

    struct
    {
        /* Frames to be encoded (whose types have been decided) */
        x264_frame_t **current;
        /* Unused frames: 0 = fenc, 1 = fdec */
        x264_frame_t **unused[2];

        /* Unused blank frames (for duplicates) */
        x264_frame_t **blank_unused;

        /* frames used for reference + sentinels */
        x264_frame_t *reference[X264_REF_MAX+2];

        int i_last_keyframe;       /* Frame number of the last keyframe */
        int i_last_idr;            /* Frame number of the last IDR (not RP)*/
        int i_poc_last_open_gop;   /* Poc of the I frame of the last open-gop. The value
                                    * is only assigned during the period between that
                                    * I frame and the next P or I frame, else -1 */

        int i_input;    /* Number of input frames already accepted */

        int i_max_dpb;  /* Number of frames allocated in the decoded picture buffer */
        int i_max_ref0;
        int i_max_ref1;
        int i_delay;    /* Number of frames buffered for B reordering */
        int     i_bframe_delay;
        int64_t i_bframe_delay_time;
        int64_t i_first_pts;
        int64_t i_prev_reordered_pts[2];
        int64_t i_largest_pts;
        int64_t i_second_largest_pts;
        int b_have_lowres;  /* Whether 1/2 resolution luma planes are being used */
        int b_have_sub8x8_esa;
    } frames;

    /* current frame being encoded */
    x264_frame_t    *fenc;

    /* frame being reconstructed */
    x264_frame_t    *fdec;

    /* references lists */
    int             i_ref[2];
    x264_frame_t    *fref[2][X264_REF_MAX+3];
    x264_frame_t    *fref_nearest[2];
    int             b_ref_reorder[2];

    /* hrd */
    int initial_cpb_removal_delay;
    int initial_cpb_removal_delay_offset;
    int64_t i_reordered_pts_delay;

    /* Current MB DCT coeffs */
    struct
    {
        ALIGNED_32( dctcoef luma16x16_dc[3][16] );
        ALIGNED_16( dctcoef chroma_dc[2][8] );
        // FIXME share memory?
        ALIGNED_32( dctcoef luma8x8[12][64] );
        ALIGNED_32( dctcoef luma4x4[16*3][16] );
    } dct;

    /* MB table and cache for current frame/mb */
    struct
    {
        int     i_mb_width;
        int     i_mb_height;
        int     i_mb_count;                 /* number of mbs in a frame */

        /* Chroma subsampling */
        int     chroma_h_shift;
        int     chroma_v_shift;

        /* Strides */
        int     i_mb_stride;
        int     i_b8_stride;
        int     i_b4_stride;
        int     left_b8[2];
        int     left_b4[2];

        /* Current index */
        int     i_mb_x;
        int     i_mb_y;
        int     i_mb_xy;
        int     i_b8_xy;
        int     i_b4_xy;

        /* Search parameters */
        int     i_me_method;
        int     i_subpel_refine;
        int     b_chroma_me;
        int     b_trellis;
        int     b_noise_reduction;
        int     b_dct_decimate;
        int     i_psy_rd; /* Psy RD strength--fixed point value*/
        int     i_psy_trellis; /* Psy trellis strength--fixed point value*/

        int     b_interlaced;
        int     b_adaptive_mbaff; /* MBAFF+subme 0 requires non-adaptive MBAFF i.e. all field mbs */

        /* Allowed qpel MV range to stay within the picture + emulated edge pixels */
        int     mv_min[2];
        int     mv_max[2];
        int     mv_miny_row[3]; /* 0 == top progressive, 1 == bot progressive, 2 == interlaced */
        int     mv_maxy_row[3];
        /* Subpel MV range for motion search.
         * same mv_min/max but includes levels' i_mv_range. */
        int     mv_min_spel[2];
        int     mv_max_spel[2];
        int     mv_miny_spel_row[3];
        int     mv_maxy_spel_row[3];
        /* Fullpel MV range for motion search */
        ALIGNED_8( int16_t mv_limit_fpel[2][2] ); /* min_x, min_y, max_x, max_y */
        int     mv_miny_fpel_row[3];
        int     mv_maxy_fpel_row[3];

        /* neighboring MBs */
        unsigned int i_neighbour;
        unsigned int i_neighbour8[4];       /* neighbours of each 8x8 or 4x4 block that are available */
        unsigned int i_neighbour4[16];      /* at the time the block is coded */
        unsigned int i_neighbour_intra;     /* for constrained intra pred */
        unsigned int i_neighbour_frame;     /* ignoring slice boundaries */
        int     i_mb_type_top;
        int     i_mb_type_left[2];
        int     i_mb_type_topleft;
        int     i_mb_type_topright;
        int     i_mb_prev_xy;
        int     i_mb_left_xy[2];
        int     i_mb_top_xy;
        int     i_mb_topleft_xy;
        int     i_mb_topright_xy;
        int     i_mb_top_y;
        int     i_mb_topleft_y;
        int     i_mb_topright_y;
        const x264_left_table_t *left_index_table;
        int     i_mb_top_mbpair_xy;
        int     topleft_partition;
        int     b_allow_skip;
        int     field_decoding_flag;

        /**** thread synchronization ends here ****/
        /* subsequent variables are either thread-local or constant,
         * and won't be copied from one thread to another */

        /* mb table */
        uint8_t *base;                      /* base pointer for all malloced data in this mb */
        int8_t  *type;                      /* mb type */
        uint8_t *partition;                 /* mb partition */
        int8_t  *qp;                        /* mb qp */
        int16_t *cbp;                       /* mb cbp: 0x0?: luma, 0x?0: chroma, 0x100: luma dc, 0x0200 and 0x0400: chroma dc  (all set for PCM)*/
        int8_t  (*intra4x4_pred_mode)[8];   /* intra4x4 pred mode. for non I4x4 set to I_PRED_4x4_DC(2) */
                                            /* actually has only 7 entries; set to 8 for write-combining optimizations */
        uint8_t (*non_zero_count)[16*3];    /* nzc. for I_PCM set to 16 */
        int8_t  *chroma_pred_mode;          /* chroma_pred_mode. cabac only. for non intra I_PRED_CHROMA_DC(0) */
        int16_t (*mv[2])[2];                /* mb mv. set to 0 for intra mb */
        uint8_t (*mvd[2])[8][2];            /* absolute value of mb mv difference with predict, clipped to [0,33]. set to 0 if intra. cabac only */
        int8_t   *ref[2];                   /* mb ref. set to -1 if non used (intra or Lx only) */
        int16_t (*mvr[2][X264_REF_MAX*2])[2];/* 16x16 mv for each possible ref */
        int8_t  *skipbp;                    /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */
        int8_t  *mb_transform_size;         /* transform_size_8x8_flag of each mb */
        uint16_t *slice_table;              /* sh->first_mb of the slice that the indexed mb is part of
                                             * NOTE: this will fail on resolutions above 2^16 MBs... */
        uint8_t *field;

         /* buffer for weighted versions of the reference frames */
        pixel *p_weight_buf[X264_REF_MAX];

        /* current value */
        int     i_type;
        int     i_partition;
        ALIGNED_4( uint8_t i_sub_partition[4] );
        int     b_transform_8x8;

        int     i_cbp_luma;
        int     i_cbp_chroma;

        int     i_intra16x16_pred_mode;
        int     i_chroma_pred_mode;

        /* skip flags for i4x4 and i8x8
         * 0 = encode as normal.
         * 1 (non-RD only) = the DCT is still in h->dct, restore fdec and skip reconstruction.
         * 2 (RD only) = the DCT has since been overwritten by RD; restore that too. */
        int i_skip_intra;
        /* skip flag for motion compensation */
        /* if we've already done MC, we don't need to do it again */
        int b_skip_mc;
        /* set to true if we are re-encoding a macroblock. */
        int b_reencode_mb;
        int ip_offset; /* Used by PIR to offset the quantizer of intra-refresh blocks. */
        int b_deblock_rdo;
        int b_overflow; /* If CAVLC had a level code overflow during bitstream writing. */

        struct
        {
            /* space for p_fenc and p_fdec */
#define FENC_STRIDE 16
#define FDEC_STRIDE 32
            ALIGNED_32( pixel fenc_buf[48*FENC_STRIDE] );
            ALIGNED_32( pixel fdec_buf[52*FDEC_STRIDE] );

            /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
            ALIGNED_16( pixel i4x4_fdec_buf[16*16] );
            ALIGNED_16( pixel i8x8_fdec_buf[16*16] );
            ALIGNED_16( dctcoef i8x8_dct_buf[3][64] );
            ALIGNED_16( dctcoef i4x4_dct_buf[15][16] );
            uint32_t i4x4_nnz_buf[4];
            uint32_t i8x8_nnz_buf[4];
            int i4x4_cbp;
            int i8x8_cbp;

            /* Psy trellis DCT data */
            ALIGNED_16( dctcoef fenc_dct8[4][64] );
            ALIGNED_16( dctcoef fenc_dct4[16][16] );

            /* Psy RD SATD/SA8D scores cache */
            ALIGNED_32( uint64_t fenc_hadamard_cache[9] );
            ALIGNED_32( uint32_t fenc_satd_cache[32] );

            /* pointer over mb of the frame to be compressed */
            pixel *p_fenc[3]; /* y,u,v */
            /* pointer to the actual source frame, not a block copy */
            pixel *p_fenc_plane[3];

            /* pointer over mb of the frame to be reconstructed  */
            pixel *p_fdec[3];

            /* pointer over mb of the references */
            int i_fref[2];
            /* [12]: yN, yH, yV, yHV, (NV12 ? uv : I444 ? (uN, uH, uV, uHV, vN, ...)) */
            pixel *p_fref[2][X264_REF_MAX*2][12];
            pixel *p_fref_w[X264_REF_MAX*2];  /* weighted fullpel luma */
            uint16_t *p_integral[2][X264_REF_MAX];

            /* fref stride */
            int     i_stride[3];
        } pic;

        /* cache */
        struct
        {
            /* real intra4x4_pred_mode if I_4X4 or I_8X8, I_PRED_4x4_DC if mb available, -1 if not */
            ALIGNED_8( int8_t intra4x4_pred_mode[X264_SCAN8_LUMA_SIZE] );

            /* i_non_zero_count if available else 0x80 */
            ALIGNED_16( uint8_t non_zero_count[X264_SCAN8_SIZE] );

            /* -1 if unused, -2 if unavailable */
            ALIGNED_4( int8_t ref[2][X264_SCAN8_LUMA_SIZE] );

            /* 0 if not available */
            ALIGNED_16( int16_t mv[2][X264_SCAN8_LUMA_SIZE][2] );
            ALIGNED_8( uint8_t mvd[2][X264_SCAN8_LUMA_SIZE][2] );

            /* 1 if SKIP or DIRECT. set only for B-frames + CABAC */
            ALIGNED_4( int8_t skip[X264_SCAN8_LUMA_SIZE] );

            ALIGNED_4( int16_t direct_mv[2][4][2] );
            ALIGNED_4( int8_t  direct_ref[2][4] );
            int     direct_partition;
            ALIGNED_4( int16_t pskip_mv[2] );

            /* number of neighbors (top and left) that used 8x8 dct */
            int     i_neighbour_transform_size;
            int     i_neighbour_skip;

            /* neighbor CBPs */
            int     i_cbp_top;
            int     i_cbp_left;

            /* extra data required for mbaff in mv prediction */
            int16_t topright_mv[2][3][2];
            int8_t  topright_ref[2][3];

            /* current mb deblock strength */
            uint8_t (*deblock_strength)[8][4];
        } cache;

        /* */
        int     i_qp;       /* current qp */
        int     i_chroma_qp;
        int     i_last_qp;  /* last qp */
        int     i_last_dqp; /* last delta qp */
        int     b_variable_qp; /* whether qp is allowed to vary per macroblock */
        int     b_lossless;
        int     b_direct_auto_read; /* take stats for --direct auto from the 2pass log */
        int     b_direct_auto_write; /* analyse direct modes, to use and/or save */

        /* lambda values */
        int     i_trellis_lambda2[2][2]; /* [luma,chroma][inter,intra] */
        int     i_psy_rd_lambda;
        int     i_chroma_lambda2_offset;

        /* B_direct and weighted prediction */
        int16_t dist_scale_factor_buf[2][2][X264_REF_MAX*2][4];
        int16_t (*dist_scale_factor)[4];
        int8_t bipred_weight_buf[2][2][X264_REF_MAX*2][4];
        int8_t (*bipred_weight)[4];
        /* maps fref1[0]'s ref indices into the current list0 */
#define map_col_to_list0(col) h->mb.map_col_to_list0[(col)+2]
        int8_t  map_col_to_list0[X264_REF_MAX+2];
        int ref_blind_dupe; /* The index of the blind reference frame duplicate. */
        int8_t deblock_ref_table[X264_REF_MAX*2+2];
#define deblock_ref_table(x) h->mb.deblock_ref_table[(x)+2]
    } mb;

    /* rate control encoding only */
    x264_ratecontrol_t *rc;

    /* stats */
    struct
    {
        /* Cumulated stats */

        /* per slice info */
        int     i_frame_count[3];
        int64_t i_frame_size[3];
        double  f_frame_qp[3];
        int     i_consecutive_bframes[X264_BFRAME_MAX+1];
        /* */
        double  f_ssd_global[3];
        double  f_psnr_average[3];
        double  f_psnr_mean_y[3];
        double  f_psnr_mean_u[3];
        double  f_psnr_mean_v[3];
        double  f_ssim_mean_y[3];
        double  f_frame_duration[3];
        /* */
        int64_t i_mb_count[3][19];
        int64_t i_mb_partition[2][17];
        int64_t i_mb_count_8x8dct[2];
        int64_t i_mb_count_ref[2][2][X264_REF_MAX*2];
        int64_t i_mb_cbp[6];
        int64_t i_mb_pred_mode[4][13];
        int64_t i_mb_field[3];
        /* */
        int     i_direct_score[2];
        int     i_direct_frames[2];
        /* num p-frames weighted */
        int     i_wpred[2];

        /* Current frame stats */
        x264_frame_stat_t frame;
    } stat;

    /* 0 = luma 4x4, 1 = luma 8x8, 2 = chroma 4x4, 3 = chroma 8x8 */
    udctcoef (*nr_offset)[64];
    uint32_t (*nr_residual_sum)[64];
    uint32_t *nr_count;

    ALIGNED_32( udctcoef nr_offset_denoise[4][64] );
    ALIGNED_32( uint32_t nr_residual_sum_buf[2][4][64] );
    uint32_t nr_count_buf[2][4];

    uint8_t luma2chroma_pixel[7]; /* Subsampled pixel size */

    /* Buffers that are allocated per-thread even in sliced threads. */
    void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
    void *scratch_buffer2; /* if the first one's already in use */
    pixel *intra_border_backup[5][3]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
    /* Deblock strength values are stored for each 4x4 partition. In MBAFF
     * there are four extra values that need to be stored, located in [4][i]. */
    uint8_t (*deblock_strength[2])[2][8][4];

    /* CPU functions dependents */
    x264_predict_t      predict_16x16[4+3];
    x264_predict8x8_t   predict_8x8[9+3];
    x264_predict_t      predict_4x4[9+3];
    x264_predict_t      predict_chroma[4+3];
    x264_predict_t      predict_8x8c[4+3];
    x264_predict_t      predict_8x16c[4+3];
    x264_predict_8x8_filter_t predict_8x8_filter;

    x264_pixel_function_t pixf;
    x264_mc_functions_t   mc;
    x264_dct_function_t   dctf;
    x264_zigzag_function_t zigzagf;
    x264_zigzag_function_t zigzagf_interlaced;
    x264_zigzag_function_t zigzagf_progressive;
    x264_quant_function_t quantf;
    x264_deblock_function_t loopf;
    x264_bitstream_function_t bsf;

    x264_lookahead_t *lookahead;

#if HAVE_OPENCL
    x264_opencl_t opencl;
#endif
};

x264_picture_t 结构体,此结构体用于存储编码前后的视频数据及相关信息:

typedef struct x264_picture_t
{
    /* In: force picture type (if not auto)
     *     If x264 encoding parameters are violated in the forcing of picture types,
     *     x264 will correct the input picture type and log a warning.
     * Out: type of the picture encoded */
    int     i_type;  // 输入:强制标识编码帧类型,若编码器检验参数无效时,会自动改正编码帧类型                                       输出:表示编码帧类型
    /* In: force quantizer for != X264_QP_AUTO */
    int     i_qpplus1;  // 输入:强制编码时量化参数qp数值+1
    /* In: pic_struct, for pulldown/doubling/etc...used only if b_pic_struct=1.
     *     use pic_struct_e for pic_struct inputs
     * Out: pic_struct element associated with frame */
    int     i_pic_struct;
    /* Out: whether this frame is a keyframe.  Important when using modes that result in
     * SEI recovery points being used instead of IDR frames. */
    int     b_keyframe;  // 输出: 标识是否为关键帧
    /* In: user pts, Out: pts of encoded picture (user)*/
    int64_t i_pts;    // pts 编码时间戳
    /* Out: frame dts. When the pts of the first frame is close to zero,
     *      initial frames may have a negative dts which must be dealt with by any muxer */
    int64_t i_dts;    // dts 显示时间戳
    /* In: custom encoding parameters to be set from this frame forwards
           (in coded order, not display order). If NULL, continue using
           parameters from the previous frame.  Some parameters, such as
           aspect ratio, can only be changed per-GOP due to the limitations
           of H.264 itself; in this case, the caller must force an IDR frame
           if it needs the changed parameter to apply immediately. */
    x264_param_t *param;
    /* In: raw image data */
    /* Out: reconstructed image data.  x264 may skip part of the reconstruction process,
            e.g. deblocking, in frames where it isn't necessary.  To force complete
            reconstruction, at a small speed cost, set b_full_recon. */
    x264_image_t img;  // 输入:原始yuv图像数据             输出:重建图像数据
    /* In: optional information to modify encoder decisions for this frame
     * Out: information about the encoded frame */
    x264_image_properties_t prop;
    /* Out: HRD timing information. Output only when i_nal_hrd is set. */
    x264_hrd_t hrd_timing;  // 输出:仅当i_nal_hrd设置为1时此参数才有效,HRD时间信息
    /* In: arbitrary user SEI (e.g subtitles, AFDs) */
    x264_sei_t extra_sei;  // SEI信息
    /* private user data. copied from input to output frames. */
    void *opaque;  // 私有用户信息,可用于加密
} x264_picture_t;

 

你可能感兴趣的:(视频处理)