FFmpeg 源码导读 —— H264码流格式分析

阅读此文档需要有一定 H264 及 FFmpeg 知识基础,未入门可参阅 雷神博客 https://blog.csdn.net/leixiaohua1020

大家如果有接触过 H264 编码,一般也会了解到 H264 码流有两种封装格式:一种是 H264 编码协议附录 B 中提供的参考格式,以起始码(start code)分割 nalu,简称 Annex-B 格式;另一种是在每个 nalu 前加上长度前缀来分割 nalu 的 avcC 格式。这两种格式的具体差异和用途网上有不少介绍,此处不再赘述,本文档主要是对这两种格式的实现(FFmpeg h264 解码器)做一个代码分析。

首先我们可以看到在 h264 解析器文件(h264_parser.c)下 H264ParseContext 结构体中有一个 is_avc 变量,该参数为 1 表示 h264 码流格式为 avcC,0 表示 Annex-B 格式。

avcC extradata 格式如下:

bits    
8   version ( always 0x01 )
8   avc profile ( sps[0][1] )
8   avc compatibility ( sps[0][2] )
8   avc level ( sps[0][3] )
6   reserved ( all bits on )
2   NALULengthSizeMinusOne    // nalu 长度前缀的字节数减 1;例如长度前缀占 4 字节时,该值为 3
3   reserved ( all bits on )
5   number of SPS NALUs (usually 1)
repeated once per SPS:
  16     SPS size
  variable   SPS NALU data
8   number of PPS NALUs (usually 1)
repeated once per PPS
  16    PPS size
  variable PPS NALU data

H264ParseContext 结构体定义

/**
 * Private state of the H.264 parser; h264_parse() and parse_nal_units()
 * obtain it from AVCodecParserContext.priv_data.
 */
typedef struct H264ParseContext {
    ParseContext pc;        // frame-assembly state handed to ff_combine_frame()
    H264ParamSets ps;       // parameter sets filled from extradata and from in-band SPS/PPS NAL units
    H264DSPContext h264dsp;
    H264POCContext poc;     // picture-order-count state; its prev_* fields are reset on an IDR slice
    H264SEIContext sei;     // SEI state, reset at the start of every parse_nal_units() call
    int is_avc;  // bitstream format: 0 = Annex-B, 1 = avcC
    int nal_length_size; // avcC only: byte length of the size prefix in front of each NAL unit, i.e. NALULengthSizeMinusOne + 1
    int got_first;          // set to 1 on the first h264_parse() call so extradata is decoded only once
    int picture_structure;
    uint8_t parse_history[6];
    int parse_history_count;
    int parse_last_mb;
    int64_t reference_dts;
    int last_frame_num, last_picture_structure;
} H264ParseContext;

is_avc 赋值
在解析 h264 流函数中(h264_parse)解码 extradata (ff_h264_decode_extradata)的同时对 is_avc 参数赋值。 根据首字节是否为 1 判断即可(Annex-B 以起始码分割 nalu,首字节为 0)。

/**
 * Parser entry point (excerpt; the tail of the function is elided below).
 *
 * On the first call it decodes the codec extradata, which is also where
 * is_avc and nal_length_size get their values. It then determines the end of
 * the current frame and hands the frame data to parse_nal_units().
 *
 * @param s            parser context; s->priv_data is the H264ParseContext
 * @param avctx        codec context supplying extradata and err_recognition
 * @param poutbuf      set to NULL when ff_combine_frame() reports failure
 * @param poutbuf_size set to 0 when ff_combine_frame() reports failure
 * @param buf          input data
 * @param buf_size     size of the input data in bytes
 */
static int h264_parse(AVCodecParserContext *s,
                      AVCodecContext *avctx,
                      const uint8_t **poutbuf, int *poutbuf_size,
                      const uint8_t *buf, int buf_size)
{
    H264ParseContext *p = s->priv_data;
    ParseContext *pc = &p->pc;
    int next;

    /* One-time initialisation: decode extradata (if any) on the first call.
     * This fills p->ps and sets p->is_avc / p->nal_length_size. The return
     * value of ff_h264_decode_extradata() is not checked here. */
    if (!p->got_first) {
        p->got_first = 1;
        if (avctx->extradata_size) {
            ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size,
                                     &p->ps, &p->is_avc, &p->nal_length_size,
                                     avctx->err_recognition, avctx);
        }
    }

    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
        /* Caller delivers whole frames: the frame ends where the buffer ends. */
        next = buf_size;
    } else {
        /* Otherwise locate the frame boundary inside the input. */
        next = h264_find_frame_end(p, buf, buf_size, avctx);

        /* A negative result from ff_combine_frame() means no output is
         * produced for this call; the whole input is reported as consumed. */
        if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
            *poutbuf      = NULL;
            *poutbuf_size = 0;
            return buf_size;
        }

        /* Negative next (other than END_NOT_FOUND): re-run the frame-end
         * search over the last -next bytes of pc->buffer. */
        if (next < 0 && next != END_NOT_FOUND) {
            av_assert1(pc->last_index + next >= 0);
            h264_find_frame_end(p, &pc->buffer[pc->last_index + next], -next, avctx); // update state
        }
    }

    /* Walk the NAL units of the frame and extract basic information. */
    parse_nal_units(s, avctx, buf, buf_size);

    ...
    }

解码 extradata 函数

/**
 * Decode codec extradata and report which bitstream framing it implies.
 *
 * Extradata whose first byte is 1 is treated as an avcC record: the SPS and
 * PPS entries it carries (each preceded by a 16-bit big-endian size) are
 * decoded one by one. Any other first byte is treated as Annex-B data and
 * handed to decode_extradata_ps() as a whole.
 *
 * @param data            extradata buffer
 * @param size            size of data in bytes
 * @param ps              parameter-set storage passed on to the decode helpers
 * @param is_avc          set to 1 when data[0] == 1, otherwise set to 0
 * @param nal_length_size set to (data[4] & 0x03) + 1 once an avcC record has
 *                        been decoded successfully; left untouched otherwise
 * @param err_recognition passed on to decode_extradata_ps_mp4()
 * @param logctx          logging context
 * @return size on success; -1 if data is NULL or size <= 0;
 *         AVERROR_INVALIDDATA if the avcC record is shorter than 7 bytes or
 *         an entry claims more bytes than remain; otherwise the negative
 *         value returned by the failing decode helper
 */
int ff_h264_decode_extradata(const uint8_t *data, int size, H264ParamSets *ps,
                             int *is_avc, int *nal_length_size,
                             int err_recognition, void *logctx)
{
    const uint8_t *cur;
    int num_sets, idx, entry_len;
    int ret;

    if (!data || size <= 0)
        return -1;

    if (data[0] != 1) {
        /* Not an avcC record: decode the buffer as Annex-B parameter sets. */
        *is_avc = 0;
        ret = decode_extradata_ps(data, size, ps, 0, logctx);
        if (ret < 0)
            return ret;
        return size;
    }

    /* avcC record: its first byte (the version field) is always 1. */
    *is_avc = 1;

    if (size < 7) {
        av_log(logctx, AV_LOG_ERROR, "avcC %d too short\n", size);
        return AVERROR_INVALIDDATA;
    }

    /* SPS entries: the count is the low 5 bits of byte 5, entries start at byte 6. */
    num_sets = data[5] & 0x1f;
    cur      = data + 6;
    for (idx = 0; idx < num_sets; idx++) {
        /* Entry length = 16-bit payload size plus the 2 size bytes themselves. */
        entry_len = AV_RB16(cur) + 2;
        if (size - (cur - data) < entry_len)
            return AVERROR_INVALIDDATA;
        ret = decode_extradata_ps_mp4(cur, entry_len, ps, err_recognition, logctx);
        if (ret < 0) {
            av_log(logctx, AV_LOG_ERROR,
                   "Decoding sps %d from avcC failed\n", idx);
            return ret;
        }
        cur += entry_len;
    }

    /* PPS entries: a full count byte follows the last SPS entry. */
    num_sets = *cur;
    cur++;
    for (idx = 0; idx < num_sets; idx++) {
        entry_len = AV_RB16(cur) + 2;
        if (size - (cur - data) < entry_len)
            return AVERROR_INVALIDDATA;
        ret = decode_extradata_ps_mp4(cur, entry_len, ps, err_recognition, logctx);
        if (ret < 0) {
            av_log(logctx, AV_LOG_ERROR,
                   "Decoding pps %d from avcC failed\n", idx);
            return ret;
        }
        cur += entry_len;
    }

    /* Length-prefix size used when parsing all subsequent NAL units. */
    *nal_length_size = (data[4] & 0x03) + 1;

    return size;
}

同样的,解码 extradata 后进行了 nal_units 的解析操作,主要的解析步骤已添加代码注释

/**
 * Parse NAL units of found picture and decode some basic information.
 *
 * The buffer is walked either by avcC length prefixes or by Annex-B start
 * codes, depending on the parser's is_avc flag. The tail of the function is
 * elided in this excerpt.
 *
 * @param s parser context.
 * @param avctx codec context.
 * @param buf buffer with field/frame data.
 * @param buf_size size of the buffer.
 */
static inline int parse_nal_units(AVCodecParserContext *s,
                                  AVCodecContext *avctx,
                                  const uint8_t * const buf, int buf_size)
{
    H264ParseContext *p = s->priv_data;
    H2645NAL nal = { NULL };
    int buf_index, next_avc;
    unsigned int pps_id;
    unsigned int slice_type;
    int state = -1, got_reset = 0;
    int q264 = buf_size >=4 && !memcmp("Q264", buf, 4);
    int field_poc[2];
    int ret;

    /* set some sane default values */
    s->pict_type         = AV_PICTURE_TYPE_I;
    s->key_frame         = 0;
    s->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;

    ff_h264_sei_uninit(&p->sei);
    p->sei.frame_packing.frame_packing_arrangement_cancel_flag = -1;

    if (!buf_size)
        return 0;

    buf_index     = 0;  // offset of the current NAL unit inside buf; used to walk the whole buffer
    next_avc      = p->is_avc ? 0 : buf_size; // only meaningful for avcC: offset where the next length-prefixed NAL unit block (prefix included) begins
    for (;;) {
        const SPS *sps;
        int src_length, consumed, nalsize = 0; // nalsize: NAL unit length taken from the avcC length prefix

        if (buf_index >= next_avc) {  // avcC path: the length of each NAL unit comes from its length prefix; Annex-B takes the else branch
            nalsize = get_nalsize(p->nal_length_size, buf, buf_size, &buf_index, avctx); // read the size of the next NAL unit; presumably buf_index is advanced to the NAL unit start (get_nalsize is not shown here)
            if (nalsize < 0) // end of walk: the next NAL unit size is negative
                break;
            // Offset of the next avcC NAL unit block; the later "buf_index += consumed" is expected to bring buf_index up to next_avc again
            next_avc = buf_index + nalsize;  
        } else { // Annex-B format (NAL units separated by start codes)
            buf_index = find_start_code(buf, buf_size, buf_index, next_avc); // look for the next start code; presumably returns the offset of the NAL unit right after it (find_start_code is not shown here)
            if (buf_index >= buf_size) // end of walk: the offset has reached the end of the buffer
                break;
            if (buf_index >= next_avc) // NAL unit start reached next_avc: loop again so the avcC length-prefix branch above handles it (NOTE(review): possibly tolerance for a wrong is_avc - unconfirmed)
                continue;
        }
        src_length = next_avc - buf_index;  // avcC: equals nalsize; Annex-B: number of bytes from buf_index to the end of the buffer

        state = buf[buf_index];  // NAL header byte; the NAL unit type is its low 5 bits (mask 0x1f below)
        switch (state & 0x1f) {
        case H264_NAL_SLICE:
        case H264_NAL_IDR_SLICE:
            // Do not walk the whole buffer just to decode slice header
            if ((state & 0x1f) == H264_NAL_IDR_SLICE || ((state >> 5) & 0x3) == 0) {
                /* IDR or disposable slice
                 * No need to decode many bytes because MMCOs shall not be present. */
                if (src_length > 60)
                    src_length = 60;
            } else {
                /* To decode up to MMCOs */
                if (src_length > 1000)
                    src_length = 1000;
            }
            break;
        }
        // Extract the raw byte sequence payload of the NAL unit and fill the H2645NAL structure
        consumed = ff_h2645_extract_rbsp(buf + buf_index, src_length, &nal, 1); 
        if (consumed < 0)
            break;

        buf_index += consumed;

        ret = init_get_bits8(&nal.gb, nal.data, nal.size);
        if (ret < 0)
            goto fail;
        get_bits1(&nal.gb); // skip the first header bit
        nal.ref_idc = get_bits(&nal.gb, 2); // NAL unit reference level (2 bits)
        nal.type    = get_bits(&nal.gb, 5); // NAL unit type (5 bits)

        switch (nal.type) {
        case H264_NAL_SPS:
            ff_h264_decode_seq_parameter_set(&nal.gb, avctx, &p->ps, 0);
            break;
        case H264_NAL_PPS:
            ff_h264_decode_picture_parameter_set(&nal.gb, avctx, &p->ps,
                                                 nal.size_bits);
            break;
        case H264_NAL_SEI:
            ff_h264_sei_decode(&p->sei, &nal.gb, &p->ps, avctx);
            // NOTE(review): av_h264_sei() and p->sei.type are not defined in this excerpt;
            // presumably a local hook that forwards unregistered user data (SEI type 5) - confirm
            if (p->sei.type == 5 && p->sei.unregistered.user_data != NULL) {
                av_h264_sei(5, p->sei.unregistered.user_data);
            }
            break;
        case H264_NAL_IDR_SLICE:
            s->key_frame = 1;

            p->poc.prev_frame_num        = 0;
            p->poc.prev_frame_num_offset = 0;
            p->poc.prev_poc_msb          =
            p->poc.prev_poc_lsb          = 0;
        /* fall through */
        case H264_NAL_SLICE:
            get_ue_golomb_long(&nal.gb);  // skip first_mb_in_slice
            slice_type   = get_ue_golomb_31(&nal.gb);
            s->pict_type = ff_h264_golomb_to_pict_type[slice_type % 5];
...
}

以上只是对 h264 码流解析时的一个分析,如果使用软件解码,解码时也会有类似的 nal units 解析操作,后续有时间继续更新,本文档也将继续完善。

你可能感兴趣的:(ffmpeg,h.264)