阅读此文档需要有一定 H264 及 FFmpeg 知识基础,未入门可参阅 雷神博客 https://blog.csdn.net/leixiaohua1020
大家如果有接触过 H264 编码,一般也会了解到 H264 码流有两种格式:一种是在 H264 编码协议附录B 中提供的、以起始码分割 nalu 的参考格式,简称 Annex-B 格式;另一种是以长度前缀(而不是起始码)分割 nalu 的 avcC 格式。这两种格式的具体差异和用途网上有不少介绍,此处不再赘述,本文档主要是对这两种格式的实现(FFmpeg h264解码器)做一个代码分析。
首先我们可以看到在 h264 解析器文件(h264_parser.c)下 H264ParseContext 结构体中有一个 is_avc 变量,该参数为 1 表示 h264 码流格式为 avcC,0 表示 Annex-B 格式。
avcC extradata 格式如下:
bits
8 version ( always 0x01 )
8 avc profile ( sps[0][1] )
8 avc compatibility ( sps[0][2] )
8 avc level ( sps[0][3] )
6 reserved ( all bits on )
2 NALULengthSizeMinusOne // nalu 长度前缀的字节数减 1,如果 nalu 长度前缀是 4 字节,则该值为 3
3 reserved ( all bits on )
5 number of SPS NALUs (usually 1)
repeated once per SPS:
16 SPS size
variable SPS NALU data
8 number of PPS NALUs (usually 1)
repeated once per PPS
16 PPS size
variable PPS NALU data
H264ParseContext 结构体定义
/* Private state of the H.264 parser; h264_parse() and parse_nal_units() fetch it from AVCodecParserContext.priv_data. */
typedef struct H264ParseContext {
ParseContext pc; // frame-assembly state handed to ff_combine_frame()
H264ParamSets ps; // parameter sets filled from extradata and from in-band SPS/PPS NAL units
H264DSPContext h264dsp;
H264POCContext poc; // picture-order-count state; its prev_* fields are reset when an IDR slice is seen
H264SEIContext sei; // SEI state, cleared at the start of every parse_nal_units() call
int is_avc; // bitstream format: 0 = Annex-B, 1 = avcC
int nal_length_size; // avcC only: byte width of the NALU length prefix (NALULengthSizeMinusOne + 1)
int got_first; // set by the first h264_parse() call so that extradata is decoded only once
int picture_structure;
uint8_t parse_history[6];
int parse_history_count;
int parse_last_mb;
int64_t reference_dts;
int last_frame_num, last_picture_structure;
} H264ParseContext;
is_avc 赋值
在解析 h264 流函数中(h264_parse)解码 extradata (ff_h264_decode_extradata)的同时对 is_avc 参数赋值。 根据首字节是否为 1 判断即可(Annex-B 以起始码分割 nalu,首字节为 0)。
/*
 * Parser entry point (excerpt; the tail of the function is elided below).
 *
 * On the first call it decodes avctx->extradata, which is where is_avc and
 * nal_length_size get their values. It then locates the end of the current
 * frame (unless the caller guarantees complete frames) and hands the frame
 * data to parse_nal_units().
 */
static int h264_parse(AVCodecParserContext *s,
AVCodecContext *avctx,
const uint8_t **poutbuf, int *poutbuf_size,
const uint8_t *buf, int buf_size)
{
H264ParseContext *p = s->priv_data;
ParseContext *pc = &p->pc;
int next;
// One-time setup: got_first makes sure extradata is decoded on the first call only.
if (!p->got_first) {
p->got_first = 1;
if (avctx->extradata_size) {
// Fills p->ps and sets p->is_avc / p->nal_length_size; the return value is not checked here.
ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size,
&p->ps, &p->is_avc, &p->nal_length_size,
avctx->err_recognition, avctx);
}
}
if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
// Caller delivers whole frames: the frame ends at the end of the input buffer.
next = buf_size;
} else {
next = h264_find_frame_end(p, buf, buf_size, avctx);
// A negative result from ff_combine_frame() means nothing is output for this call.
if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
*poutbuf = NULL;
*poutbuf_size = 0;
return buf_size;
}
// Negative next (other than END_NOT_FOUND): re-run the frame-end search over the
// -next bytes starting at pc->buffer[pc->last_index + next].
if (next < 0 && next != END_NOT_FOUND) {
av_assert1(pc->last_index + next >= 0);
h264_find_frame_end(p, &pc->buffer[pc->last_index + next], -next, avctx); // update state
}
}
parse_nal_units(s, avctx, buf, buf_size);
...
}
解码 extradata 函数
/**
 * Decode codec extradata and report which H.264 bitstream format it implies.
 *
 * Extradata whose first byte is 1 is treated as an avcC record: every SPS and
 * PPS it carries is decoded and the NALU length-prefix width is stored.
 * Anything else is handed to decode_extradata_ps() as Annex-B data.
 *
 * @param data            extradata buffer
 * @param size            number of bytes in data
 * @param ps              parameter-set storage passed on to the decode helpers
 * @param is_avc          set to 1 for avcC, 0 for Annex-B
 * @param nal_length_size set to (data[4] & 0x03) + 1 after a fully decoded
 *                        avcC record; left untouched otherwise
 * @param err_recognition forwarded to decode_extradata_ps_mp4()
 * @param logctx          context used for av_log()
 * @return size on success; -1 if data is NULL or size <= 0;
 *         AVERROR_INVALIDDATA for a short or inconsistent avcC record;
 *         otherwise the negative value returned by a decode helper
 */
int ff_h264_decode_extradata(const uint8_t *data, int size, H264ParamSets *ps,
                             int *is_avc, int *nal_length_size,
                             int err_recognition, void *logctx)
{
    const uint8_t *cursor;
    int count, idx, unit_len;
    int err;

    if (!data || size <= 0)
        return -1;

    /* Annex-B extradata: no avcC header to walk. */
    if (data[0] != 1) {
        *is_avc = 0;
        err = decode_extradata_ps(data, size, ps, 0, logctx);
        return err < 0 ? err : size;
    }

    /* avcC extradata always starts with the version byte 1. */
    *is_avc = 1;

    if (size < 7) {
        av_log(logctx, AV_LOG_ERROR, "avcC %d too short\n", size);
        return AVERROR_INVALIDDATA;
    }

    /* SPS entries: the count is the low 5 bits of byte 5, entries begin at byte 6. */
    count  = data[5] & 0x1f;
    cursor = data + 6;
    for (idx = 0; idx < count; idx++) {
        /* 16-bit big-endian payload size plus the two bytes of the size field itself. */
        unit_len = AV_RB16(cursor) + 2;
        if (unit_len > size - (cursor - data))
            return AVERROR_INVALIDDATA;

        err = decode_extradata_ps_mp4(cursor, unit_len, ps, err_recognition, logctx);
        if (err < 0) {
            av_log(logctx, AV_LOG_ERROR,
                   "Decoding sps %d from avcC failed\n", idx);
            return err;
        }
        cursor += unit_len;
    }

    /* PPS entries: a one-byte count follows the last SPS. */
    count = *cursor++;
    for (idx = 0; idx < count; idx++) {
        unit_len = AV_RB16(cursor) + 2;
        if (unit_len > size - (cursor - data))
            return AVERROR_INVALIDDATA;

        err = decode_extradata_ps_mp4(cursor, unit_len, ps, err_recognition, logctx);
        if (err < 0) {
            av_log(logctx, AV_LOG_ERROR,
                   "Decoding pps %d from avcC failed\n", idx);
            return err;
        }
        cursor += unit_len;
    }

    /* Length-prefix width used when parsing every later NAL unit. */
    *nal_length_size = (data[4] & 0x03) + 1;

    return size;
}
同样的,解码 extradata 后进行了 nal_units 的解析操作,主要解析过程已添加代码注释
/**
* Parse NAL units of found picture and decode some basic information.
*
* Walks the buffer one NAL unit at a time. Units are located by their length
* prefix when p->is_avc is set and by start codes otherwise.
* (Excerpt: the remainder of the function body is elided at the end.)
*
* @param s parser context.
* @param avctx codec context.
* @param buf buffer with field/frame data.
* @param buf_size size of the buffer.
*/
static inline int parse_nal_units(AVCodecParserContext *s,
AVCodecContext *avctx,
const uint8_t * const buf, int buf_size)
{
H264ParseContext *p = s->priv_data;
H2645NAL nal = { NULL };
int buf_index, next_avc;
unsigned int pps_id;
unsigned int slice_type;
int state = -1, got_reset = 0;
int q264 = buf_size >=4 && !memcmp("Q264", buf, 4);
int field_poc[2];
int ret;
/* set some sane default values */
s->pict_type = AV_PICTURE_TYPE_I;
s->key_frame = 0;
s->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;
ff_h264_sei_uninit(&p->sei);
p->sei.frame_packing.frame_packing_arrangement_cancel_flag = -1;
if (!buf_size)
return 0;
buf_index = 0; // current read position; used to walk the whole buffer
next_avc = p->is_avc ? 0 : buf_size; // avcC: position where the next length prefix is expected; Annex-B: end of buffer
for (;;) {
const SPS *sps;
int src_length, consumed, nalsize = 0; // nalsize: NALU length read from the avcC length prefix
if (buf_index >= next_avc) { // avcC takes this branch at each NALU boundary (next_avc starts at 0); Annex-B normally takes the else branch
nalsize = get_nalsize(p->nal_length_size, buf, buf_size, &buf_index, avctx); // size of the next NALU; presumably advances buf_index past the prefix (helper not shown)
if (nalsize < 0) // negative size: stop walking the buffer
break;
// End of this avcC NALU; once buf_index reaches it, the next iteration reads another length prefix
next_avc = buf_index + nalsize;
} else { // start-code search, bounded by next_avc (Annex-B NALUs are separated by start codes)
buf_index = find_start_code(buf, buf_size, buf_index, next_avc); // presumably returns the position just after the start code (helper not shown)
if (buf_index >= buf_size) // reached the end of the buffer: done
break;
if (buf_index >= next_avc) // search ran up to next_avc: loop again so the length-prefix branch above handles it
continue;
}
src_length = next_avc - buf_index; // avcC: the NALU size; Annex-B: bytes remaining from buf_index to the end of the buffer
state = buf[buf_index]; // NAL header byte; the low 5 bits are the NAL unit type
switch (state & 0x1f) {
case H264_NAL_SLICE:
case H264_NAL_IDR_SLICE:
// Do not walk the whole buffer just to decode slice header
if ((state & 0x1f) == H264_NAL_IDR_SLICE || ((state >> 5) & 0x3) == 0) {
/* IDR or disposable slice
* No need to decode many bytes because MMCOs shall not be present. */
if (src_length > 60)
src_length = 60;
} else {
/* To decode up to MMCOs */
if (src_length > 1000)
src_length = 1000;
}
break;
}
// Extract the NALU payload into the H2645NAL struct; the return value is the number of input bytes consumed
consumed = ff_h2645_extract_rbsp(buf + buf_index, src_length, &nal, 1);
if (consumed < 0)
break;
buf_index += consumed;
ret = init_get_bits8(&nal.gb, nal.data, nal.size);
if (ret < 0)
goto fail;
get_bits1(&nal.gb); // skip the first header bit (forbidden_zero_bit in the H.264 NAL header)
nal.ref_idc = get_bits(&nal.gb, 2); // NALU reference level (nal_ref_idc)
nal.type = get_bits(&nal.gb, 5); // NAL unit type
switch (nal.type) {
case H264_NAL_SPS:
ff_h264_decode_seq_parameter_set(&nal.gb, avctx, &p->ps, 0);
break;
case H264_NAL_PPS:
ff_h264_decode_picture_parameter_set(&nal.gb, avctx, &p->ps,
nal.size_bits);
break;
case H264_NAL_SEI:
ff_h264_sei_decode(&p->sei, &nal.gb, &p->ps, avctx);
// SEI type 5 with unregistered user data present: pass it to av_h264_sei()
if (p->sei.type == 5 && p->sei.unregistered.user_data != NULL) {
av_h264_sei(5, p->sei.unregistered.user_data);
}
break;
case H264_NAL_IDR_SLICE:
s->key_frame = 1;
// An IDR slice resets the picture-order-count history
p->poc.prev_frame_num = 0;
p->poc.prev_frame_num_offset = 0;
p->poc.prev_poc_msb =
p->poc.prev_poc_lsb = 0;
/* fall through */
case H264_NAL_SLICE:
get_ue_golomb_long(&nal.gb); // skip first_mb_in_slice
slice_type = get_ue_golomb_31(&nal.gb);
s->pict_type = ff_h264_golomb_to_pict_type[slice_type % 5]; // slice_type modulo 5 indexes the picture-type table
...
}
以上只是对 h264 码流解析时的一个分析,如果使用软件解码,解码时也会有类似的 nal units 解析操作,后续有时间继续更新,本文档也将继续完善。