目录
- 参考
- AV1的码流结构
- 码流解析示例
1. 参考
- [1] AV1 Bitstream & Decoding Process Specification, Version 1.0.0 with Errata 1
- [2] 雷霄骅/视音频数据处理入门:H.264视频码流解析
2. AV1的码流结构
AV1码流由OBU(Open Bitstream Unit)码流单元组成,OBU的结构如下所示。
--------------------------------------
obu_header | *obu_size | obu_payload |
--------------------------------------
- obu_size指的是obu_payload的字节大小。
- obu_size不是必须的,由obu_header中的obu_has_size_field字段标识是否存在,不存在时需要由外部告知OBU的大小。
- 标准[1]中定义的“Low overhead bitstream format”格式要求obu_has_size_field必须为1。
- 对于需要更方便地跳过帧或时间单元(temporal unit)的应用,[1]的Annex B定义了长度分隔(length-delimited)的码流格式:OBU的大小由外层的obu_length给出,因此obu_has_size_field可以为0。
- 派生的规范,例如支持存储AV1视频的容器格式,应该指出它们所依赖的是哪种格式。
- 其他打包OBUs视频流的方法也是允许的。
OBU结构的语法如下所示,略去了各种类型obu解析payload的部分[1]。
open_bitstream_unit( sz ) {
obu_header()
if ( obu_has_size_field ) {
obu_size //leb128()
} else {
obu_size = sz - 1 - obu_extension_flag
}
....
}
obu_header() {
obu_forbidden_bit // f(1)
obu_type // f(4)
obu_extension_flag // f(1)
obu_has_size_field // f(1)
obu_reserved_1bit // f(1)
if ( obu_extension_flag == 1 )
obu_extension_header()
}
obu_extension_header() {
temporal_id //f(3)
spatial_id //f(2)
extension_header_reserved_3bits //f(3)
}
- f(n)表示从流中读取n比特组成的无符号整数,比特位从高到低。
- obu_header中的obu_extension_flag字段标识是否还有obu_extension_header。
- obu_type标识OBU的类型,类型的取值如下表所示。
- leb128()用于读取可变长的小端(little-endian)无符号整型数,具体过程见下文。
obu_type取值
obu_type | Name of obu_type |
---|---|
0 | Reserved |
1 | OBU_SEQUENCE_HEADER |
2 | OBU_TEMPORAL_DELIMITER |
3 | OBU_FRAME_HEADER |
4 | OBU_TILE_GROUP |
5 | OBU_METADATA |
6 | OBU_FRAME |
7 | OBU_REDUNDANT_FRAME_HEADER |
8 | OBU_TILE_LIST |
9-14 | Reserved |
15 | OBU_PADDING |
leb128()
读取可变长的小端(little-endian)无符号整型数。每读取一个字节,如果其最高比特为1,表示还需要继续读取后续字节;为0则表示这是最后一个字节。每个字节的低7比特为有效数据,先读到的字节位于低位。解析过程如下:
leb128() {
value = 0
Leb128Bytes = 0
for (i = 0; i < 8; i++) {
leb128_byte //f(8)
value |= ( (leb128_byte & 0x7f) << (i*7) )
Leb128Bytes += 1
if ( !(leb128_byte & 0x80) ) {
break
}
}
return value
}
3. 码流解析示例
本示例解析IVF视频文件中的AV1码流,并打印各个OBU的统计信息。IVF文件格式的说明可参考《IVF视频文件格式》一文。
//https://www.jianshu.com/u/3a66dddbdb3d
#include <inttypes.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Program name taken from argv[0] in main(); printed by usage(). */
char *appname = NULL;
/* Input stream shared by the reader helpers; opened and closed in simplest_av1_parser(). */
FILE *bitstream = NULL; //!< the bit stream file
/*
 * Values of the 4-bit obu_type field of the OBU header.
 * Codes that are not listed here (0 and 9-14) are reserved.
 */
typedef enum {
OBU_SEQUENCE_HEADER = 1,
OBU_TEMPORAL_DELIMITER = 2,
OBU_FRAME_HEADER = 3,
OBU_TILE_GROUP = 4,
OBU_METADATA = 5,
OBU_FRAME = 6,
OBU_REDUNDANT_FRAME_HEADER = 7,
OBU_TILE_LIST = 8,
OBU_PADDING = 15
} OBU_TYPE;
/*
 * Translate an OBU type code into the short label shown in the OBU table.
 * Codes without a label (reserved or out-of-range values) give "UNKNOWN".
 */
const char* get_obu_type_name(OBU_TYPE type) {
    /* Sparse lookup table indexed by type code; unlisted slots are NULL. */
    static const char *const labels[] = {
        [OBU_SEQUENCE_HEADER]        = "SEQ_H",
        [OBU_TEMPORAL_DELIMITER]     = "TEM_D",
        [OBU_FRAME_HEADER]           = "FRA_H",
        [OBU_TILE_GROUP]             = "TIL_G",
        [OBU_METADATA]               = "MET_D",
        [OBU_FRAME]                  = "FRAME",
        [OBU_REDUNDANT_FRAME_HEADER] = "R_F_H",
        [OBU_TILE_LIST]              = "TIL_L",
        [OBU_PADDING]                = "PADDI",
    };
    /* Going through an unsigned type also rejects negative codes. */
    const unsigned long code = (unsigned long)type;
    const unsigned long slots = sizeof labels / sizeof labels[0];

    if (code < slots && labels[code] != NULL) {
        return labels[code];
    }
    return "UNKNOWN";
}
/* Summary of one parsed OBU; get_obu() fills these fields from the stream. */
typedef struct {
/* Header bytes: 1 + extension_flag + length of the obu_size field (0 when the field is absent). */
uint64_t obu_header_size;
/* 4-bit obu_type value extracted from the header byte. */
unsigned obu_type;
uint64_t obu_size; //leb128(), contains the size in bytes of the OBU not including the bytes within obu_header or the obu_size syntax
/* obu_extension_flag bit: 1 when a one-byte extension header follows the header byte. */
int extension_flag;
/* obu_has_size_field bit: 1 when obu_size is coded in the stream. */
int has_size_field;
//extension_flag == 1
/* get_obu() only writes the two ids below when extension_flag == 1. */
int temporal_id;
int spatial_id;
} OBU_t;
/* Fields of the IVF file header, as filled in by ivf_read_header(). */
typedef struct IVFMetaData {
/* 4 signature bytes are read into this array; it is later used as a C string, so the 5th byte must be '\0'. */
char sign[5];
/* 4 codec tag bytes; printed with %s, so the 5th byte must be '\0' as well. */
char codec_tag[5];
/* width and height are read as 16-bit little-endian values. */
unsigned int width;
unsigned int height;
/* framerate, timescale and frame_count are read as 32-bit little-endian values. */
unsigned int framerate;
unsigned int timescale;
unsigned int frame_count;
} IVFMetaData;
/*
 * Read up to `size` bytes from `f` into `buf`.
 *
 * `buf` is a void pointer so that both char and unsigned char buffers can be
 * passed without a pointer-sign mismatch.
 *
 * Returns the number of bytes actually stored in `buf`. This is smaller than
 * `size` at end of file or on a read error, and 0 when `size` is not positive
 * (a negative size would otherwise turn into a huge size_t inside fread()).
 */
int64_t read(FILE *f, void *buf, int64_t size) {
    if (f == NULL || buf == NULL || size <= 0) {
        return 0;
    }
    return (int64_t)fread(buf, 1, (size_t)size, f);
}
/*
 * Move the file position of `f` by `offset` bytes relative to the current
 * position.
 *
 * Returns 0 on success and a non-zero value on failure. fseek() takes a long,
 * which is narrower than int64_t on some platforms; an offset that does not
 * fit is rejected instead of being silently truncated.
 */
int64_t skip(FILE *f, int64_t offset) {
    const long step = (long)offset;

    if ((int64_t)step != offset) {
        return -1;
    }
    return fseek(f, step, SEEK_CUR);
}
/*
 * Fetch a single byte from `f`.
 * Returns the byte value, or 0 when the byte could not be read.
 */
unsigned int read8(FILE *f) {
    unsigned char byte;

    return (read(f, &byte, 1) != 1) ? 0u : (unsigned int)byte;
}
/* Read a 16-bit little-endian value: the first byte in the stream is the low byte. */
unsigned int readl16(FILE *f) {
    const unsigned int low = read8(f);
    const unsigned int high = read8(f);

    return low | (high << 8);
}
/* Read a 32-bit little-endian value as two 16-bit halves, low half first. */
unsigned int readl32(FILE *f) {
    const unsigned int low_half = readl16(f);
    const unsigned int high_half = readl16(f);

    return low_half | (high_half << 16);
}
/* Read a 64-bit little-endian value as two 32-bit halves, low half first. */
uint64_t readl64(FILE *f) {
    const uint64_t low_half = readl32(f);
    const uint64_t high_half = readl32(f);

    return low_half | (high_half << 32);
}
/*
 * Decode one leb128() value (little-endian base-128, at most 8 bytes) from `f`.
 *
 * Each byte carries 7 payload bits, least significant group first; a set top
 * bit means another byte follows.
 *
 * @param f               stream positioned at the first byte of the value.
 * @param read_bytes_num  receives the number of bytes consumed (1..8); may be NULL.
 * @return the decoded value.
 *
 * The payload is widened to 64 bits before shifting: shifting a 32-bit
 * unsigned int by 28 or more bits drops data, and by 32 or more is undefined.
 * Bytes are counted as they are consumed, so a value that uses all 8 bytes
 * reports 8 rather than 9.
 * A byte that cannot be read (end of file or error) is treated as 0x00, which
 * ends the value and is still counted, matching what read8() would deliver.
 */
uint64_t leb128(FILE *f, int *read_bytes_num) {
    enum { LEB128_MAX_BYTES = 8 };
    uint64_t value = 0;
    int consumed = 0;

    while (consumed < LEB128_MAX_BYTES) {
        const int c = fgetc(f);
        const unsigned int byte = (c == EOF) ? 0u : (unsigned int)c;

        value |= (uint64_t)(byte & 0x7f) << (consumed * 7);
        consumed++;
        if (!(byte & 0x80)) {
            break;
        }
    }

    if (read_bytes_num != NULL) {
        *read_bytes_num = consumed;
    }
    return value;
}
/*
 * Read the IVF file header from the global `bitstream` into `ivf`.
 *
 * Layout consumed here: 4-byte signature "DKIF", 2-byte version, 2-byte
 * header size, 4-byte codec tag, 16-bit width and height, 32-bit framerate,
 * timescale and frame count, then 4 unused bytes (multi-byte fields are
 * little-endian).
 *
 * @return 0 on success, -1 when the signature is wrong or the header is
 *         truncated or unreadable.
 */
static int ivf_read_header(IVFMetaData *ivf) {
    if (read(bitstream, (unsigned char *)ivf->sign, 4) != 4) {
        fprintf(stderr, "read ivf signature failed.\n");
        return -1;
    }
    /* Terminate explicitly instead of relying on the caller having zeroed the struct. */
    ivf->sign[4] = '\0';
    if (memcmp(ivf->sign, "DKIF", 4) != 0) {
        fprintf(stderr, "not a IVF file, sign=%s.\n", ivf->sign);
        return -1;
    }

    if (skip(bitstream, 2) != 0 ||   //version
        skip(bitstream, 2) != 0) {   //header size
        fprintf(stderr, "seek in ivf header failed.\n");
        return -1;
    }

    if (read(bitstream, (unsigned char *)ivf->codec_tag, 4) != 4) {
        fprintf(stderr, "read ivf codec tag failed.\n");
        return -1;
    }
    ivf->codec_tag[4] = '\0';

    ivf->width = readl16(bitstream);
    ivf->height = readl16(bitstream);
    ivf->framerate = readl32(bitstream);
    ivf->timescale = readl32(bitstream);
    ivf->frame_count = readl32(bitstream);

    /* The integer readers return 0 on a short read, so check the stream
     * state here, before the next seek clears the end-of-file indicator. */
    if (feof(bitstream) || ferror(bitstream)) {
        fprintf(stderr, "ivf header is truncated.\n");
        return -1;
    }

    if (skip(bitstream, 4) != 0) {   //unused
        fprintf(stderr, "seek in ivf header failed.\n");
        return -1;
    }
    return 0;
}
/*
 * Parse the header of the next OBU in the global `bitstream`, record its
 * fields in `obu`, and skip over the payload.
 *
 * @param obu  receives type, flags, ids, header size and payload size.
 * @param sz   number of bytes available for this OBU (the rest of the
 *             enclosing IVF frame). It is required when the OBU has no size
 *             field; when positive it is also used to reject an OBU that
 *             claims more bytes than are available.
 * @return 0 on success, -1 on read/seek failure or inconsistent sizes.
 */
int get_obu(OBU_t *obu, int sz){
    unsigned char obu_header;
    if (read(bitstream, &obu_header, 1) != 1) {
        fprintf(stderr, "read obu_header failed.\n");
        return -1;
    }

    /* Header byte layout, MSB first: forbidden(1) type(4) extension(1) has_size(1) reserved(1). */
    obu->obu_type = (obu_header >> 3) & 0x0F;
    obu->extension_flag = (obu_header >> 2) & 0x01;
    obu->has_size_field = (obu_header >> 1) & 0x01;

    /* Reset the ids so that values from a previous OBU do not leak through. */
    obu->temporal_id = 0;
    obu->spatial_id = 0;
    if (obu->extension_flag == 1) {
        unsigned char obu_extension_header;
        if (read(bitstream, &obu_extension_header, 1) != 1) {
            fprintf(stderr, "read obu_extension_header failed.\n");
            return -1;
        }
        /* Extension byte layout, MSB first: temporal_id(3) spatial_id(2) reserved(3). */
        obu->temporal_id = (obu_extension_header >> 5) & 0x07;
        obu->spatial_id = (obu_extension_header >> 3) & 0x03;
    }

    const int fixed_header_bytes = 1 + obu->extension_flag;
    int size_field_bytes_num = 0;
    if (obu->has_size_field == 1) {
        obu->obu_size = leb128(bitstream, &size_field_bytes_num);
    } else {
        /* Without a size field the payload is whatever remains of `sz`;
         * guard the subtraction so it cannot wrap to a huge unsigned value. */
        if (sz < fixed_header_bytes) {
            fprintf(stderr, "get_obu: sz=%d is smaller than the obu header.\n", sz);
            return -1;
        }
        obu->obu_size = (uint64_t)(sz - fixed_header_bytes);
    }
    obu->obu_header_size = (uint64_t)fixed_header_bytes + (uint64_t)size_field_bytes_num;

    /* fseek() succeeds even past the end of the file, so validate the
     * claimed size against the bytes that are actually available. */
    if (sz > 0 &&
        (obu->obu_header_size > (uint64_t)sz ||
         obu->obu_size > (uint64_t)sz - obu->obu_header_size)) {
        fprintf(stderr, "get_obu: obu_size=%" PRIu64 " exceeds the %d bytes available.\n",
                obu->obu_size, sz);
        return -1;
    }

    if (obu->obu_size > 0) {
        if (obu->obu_size > (uint64_t)INT64_MAX ||
            0 != skip(bitstream, (int64_t)obu->obu_size)) {
            fprintf(stderr, "get_obu: cannot seek in the bitstream file\n");
            return -1;
        }
    }
    return 0;
}
/**
* Analysis AV1 Bitstream in IVF file
* @param url location of input IVF file contains AV1 bitstream.
*/
int simplest_av1_parser(char *url){
int ret = 0;
IVFMetaData *ivf_meta_data;
OBU_t *obu;
bitstream = fopen(url, "rb+");
if (!bitstream) {
printf("Open file error\n");
return -1;
}
obu = (OBU_t*) calloc (1, sizeof (OBU_t));
if (!obu) {
fprintf(stdout, "Alloc OBU_t Error\n");
return -1;
}
ivf_meta_data = (IVFMetaData *) calloc(1, sizeof(IVFMetaData));
if (!ivf_meta_data) {
fprintf(stdout, "Alloc IVFMetaData Error\n");
ret = -1;
goto end;
}
if (ivf_read_header(ivf_meta_data) != 0) {
fprintf(stderr, "read ivf header failed.\n");
ret = -1;
goto end;
}
fprintf(stdout, "ivf header: sign=%s, codec_tag=%s, width=%d, height=%d, "
" framerate=%d, timescale=%d, frame_count=%d\n",
ivf_meta_data->sign,
ivf_meta_data->codec_tag,
ivf_meta_data->width,
ivf_meta_data->height,
ivf_meta_data->framerate,
ivf_meta_data->timescale,
ivf_meta_data->frame_count);
uint64_t data_offset = 32;
int obu_num = 0;
int ivf_frame_num = 0;
printf("----------+-------- OBU Table ---+--------+----------+------------+----------+\n");
printf("IVF F#num | IVF F#size | OBU_NUM | POS | TYPE | OBU_H_SIZE | OBU_SIZE |\n");
printf("----------+------------+---------+--------+----------+------------+----------+\n");
while(!feof(bitstream)) {
unsigned int frame_size = readl32(bitstream);
uint64_t pts = readl64(bitstream);
data_offset += 12;
ivf_frame_num++;
int obu_num_in_ivf_frame = 0;
int sz = frame_size;
while (sz > 0) {
ret = get_obu(obu, sz);
if (ret < 0 || (obu->obu_size <= 0 && obu->obu_type != OBU_TEMPORAL_DELIMITER)) {
fprintf(stderr, "get_obu failed. ret=%d, obu->obu_size=%"PRId64"\n", ret, obu->obu_size);
ret = -1;
goto end;
}
fprintf(stdout,"%10d|%12d|%9d| %7"PRId64"|%10s|%12"PRId64"|%10"PRId64"|\n",
ivf_frame_num, frame_size, obu_num, data_offset, get_obu_type_name(obu->obu_type), obu->obu_header_size, obu->obu_size);
uint64_t obu_total_size = obu->obu_header_size + obu->obu_size;
data_offset += obu_total_size;
sz -= obu_total_size;
obu_num++;
obu_num_in_ivf_frame++;
}
}
end:
//Free
if (obu)
free (obu);
if (ivf_meta_data)
free (ivf_meta_data);
fclose(bitstream);
return ret;
}
/*
 * Print the command-line synopsis to stderr and terminate the process with
 * exit status 1. The single expected argument is the path of the IVF file.
 */
void usage(void) {
    fprintf(stderr, "usage: %s <ivf_file>\n", appname ? appname : "simplest_av1_parser");
    exit(1);
}
/*
 * Entry point: expects the path of an IVF file as the first argument and
 * runs the parser on it.
 *
 * The exit status reflects the outcome (EXIT_SUCCESS or EXIT_FAILURE) so
 * that scripts can detect a failed parse.
 */
int main(int argc, char **argv) {
    appname = argv[0];
    if (argc < 2) {
        usage();
    }

    const int ret = simplest_av1_parser(argv[1]);
    if (ret != 0) {
        fprintf(stderr, "parse error, ret=%d\n", ret);
        return EXIT_FAILURE;
    }
    fprintf(stdout, "parse finished.\n");
    return EXIT_SUCCESS;
}
结果
程序的输入为一个IVF文件的路径,结果的一部分如下图所示。
- 可以看到一个IVF Frame中包含了多个OBU。