AV1 视频码流解析

目录

  1. 参考
  2. AV1的码流结构
  3. 码流解析示例

1. 参考

  • [1] AV1 Bitstream & Decoding Process
    Specification#Version 1.0.0 with Errata 1
  • [2] 雷霄骅/视音频数据处理入门:H.264视频码流解析

2. AV1的码流结构

AV1码流由OBU(Open Bitstream Unit)码流单元组成,OBU的结构如下所示。

--------------------------------------
obu_header | *obu_size | obu_payload |
--------------------------------------
  • obu_size指的是obu_payload的字节大小。
  • obu_size不是必须的,由obu_header中的obu_has_size_field字段标识是否存在,不存在时需要由外部告知OBU的大小。
  • 标准[1]中定义的“Low overhead bitstream format”格式要求obu_has_size_field必须为1。
  • 对于需要更方便地跳过帧或时间单元(temporal unit)的应用,[1]在Annex B中定义了长度分隔(length-delimited)的码流格式;该格式下OBU的长度由外层的长度字段给出,因此obu_has_size_field可以为0。
  • 派生的规范,例如支持存储AV1视频的容器格式,应该指出它们所依赖的是哪种格式。
  • 其他打包OBUs视频流的方法也是允许的。

OBU结构的语法如下所示,略去了各种类型obu解析payload的部分[1]。

open_bitstream_unit( sz ) {                                
    obu_header()                                           
    if ( obu_has_size_field ) {                           
        obu_size                                           //leb128()
    } else {
        obu_size = sz - 1 - obu_extension_flag
    }
....
}

obu_header() {                                          
    obu_forbidden_bit                                   // f(1)
    obu_type                                            // f(4)
    obu_extension_flag                                  // f(1)
    obu_has_size_field                                  // f(1)
    obu_reserved_1bit                                   // f(1)
    if ( obu_extension_flag == 1 )
        obu_extension_header()
}

obu_extension_header() {
    temporal_id                     //f(3)
    spatial_id                      //f(2)
    extension_header_reserved_3bits //f(3)
}
  • f(n)表示从流中读取n比特组成的无符号整数,比特位从高到低。
  • obu_header中的obu_extension_flag字段标识是否还有obu_extension_header。
  • obu_type标识OBU的类型,类型的取值如下表所示。
  • leb128()读取可变长的小端无符号整型数,具体过程下面有说明。

obu_type取值

obu_type Name of obu_type
0 Reserved
1 OBU_SEQUENCE_HEADER
2 OBU_TEMPORAL_DELIMITER
3 OBU_FRAME_HEADER
4 OBU_TILE_GROUP
5 OBU_METADATA
6 OBU_FRAME
7 OBU_REDUNDANT_FRAME_HEADER
8 OBU_TILE_LIST
9-14 Reserved
15 OBU_PADDING

leb128()

读取可变长的小端无符号整型数。每个字节的低7位是数据位,最高比特为1表示还需要继续读取后面的字节,为0表示这是最后一个字节。解析过程如下:

leb128() {
    value = 0
    Leb128Bytes = 0
    for (i = 0; i < 8; i++) {
        leb128_byte //f(8)
        value |= ( (leb128_byte & 0x7f) << (i*7) )
        Leb128Bytes += 1
        if ( !(leb128_byte & 0x80) ) {
            break
        }
    }
    return value
}

3. 码流解析示例

下面的示例解析IVF文件中封装的AV1码流,并打印各个OBU的统计信息。IVF格式的说明见《IVF视频文件格式》一文。

//https://www.jianshu.com/u/3a66dddbdb3d

#include <inttypes.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Program name; main() stores argv[0] here and usage() prints it. */
char *appname = NULL;
/* Input file shared by every reader helper; opened and closed in simplest_av1_parser(). */
FILE *bitstream = NULL;                //!< the bit stream file

/*
 * OBU type codes as carried in the 4-bit obu_type header field.
 * Codes without an enumerator here (0 and 9-14) are reserved.
 */
typedef enum {
    OBU_SEQUENCE_HEADER = 1,
    OBU_TEMPORAL_DELIMITER = 2,
    OBU_FRAME_HEADER = 3,
    OBU_TILE_GROUP = 4,
    OBU_METADATA = 5,
    OBU_FRAME = 6,
    OBU_REDUNDANT_FRAME_HEADER = 7,
    OBU_TILE_LIST = 8,
    OBU_PADDING = 15
} OBU_TYPE;

/**
 * Map an OBU type code to the short label shown in the printed OBU table.
 * @param type OBU type code.
 * @return a pointer to a static string; "UNKNOWN" for any code that has no label.
 */
const char* get_obu_type_name(OBU_TYPE type) {
    /* Sparse table indexed by type code; unlisted slots stay null. */
    static const char *const labels[] = {
        [OBU_SEQUENCE_HEADER]        = "SEQ_H",
        [OBU_TEMPORAL_DELIMITER]     = "TEM_D",
        [OBU_FRAME_HEADER]           = "FRA_H",
        [OBU_TILE_GROUP]             = "TIL_G",
        [OBU_METADATA]               = "MET_D",
        [OBU_FRAME]                  = "FRAME",
        [OBU_REDUNDANT_FRAME_HEADER] = "R_F_H",
        [OBU_TILE_LIST]              = "TIL_L",
        [OBU_PADDING]                = "PADDI",
    };
    const unsigned long label_count = sizeof labels / sizeof labels[0];
    const long code = (long)type;

    if (code < 0 || (unsigned long)code >= label_count || !labels[code]) {
        return "UNKNOWN";
    }
    return labels[code];
}

/* Header information for a single OBU, as filled in by get_obu(). */
typedef struct {
    // Bytes taken by the OBU header: 1 + extension_flag + length of the obu_size field (0 when that field is absent).
    uint64_t obu_header_size;
    // 4-bit type code taken from bits 3..6 of the first header byte.
    unsigned obu_type;                                            
    // Payload size; when has_size_field is 0 it is derived from the enclosing size passed to get_obu().
    uint64_t obu_size;  //leb128(), contains the size in bytes of the OBU not including the bytes within obu_header or the obu_size syntax
    // Bit 2 of the first header byte; 1 when an extension header byte follows.
    int extension_flag;
    // Bit 1 of the first header byte; 1 when an explicit obu_size field is present.
    int has_size_field;

    //extension_flag == 1
    // Taken from the extension header byte (bits 5..7 and bits 3..4); meaningful only when extension_flag == 1.
    int temporal_id;
    int spatial_id;
} OBU_t;

/* Fields of the IVF file header, as read by ivf_read_header(). */
typedef struct IVFMetaData {
    // 4-byte file signature (expected to be "DKIF"); the fifth byte leaves room for a NUL terminator.
    char sign[5];
    // 4-byte codec tag, stored the same way as sign.
    char codec_tag[5];
    // Read as 16-bit little-endian values.
    unsigned int width;
    unsigned int height;
    // Read as 32-bit little-endian values, in this order, directly after height.
    unsigned int framerate;
    unsigned int timescale; 
    unsigned int frame_count;
} IVFMetaData;

/**
 * Read up to `size` bytes from `f` into `buf`.
 * @return the number of bytes actually read, as reported by fread().
 */
int64_t read(FILE *f, unsigned char *buf, int64_t size) {
    const size_t bytes_read = fread(buf, sizeof(unsigned char), size, f);
    return bytes_read;
}

/**
 * Move the position of `f` by `offset` bytes relative to the current position.
 * @return the fseek() result: 0 on success, non-zero on failure.
 */
int64_t skip(FILE *f, int64_t offset) {
    const int status = fseek(f, offset, SEEK_CUR);
    return status;
}

/**
 * Read one byte from `f`.
 * @return the byte value, or 0 when no byte could be read
 *         (a failed read is therefore indistinguishable from a zero byte).
 */
unsigned int read8(FILE *f) {
    unsigned char byte;
    const int64_t got = read(f, &byte, 1);

    if (got != 1) {
        return 0;
    }
    return byte;
}

/**
 * Read a 16-bit little-endian unsigned value from `f` (low byte first).
 */
unsigned int readl16(FILE *f) {
    const unsigned int low_byte = read8(f);
    const unsigned int high_byte = read8(f);
    return low_byte | (high_byte << 8);
}

/**
 * Read a 32-bit little-endian unsigned value from `f` (low 16 bits first).
 */
unsigned int readl32(FILE *f) {
    const unsigned int low_half = readl16(f);
    const unsigned int high_half = readl16(f);
    return low_half | (high_half << 16);
}

/**
 * Read a 64-bit little-endian unsigned value from `f` (low 32 bits first).
 */
uint64_t readl64(FILE *f) {
    const uint64_t low_word = readl32(f);
    const uint64_t high_word = readl32(f);
    return low_word | (high_word << 32);
}

/**
 * Decode an unsigned LEB128 value from `f`.
 *
 * Each byte contributes its low 7 bits, least-significant group first; a set
 * top bit means another byte follows. At most 8 bytes are consumed.
 *
 * @param f              stream to read from.
 * @param read_bytes_num receives the number of bytes consumed (1..8).
 * @return the decoded value.
 */
uint64_t leb128(FILE *f, int *read_bytes_num) {
    uint64_t val = 0;
    int consumed = 0;

    for (int i = 0; i < 8; i++) {
        const unsigned int leb128_byte = read8(f);
        consumed++;
        /* Widen before shifting: the shift count reaches 49, which is
         * undefined (and loses the high bits) on a 32-bit unsigned int. */
        val |= (uint64_t)(leb128_byte & 0x7f) << (i * 7);
        if (!(leb128_byte & 0x80)) {
            break;
        }
    }
    /* Counted inside the loop so that an 8-byte encoding whose last byte
     * still has the continuation bit set reports 8, not 9. */
    *read_bytes_num = consumed;
    return val;
}

/**
 * Read the IVF file header from the global `bitstream` into `ivf`.
 *
 * The signature and codec tag are NUL-terminated here, so the function does
 * not depend on the caller having zero-filled `ivf`.
 *
 * @param ivf destination for the header fields.
 * @return 0 on success, -1 when the signature is missing/wrong or the header
 *         is truncated.
 */
static int ivf_read_header(IVFMetaData *ivf) {
    if (read(bitstream, (unsigned char *)ivf->sign, 4) != 4) {
        fprintf(stderr, "not a IVF file, cannot read signature.\n");
        return -1;
    }
    ivf->sign[4] = '\0';
    if (strcmp(ivf->sign, "DKIF") != 0) {
        fprintf(stderr, "not a IVF file, sign=%s.\n", ivf->sign);
        return -1;
    }
    skip(bitstream, 2); //version
    skip(bitstream, 2); //header size
    if (read(bitstream, (unsigned char *)ivf->codec_tag, 4) != 4) {
        fprintf(stderr, "truncated IVF header.\n");
        return -1;
    }
    ivf->codec_tag[4] = '\0';
    ivf->width = readl16(bitstream);
    ivf->height = readl16(bitstream);
    ivf->framerate = readl32(bitstream);
    ivf->timescale = readl32(bitstream);
    ivf->frame_count = readl32(bitstream);
    /* The readl* helpers return 0 on a failed read, so a short header is
     * only visible through the stream state. Check before the final seek,
     * because a successful fseek() clears the end-of-file indicator. */
    if (feof(bitstream) || ferror(bitstream)) {
        fprintf(stderr, "truncated IVF header.\n");
        return -1;
    }
    skip(bitstream, 4); //unused
    return 0;
}

/**
 * Parse the header of the next OBU in the global `bitstream`, record it in
 * `obu`, and seek past the payload.
 *
 * @param obu receives type, flags, layer ids, header size and payload size.
 * @param sz  number of bytes available for this OBU; simplest_av1_parser()
 *            passes the bytes remaining in the current IVF frame. Used to
 *            derive the payload size when the OBU has no size field, and to
 *            reject an OBU that claims more bytes than are available.
 * @return 0 on success, -1 on a read/seek failure or an inconsistent size.
 */
int get_obu(OBU_t *obu, int sz){
    const uint64_t available = sz > 0 ? (uint64_t)sz : 0;
    unsigned char obu_header;
    if (read(bitstream, &obu_header, 1) != 1) {
        fprintf(stderr, "read obu_header failed.\n");
        return -1;
    }
    obu->obu_type = (obu_header >> 3) & 0x0F;
    obu->extension_flag = (obu_header >> 2) & 0x01;
    obu->has_size_field = (obu_header >> 1) & 0x01;

    /* The same OBU_t is reused across calls; clear the layer ids so an OBU
     * without an extension header does not inherit the previous OBU's. */
    obu->temporal_id = 0;
    obu->spatial_id = 0;
    if (obu->extension_flag == 1) {
        unsigned char obu_extension_header;
        if (read(bitstream, &obu_extension_header, 1) != 1) {
            fprintf(stderr, "read obu_extension_header failed.\n");
            return -1;
        }
        obu->temporal_id = (obu_extension_header >> 5) & 0x07;
        obu->spatial_id = (obu_extension_header >> 3) & 0x03;
    }

    int size_field_bytes_num = 0;
    if (obu->has_size_field == 1) {
        obu->obu_size = leb128(bitstream, &size_field_bytes_num);
    }
    obu->obu_header_size = 1 + obu->extension_flag + size_field_bytes_num;

    if (obu->obu_header_size > available) {
        fprintf(stderr, "get_obu: obu header does not fit in the %d available bytes.\n", sz);
        return -1;
    }
    if (obu->has_size_field != 1) {
        /* No explicit size: the payload is whatever is left after the header
         * bytes. The guard above keeps this subtraction from wrapping. */
        obu->obu_size = available - obu->obu_header_size;
    } else if (obu->obu_size > available - obu->obu_header_size) {
        fprintf(stderr, "get_obu: obu_size=%"PRIu64" exceeds the %d available bytes.\n",
                obu->obu_size, sz);
        return -1;
    }

    if (obu->obu_size > 0) {
        /* Note: fseek() past the end of a file succeeds, so a truncated
         * payload is not detected here. */
        if (0 != skip(bitstream, (int64_t)obu->obu_size)){
            fprintf(stderr, "get_obu: cannot seek in the bitstream file\n");
            return -1;
        }
    }
    return 0;
}

/**
 * Parse the AV1 bitstream stored in an IVF file and print one table row per OBU.
 *
 * Prints the IVF header fields first, then walks every IVF frame and every
 * OBU inside it. The file is only read, never modified.
 *
 * @param url path of the input IVF file containing an AV1 bitstream.
 * @return 0 on success, -1 when the file cannot be opened, memory cannot be
 *         allocated, or the file is malformed.
 */
int simplest_av1_parser(char *url){
    int ret = 0;
    IVFMetaData *ivf_meta_data = NULL;
    OBU_t *obu = NULL;
    uint64_t data_offset = 32;  /* file offset of the first IVF frame header */
    int obu_num = 0;
    int ivf_frame_num = 0;

    /* Read-only access is enough; "rb+" would fail on read-only files. */
    bitstream = fopen(url, "rb");
    if (!bitstream) {
        printf("Open file error\n");
        return -1;
    }

    obu = calloc(1, sizeof *obu);
    if (!obu) {
        fprintf(stdout, "Alloc OBU_t Error\n");
        ret = -1;
        goto end;   /* go through cleanup so the file is closed */
    }
    ivf_meta_data = calloc(1, sizeof *ivf_meta_data);
    if (!ivf_meta_data) {
        fprintf(stdout, "Alloc IVFMetaData Error\n");
        ret = -1;
        goto end;
    }
    if (ivf_read_header(ivf_meta_data) != 0) {
        fprintf(stderr, "read ivf header failed.\n");
        ret = -1;
        goto end;
    }
    fprintf(stdout, "ivf header: sign=%s, codec_tag=%s, width=%u, height=%u, "
        " framerate=%u, timescale=%u, frame_count=%u\n",
        ivf_meta_data->sign,
        ivf_meta_data->codec_tag,
        ivf_meta_data->width,
        ivf_meta_data->height,
        ivf_meta_data->framerate,
        ivf_meta_data->timescale,
        ivf_meta_data->frame_count);

    printf("----------+-------- OBU Table ---+--------+----------+------------+----------+\n");
    printf("IVF F#num | IVF F#size | OBU_NUM |   POS  |   TYPE   | OBU_H_SIZE | OBU_SIZE |\n");
    printf("----------+------------+---------+--------+----------+------------+----------+\n");

    for (;;) {
        /* IVF frame header: 4-byte little-endian frame size + 8-byte timestamp.
         * Reading it as one unit tells a clean end of file (0 bytes) apart
         * from a truncated header, instead of testing feof() before the read
         * and then processing a phantom frame. */
        unsigned char frame_header[12];
        const int64_t got = read(bitstream, frame_header, sizeof frame_header);
        if (got == 0 && !ferror(bitstream)) {
            break;
        }
        if (got != (int64_t)sizeof frame_header) {
            fprintf(stderr, "truncated ivf frame header.\n");
            ret = -1;
            goto end;
        }
        const unsigned int frame_size = (unsigned int)frame_header[0]
                                      | ((unsigned int)frame_header[1] << 8)
                                      | ((unsigned int)frame_header[2] << 16)
                                      | ((unsigned int)frame_header[3] << 24);
        if (frame_size > (unsigned int)INT_MAX) {
            fprintf(stderr, "ivf frame size %u is too large.\n", frame_size);
            ret = -1;
            goto end;
        }
        data_offset += sizeof frame_header;
        ivf_frame_num++;

        int sz = (int)frame_size;   /* bytes of this IVF frame not yet consumed */
        while (sz > 0) {
            ret = get_obu(obu, sz);
            /* A zero-sized payload is accepted only for temporal delimiters. */
            if (ret < 0 || (obu->obu_size == 0 && obu->obu_type != OBU_TEMPORAL_DELIMITER)) {
                fprintf(stderr, "get_obu failed. ret=%d, obu->obu_size=%"PRIu64"\n", ret, obu->obu_size);
                ret = -1;
                goto end;
            }
            fprintf(stdout,"%10d|%12u|%9d| %7"PRIu64"|%10s|%12"PRIu64"|%10"PRIu64"|\n",
                    ivf_frame_num, frame_size, obu_num, data_offset, get_obu_type_name(obu->obu_type), obu->obu_header_size,  obu->obu_size);
            const uint64_t obu_total_size = obu->obu_header_size + obu->obu_size;
            if (obu_total_size > (uint64_t)sz) {
                /* The OBU runs past the frame end; the stream position is
                 * no longer aligned with the next frame header. */
                fprintf(stderr, "obu exceeds ivf frame: obu_total_size=%"PRIu64", remaining=%d\n",
                        obu_total_size, sz);
                ret = -1;
                goto end;
            }
            data_offset += obu_total_size;
            sz -= (int)obu_total_size;
            obu_num++;
        }
    }

end:
    //Free
    free(obu);
    free(ivf_meta_data);
    fclose(bitstream);
    bitstream = NULL;
    return ret;
}

/* Print the command-line synopsis to stderr and terminate with exit status 1. */
void usage(void) {
    /* Name the required argument; main() passes argv[1] to the parser as the IVF file path. */
    fprintf(stderr, "usage: %s <ivf_file>\n", appname);
    exit(1);
}

/**
 * Entry point: parse the IVF file named by the first command-line argument.
 * @return 0 when parsing succeeded, 1 when it failed (usage() exits with 1
 *         when the argument is missing).
 */
int main(int argc, char **argv) {
    appname = argv[0];
    if (argc < 2) {
        usage();
    }
    const int ret = simplest_av1_parser(argv[1]);
    if (ret != 0) {
        fprintf(stderr, "parse error, ret=%d\n", ret);
        /* Report the failure to the caller instead of always exiting with 0. */
        return 1;
    }
    fprintf(stdout, "parse finished.\n");
    return 0;
}

结果
程序的输入为一个IVF文件的路径,结果的一部分如下图所示。

obu_parser.png

  • 可以看到一个IVF Frame中包含了多个OBU。

你可能感兴趣的:(AV1 视频码流解析)