之前的文章里已经介绍了音频码流AAC和视频码流H.264,接下来就是要把他们打包并封装格式了。
FLV用途
FLV(Flash Video)是Adobe公司设计开发的一种流行的流媒体格式,由于其视频文件体积轻巧、封装简单等特点,使其很适合在互联网上进行应用。 除了播放视频,在直播时也可以使用。采用FLV格式封装的文件后缀为.flv
FLV封装格式
FLV封装格式和其他格式一样,一个文件头+一个文件体。结构如下图:
文件头
FLV文件头总共有9Byte,文件头部的内容结构如下:
typedef unsigned char byte;
typedef struct {
byte Signature[3];
byte Version;
byte Flags;
uint DataOffset;
} FLV_HEADER;
其中Signature[3]是存储了"F"(0x46),"L"(0x4c),“V”(0x56)签名的3个字节;然后是version版本,表示FLV的版本,占1个字节;Flags是音频流标识,前5位为保留位,第7位同样保留,全为0,第6位表示是否存在音频tag,第8位表示是否存在视频tag。剩下的4个字节为整个文件头所占的大小,数据的起始位置就是从文件开头偏移这么多的大小。
音频tag解析
Flags音频流标志位的第6位是判断是否存在音频tag,音频tag的结构如下:
音频tag开始的第一个字节包含了音频数据的参数信息,从第二个字节开始为音频流。如上图所示,在第1个字节中,前4个bit为音频编码类型:
0 Linear PCM,platform endian
1 ADPCM
2 MP3
3 Linear PCM,little endian
4 Nellymoser 16-kHz mono
5 Nellymoser 8-kHz mono
6 Nellymoser
7 G.711 A-law logarithmic PCM
8 G.711 mu-law logarithmic PCM
9 reserved
10 AAC
14 MP3 8-Khz
15 Device-specific sound
第5、6个bit表示音频采样率:
0 5.5kHz
1 11KHz
2 22 kHz
3 44 kHz
FLV并不支持48KHz的采样率
第7个bit表示音频采样精度:
0 8bits
1 16bits
第8个表示音频类型:
0 sndMono
1 sndStereo
视频tag解析
Flags音频流标志位的第8位是判断是否存在视频tag,视频tag的结构如下:
2个字节第一个字节包含视频数据的参数信息,第2个字节为视频流数据。第一个字节的前4个bit的数值表示帧类型:
1 keyframe (for AVC,a seekable frame)
2 inter frame (for AVC,a nonseekable frame)
3 disposable inter frame (H.263 only)
4 generated keyframe (reserved for server use)
5 video info/command frame
第1位的后4个bit的数值表示视频编码类型:
1 JPEG (currently unused)
2 Sorenson H.263
3 Screen video
4 On2 VP6
5 On2 VP6 with alpha channel
6 Screen video version 2
7 AVC
文件体
FLV文件体结构如下:
FLV文件体是由一连串的back-pointers和tags构成:
Tag
这里的tag同样是由header和data两部分组成,tag header里存放的数据如下:
Tag的类型可以是视频、音频和Script(脚本类型),视频tag和音频tag在上面已经介绍过了,下面介绍一下Script Tag这种类型。
typedef unsigned char byte;
typedef unsigned int uint;
typedef struct {
byte TagType;
byte DataSize[3];
byte Timestamp[3];
uint Reserved;
} TAG_HEADER;
Script Tag
该类型Tag又被称为MetaData Tag,存放一些关于FLV视频和音频的元信息,比如:duration、width、height等。通常该类型Tag会作为FLV文件的第一个tag,并且只有一个,跟在File Header后。该类型Tag Data的结构如下所示:
针对第一个AMF包:第1个字节表示AMF类型,一般为0x02,表示字符串;第2、3个字节用来标识字符长度,一般总是0x000A。后面字节为具体的字符串。
针对第二个AMF包:第1个字节表示AMF包类型,一般总是0x08,表示数组。第2-5个字节表示数组元素个数,后面几位各数组元素的封装,数组元素为(名称-值)组成的对:
duration 时长
width 视频宽度
height 视频高度
videodatarate 视频码率
framerate 视频帧率
videocodecid 视频编码方式
audiosamplerate 音频采样率
audiosamplesize 音频采样精度
stereo 是否为立体声
audiocodecid 音频编码方式
filesize 文件大小
FLV解析
来自雷神
#include
#include
#include
//Important!
#pragma pack(1)
#define TAG_TYPE_SCRIPT 18
#define TAG_TYPE_AUDIO 8
#define TAG_TYPE_VIDEO 9
typedef unsigned char byte;
typedef unsigned int uint;
typedef struct {
byte Signature[3];
byte Version;
byte Flags;
uint DataOffset;
} FLV_HEADER;
typedef struct {
byte TagType;
byte DataSize[3];
byte Timestamp[3];
uint Reserved;
} TAG_HEADER;
//reverse_bytes - turn a BigEndian byte array into a LittleEndian integer
uint reverse_bytes(byte *p, char c) {
int r = 0;
int i;
for (i=0; i>4;
switch (x)
{
case 0:strcat(audiotag_str,"Linear PCM, platform endian");break;
case 1:strcat(audiotag_str,"ADPCM");break;
case 2:strcat(audiotag_str,"MP3");break;
case 3:strcat(audiotag_str,"Linear PCM, little endian");break;
case 4:strcat(audiotag_str,"Nellymoser 16-kHz mono");break;
case 5:strcat(audiotag_str,"Nellymoser 8-kHz mono");break;
case 6:strcat(audiotag_str,"Nellymoser");break;
case 7:strcat(audiotag_str,"G.711 A-law logarithmic PCM");break;
case 8:strcat(audiotag_str,"G.711 mu-law logarithmic PCM");break;
case 9:strcat(audiotag_str,"reserved");break;
case 10:strcat(audiotag_str,"AAC");break;
case 11:strcat(audiotag_str,"Speex");break;
case 14:strcat(audiotag_str,"MP3 8-Khz");break;
case 15:strcat(audiotag_str,"Device-specific sound");break;
default:strcat(audiotag_str,"UNKNOWN");break;
}
strcat(audiotag_str,"| ");
x=tagdata_first_byte&0x0C;
x=x>>2;
switch (x)
{
case 0:strcat(audiotag_str,"5.5-kHz");break;
case 1:strcat(audiotag_str,"1-kHz");break;
case 2:strcat(audiotag_str,"22-kHz");break;
case 3:strcat(audiotag_str,"44-kHz");break;
default:strcat(audiotag_str,"UNKNOWN");break;
}
strcat(audiotag_str,"| ");
x=tagdata_first_byte&0x02;
x=x>>1;
switch (x)
{
case 0:strcat(audiotag_str,"8Bit");break;
case 1:strcat(audiotag_str,"16Bit");break;
default:strcat(audiotag_str,"UNKNOWN");break;
}
strcat(audiotag_str,"| ");
x=tagdata_first_byte&0x01;
switch (x)
{
case 0:strcat(audiotag_str,"Mono");break;
case 1:strcat(audiotag_str,"Stereo");break;
default:strcat(audiotag_str,"UNKNOWN");break;
}
fprintf(myout,"%s",audiotag_str);
//if the output file hasn't been opened, open it.
if(output_a!=0&&afh == NULL){
afh = fopen("output.mp3", "wb");
}
//TagData - First Byte Data
int data_size=reverse_bytes((byte *)&tagheader.DataSize, sizeof(tagheader.DataSize))-1;
if(output_a!=0){
//TagData+1
for (int i=0; i>4;
switch (x)
{
case 1:strcat(videotag_str,"key frame ");break;
case 2:strcat(videotag_str,"inter frame");break;
case 3:strcat(videotag_str,"disposable inter frame");break;
case 4:strcat(videotag_str,"generated keyframe");break;
case 5:strcat(videotag_str,"video info/command frame");break;
default:strcat(videotag_str,"UNKNOWN");break;
}
strcat(videotag_str,"| ");
x=tagdata_first_byte&0x0F;
switch (x)
{
case 1:strcat(videotag_str,"JPEG (currently unused)");break;
case 2:strcat(videotag_str,"Sorenson H.263");break;
case 3:strcat(videotag_str,"Screen video");break;
case 4:strcat(videotag_str,"On2 VP6");break;
case 5:strcat(videotag_str,"On2 VP6 with alpha channel");break;
case 6:strcat(videotag_str,"Screen video version 2");break;
case 7:strcat(videotag_str,"AVC");break;
default:strcat(videotag_str,"UNKNOWN");break;
}
fprintf(myout,"%s",videotag_str);
fseek(ifh, -1, SEEK_CUR);
//if the output file hasn't been opened, open it.
if (vfh == NULL&&output_v!=0) {
//write the flv header (reuse the original file's hdr) and first previoustagsize
vfh = fopen("output.flv", "wb");
fwrite((char *)&flv,1, sizeof(flv),vfh);
fwrite((char *)&previoustagsize_z,1,sizeof(previoustagsize_z),vfh);
}
#if 0
//Change Timestamp
//Get Timestamp
ts = reverse_bytes((byte *)&tagheader.Timestamp, sizeof(tagheader.Timestamp));
ts=ts*2;
//Writeback Timestamp
ts_new = reverse_bytes((byte *)&ts, sizeof(ts));
memcpy(&tagheader.Timestamp, ((char *)&ts_new) + 1, sizeof(tagheader.Timestamp));
#endif
//TagData + Previous Tag Size
int data_size=reverse_bytes((byte *)&tagheader.DataSize, sizeof(tagheader.DataSize))+4;
if(output_v!=0){
//TagHeader
fwrite((char *)&tagheader,1, sizeof(tagheader),vfh);
//TagData
for (int i=0; i
此程序可以分离FLV中的视频码流和音频码流。
首先定义了两个header,FLV_HEADER和TAG_HEADER,然后reverse_bytes()
是将大端存储的byte array翻转为小端存储的整型:
uint reverse_bytes(byte *p, char c) {
int r = 0;
int i;
for (i=0; i
他的用法是这样的:reverse_bytes((byte *)&flv.DataOffset, sizeof(flv.DataOffset))
,然后用一个循环,将内存中的数据先后通过|=
与整型进行比较,从而变成了小端存储的整型。
接着在simplest_flv_parser函数中先读文件头,再读文件体。然后一个do-while大循环,直到读取到文件结尾。利用 _getw()函数 (是查找码流中的整数的)读取Previous tag size,然后fread((void *)&tagheader,sizeof(TAG_HEADER),1,ifh);
读取tag header,再获取tag data的字节数:int tagheader_datasize=tagheader.DataSize[0]*pow(2, 16)+tagheader.DataSize[1]*pow(2, 8)+tagheader.DataSize[2];
,获取时戳:int tagheader_timestamp=tagheader.Timestamp[0]*pow(2, 16)+tagheader.Timestamp[1]*pow(2, 8)+tagheader.Timestamp[2];
接着就在之前读取的tag header中tag typr所在的字节中,进行switch判断:
如果是音频:
case TAG_TYPE_AUDIO:{ //音频
char audiotag_str[100]={0};
strcat(audiotag_str,"| ");
char tagdata_first_byte;
//读取一个字符,音频tag data区域的第一个字节,音频的信息
tagdata_first_byte=fgetc(ifh);
// &操作获取前四位,代表音频格式
int x=tagdata_first_byte&0xF0;
//右移4位
x=x>>4;
//判断音频格式
switch (x)
{
case 0:strcat(audiotag_str,"Linear PCM, platform endian");break;
case 1:strcat(audiotag_str,"ADPCM");break;
case 2:strcat(audiotag_str,"MP3");break;
case 3:strcat(audiotag_str,"Linear PCM, little endian");break;
case 4:strcat(audiotag_str,"Nellymoser 16-kHz mono");break;
case 5:strcat(audiotag_str,"Nellymoser 8-kHz mono");break;
case 6:strcat(audiotag_str,"Nellymoser");break;
case 7:strcat(audiotag_str,"G.711 A-law logarithmic PCM");break;
case 8:strcat(audiotag_str,"G.711 mu-law logarithmic PCM");break;
case 9:strcat(audiotag_str,"reserved");break;
case 10:strcat(audiotag_str,"AAC");break;
case 11:strcat(audiotag_str,"Speex");break;
case 14:strcat(audiotag_str,"MP3 8-Khz");break;
case 15:strcat(audiotag_str,"Device-specific sound");break;
default:strcat(audiotag_str,"UNKNOWN");break;
}
strcat(audiotag_str,"| ");
//获取5~6位,采样率
x=tagdata_first_byte&0x0C;
//右移2位
x=x>>2;
//判断采样率
switch (x)
{
case 0:strcat(audiotag_str,"5.5-kHz");break;
case 1:strcat(audiotag_str,"1-kHz");break;
case 2:strcat(audiotag_str,"22-kHz");break;
case 3:strcat(audiotag_str,"44-kHz");break;
default:strcat(audiotag_str,"UNKNOWN");break;
}
strcat(audiotag_str,"| ");
//获取第7位,采样精度
x=tagdata_first_byte&0x02;
x=x>>1;
switch (x)
{
case 0:strcat(audiotag_str,"8Bit");break;
case 1:strcat(audiotag_str,"16Bit");break;
default:strcat(audiotag_str,"UNKNOWN");break;
}
strcat(audiotag_str,"| ");
//获取第8位,音频声道数
x=tagdata_first_byte&0x01;
switch (x)
{
case 0:strcat(audiotag_str,"Mono");break;
case 1:strcat(audiotag_str,"Stereo");break;
default:strcat(audiotag_str,"UNKNOWN");break;
}
fprintf(myout,"%s",audiotag_str);
//if the output file hasn't been opened, open it.
if(output_a!=0&&afh == NULL){
afh = fopen("output.mp3", "wb");
}
//TagData - First Byte Data
//获取tag Data字节数,需要减去Tag Data区域的第一个字节
int data_size=reverse_bytes((byte *)&tagheader.DataSize, sizeof(tagheader.DataSize))-1;
//循环获取字节写入文件
if(output_a!=0){
//TagData+1
for (int i=0; i
首先先读取文件头,再将音频流导出成mp3格式,看了上面的音频tag应该很好理解这段代码。
如果是视频:
case TAG_TYPE_VIDEO:{ //视频
char videotag_str[100]={0};
strcat(videotag_str,"| ");
//读取TagData区域第一个字节,取出前4位。包含视频帧类型
char tagdata_first_byte;
tagdata_first_byte=fgetc(ifh);
int x=tagdata_first_byte&0xF0;
x=x>>4;
switch (x)
{
case 1:strcat(videotag_str,"key frame ");break;
case 2:strcat(videotag_str,"inter frame");break;
case 3:strcat(videotag_str,"disposable inter frame");break;
case 4:strcat(videotag_str,"generated keyframe");break;
case 5:strcat(videotag_str,"video info/command frame");break;
default:strcat(videotag_str,"UNKNOWN");break;
}
strcat(videotag_str,"| ");
//读取TagData区域第一个字节,取出后4位。包含视频编码类型
x=tagdata_first_byte&0x0F;
switch (x)
{
case 1:strcat(videotag_str,"JPEG (currently unused)");break;
case 2:strcat(videotag_str,"Sorenson H.263");break;
case 3:strcat(videotag_str,"Screen video");break;
case 4:strcat(videotag_str,"On2 VP6");break;
case 5:strcat(videotag_str,"On2 VP6 with alpha channel");break;
case 6:strcat(videotag_str,"Screen video version 2");break;
case 7:strcat(videotag_str,"AVC");break;
default:strcat(videotag_str,"UNKNOWN");break;
}
fprintf(myout,"%s",videotag_str);
fseek(ifh, -1, SEEK_CUR);
//if the output file hasn't been opened, open it.
if (vfh == NULL&&output_v!=0) {
//write the flv header (reuse the original file's hdr) and first previoustagsize
vfh = fopen("output.flv", "wb");
fwrite((char *)&flv,1, sizeof(flv),vfh);
fwrite((char *)&previoustagsize_z,1,sizeof(previoustagsize_z),vfh);
}
#if 0
//Change Timestamp
//Get Timestamp
ts = reverse_bytes((byte *)&tagheader.Timestamp, sizeof(tagheader.Timestamp));
ts=ts*2;
//Writeback Timestamp
ts_new = reverse_bytes((byte *)&ts, sizeof(ts));
memcpy(&tagheader.Timestamp, ((char *)&ts_new) + 1, sizeof(tagheader.Timestamp));
#endif
//TagData + Previous Tag Size
int data_size=reverse_bytes((byte *)&tagheader.DataSize, sizeof(tagheader.DataSize))+4;
if(output_v!=0){
//TagHeader
fwrite((char *)&tagheader,1, sizeof(tagheader),vfh);
//TagData
for (int i=0; i
结果如下: