[Video and Audio Data Processing] FLV封装格式解析

0. FLV介绍

[Video and Audio Data Processing] FLV封装格式解析_第1张图片
(https://zh.wikipedia.org/wiki/Flash_Video#tag%E5%9F%BA%E6%9C%AC%E6%A0%BC%E5%BC%8F)

FLV包括文件头(File Header)和文件体(File Body)两部分。
[Video and Audio Data Processing] FLV封装格式解析_第2张图片

0.1 音频tag

音频Tag开始的第1个字节包含了音频数据的参数信息,从第2个字节开始为音频流数据。

在这里插入图片描述
[Video and Audio Data Processing] FLV封装格式解析_第3张图片

0.2 视频tag

视频Tag与音频Tag类似:开始的第1个字节包含视频数据的参数信息(帧类型和编码类型),从第2个字节开始为视频流数据。

在这里插入图片描述
[Video and Audio Data Processing] FLV封装格式解析_第4张图片

0.3 Script Tag(控制帧)

该类型Tag又通常被称为Metadata Tag,会放一些关于FLV视频和音频的元数据信息如:duration、width、height等。通常该类型Tag会跟在File Header后面作为第一个Tag出现,而且只有一个。

在这里插入图片描述
第一个AMF包:

第1个字节表示AMF包类型,一般总是0x02,表示字符串。第2-3个字节为UI16类型值,标识字符串的长度,一般总是0x000A(“onMetaData”长度)。后面字节为具体的字符串,一般总为“onMetaData”(6F,6E,4D,65,74,61,44,61,74,61)。

第二个AMF包:

第1个字节表示AMF包类型,一般总是0x08,表示ECMA数组(关联数组)。第2-5个字节为UI32类型值,表示数组元素的个数。后面即为各数组元素的封装,每个数组元素是由元素名称和值组成的键值对。

[Video and Audio Data Processing] FLV封装格式解析_第5张图片

1. 代码

以下是代码,写了注解。

/* NOTE(review): the header names on the original #include lines were lost;
   these are the standard headers the listing below actually uses. */
#ifdef __cplusplus
#define __STDC_CONSTANT_MACROS
extern "C" {
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef __cplusplus
}
#endif


/* Tag types as stored in the first byte of every tag header. */
#define TAG_TYPE_SCRIPT 18  /* 0x12 */
#define TAG_TYPE_AUDIO  8   /* 0x08 */
#define TAG_TYPE_VIDEO  9   /* 0x09 */

typedef unsigned char byte;
typedef unsigned int uint;  /* the structs below assume this is 4 bytes wide */

/*
 * Important: the two structs below are filled directly from the file with
 * fread(), so they must contain no padding. Packing is scoped with push/pop
 * so that it does not leak into declarations that follow.
 */
#pragma pack(push, 1)

/*
 * FLV file header: type and version information at the very start of the
 * file. 3 + 1 + 1 + 4 = 9 bytes.
 */
typedef struct {
	byte Signature[3]; /* file signature, always "FLV" (0x46 0x4C 0x56 in ASCII) */
	byte Version;      /* format version, currently 0x01 */
	byte Flags;        /* TypeFlags, counted from the most significant bit:
	                      bits 1-5 reserved (must be 0), bit 6 = audio tags
	                      present, bit 7 reserved (0), bit 8 = video tags present */
	uint DataOffset;   /* length of the whole header, stored big-endian; normally 9
	                      for version 0x01, larger when extension data follows */
} FLV_HEADER;

/* Header that precedes the data area of every tag. 1 + 3 + 3 + 4 = 11 bytes. */
typedef struct {
	byte TagType;      /* 0x08 audio, 0x09 video, 0x12 script; other values reserved */
	byte DataSize[3];  /* length of the tag's data area, big-endian */
	byte Timestamp[3]; /* timestamp in milliseconds, big-endian; always 0 for script tags */
	uint Reserved;     /* timestamp extension (1 byte) followed by the stream ID
	                      (3 bytes, always 0) */
} TAG_HEADER;

#pragma pack(pop)


/*
 * reverse_bytes - decode a big-endian byte sequence into a host integer.
 *
 * p: first (most significant) byte of the sequence.
 * c: number of bytes to decode; zero or a negative count yields 0.
 *
 * Example: the bytes 00 00 00 09 with c == 4 decode to 9. Each step moves the
 * bytes gathered so far up by 8 bits and appends the next one, so p[0] ends up
 * in the highest position and p[c-1] in the lowest.
 *
 * The accumulator is unsigned, so a leading byte >= 0x80 never shifts into a
 * sign bit. When c is larger than sizeof(uint) the leading bytes simply fall
 * off the top and the last sizeof(uint) bytes determine the result.
 */
uint reverse_bytes(const byte* p, char c) {
	uint r = 0;
	int i;
	for (i = 0; i < c; i++)
		r = (r << 8) | p[i];
	return r;
}

/**
 * Analysis FLV file
 * @param url    Location of input FLV file.
 */

int simplest_flv_parser(const char* url) {

	//whether output audio/video stream
	int output_a = 1;
	int output_v = 1;
	//-------------
	FILE* ifh = NULL, * vfh = NULL, * afh = NULL;

	//FILE *myout=fopen("output_log.txt","wb+");
	FILE* myout = stdout;

	FLV_HEADER flv; //flv文件头
	TAG_HEADER tagheader; //tag头
	uint previoustagsize, previoustagsize_z = 0;
	uint ts = 0, ts_new = 0;

	ifh = fopen(url, "rb+");
	if (ifh == NULL) {
		printf("Failed to open files!");
		return -1;
	}

	//FLV file header
	fread((char*)&flv, 1, sizeof(FLV_HEADER), ifh);
	//从文件中读取sizeof(FLV_HEADER)*1个字节的数据存到&flv buffer里面,

	fprintf(myout, "============== FLV Header ==============\n");
	fprintf(myout, "Signature:  0x %c %c %c\n", flv.Signature[0],
		flv.Signature[1], flv.Signature[2]);
	fprintf(myout, "Version:    0x %X\n", flv.Version);
	fprintf(myout, "Flags  :    0x %X\n", flv.Flags);
	fprintf(myout, "HeaderSize: 0x %X\n",
		reverse_bytes((byte*)&flv.DataOffset, sizeof(flv.DataOffset)));
	fprintf(myout, "========================================\n");

	//move the file pointer to the end of the header
	fseek(ifh, reverse_bytes((byte*)&flv.DataOffset,
		sizeof(flv.DataOffset)), SEEK_SET);

	//process each tag
	do {

		previoustagsize = _getw(ifh);
		//此句有必要,因为多四个字节的整型0数据,去掉表明前一个tag长度

		fread((void*)&tagheader, sizeof(TAG_HEADER), 1, ifh);
		//		读1*sizeof(TAG_HEADER)个字节的 tag头


		int temp_datasize1=reverse_bytes((byte *)&tagheader.DataSize, \
		sizeof(tagheader.DataSize));
		int tagheader_datasize = tagheader.DataSize[0] * 65536 +
			tagheader.DataSize[1] * 256 + tagheader.DataSize[2];
		//大端存储,计算数据区长度tagheader.DataSize[0]*2^16+tagheader.DataSize[1]*2^8+
		//tagheader.DataSize[2]

		int tagheader_timestamp = tagheader.Timestamp[0] * 65536 +
			tagheader.Timestamp[1] * 256 + tagheader.Timestamp[2];
		//大端存储,计算时间戳,tagheader.Timestamp[0]*2^16+tagheader.Timestamp[1]*2^8+
		//tagheader.Timestamp[2]

		char tagtype_str[10];
		switch (tagheader.TagType) {
			//判断Tag类型(Type),0x08:音频; 0x09:视频; 0x12:脚本
		case TAG_TYPE_AUDIO:sprintf(tagtype_str, "AUDIO"); break;
		case TAG_TYPE_VIDEO:sprintf(tagtype_str, "VIDEO"); break;
		case TAG_TYPE_SCRIPT:sprintf(tagtype_str, "SCRIPT"); break;
		default:sprintf(tagtype_str, "UNKNOWN"); break;
		}
		fprintf(myout, "[%6s] %6d %6d |", tagtype_str,
			tagheader_datasize, tagheader_timestamp);
		//tagheader_datasize表示数据区长度,tagheader_timestamp表示时间戳

				//if we are not past the end of file, process the tag
		if (feof(ifh)) {
			break;
		}

		//process tag by type
		switch (tagheader.TagType) {

		case TAG_TYPE_AUDIO: {
			char audiotag_str[100] = { 0 };
			strcat(audiotag_str, "| ");
			char tagdata_first_byte;
			tagdata_first_byte = fgetc(ifh);
			//读一个字节数据,文件流指针后移
			int x = tagdata_first_byte & 0xF0;
			x = x >> 4;
			//确认音频编码类型
			switch (x)
			{
			case 0:strcat(audiotag_str, "Linear PCM, platform endian"); break;
			case 1:strcat(audiotag_str, "ADPCM"); break;
			case 2:strcat(audiotag_str, "MP3"); break;
			case 3:strcat(audiotag_str, "Linear PCM, little endian"); break;
			case 4:strcat(audiotag_str, "Nellymoser 16-kHz mono"); break;
			case 5:strcat(audiotag_str, "Nellymoser 8-kHz mono"); break;
			case 6:strcat(audiotag_str, "Nellymoser"); break;
			case 7:strcat(audiotag_str, "G.711 A-law logarithmic PCM"); break;
			case 8:strcat(audiotag_str, "G.711 mu-law logarithmic PCM"); break;
			case 9:strcat(audiotag_str, "reserved"); break;
			case 10:strcat(audiotag_str, "AAC"); break;
			case 11:strcat(audiotag_str, "Speex"); break;
			case 14:strcat(audiotag_str, "MP3 8-Khz"); break;
			case 15:strcat(audiotag_str, "Device-specific sound"); break;
			default:strcat(audiotag_str, "UNKNOWN"); break;
			}
			strcat(audiotag_str, "| ");
			x = tagdata_first_byte & 0x0C;
			x = x >> 2;
			//确认采样率
			switch (x)
			{
			case 0:strcat(audiotag_str, "5.5-kHz"); break;
			case 1:strcat(audiotag_str, "1-kHz"); break;
			case 2:strcat(audiotag_str, "22-kHz"); break;
			case 3:strcat(audiotag_str, "44-kHz"); break;
			default:strcat(audiotag_str, "UNKNOWN"); break;
			}
			strcat(audiotag_str, "| ");
			x = tagdata_first_byte & 0x02;
			x = x >> 1;
			//确认音频采样精度,位深度
			switch (x)
			{
			case 0:strcat(audiotag_str, "8Bit"); break;
			case 1:strcat(audiotag_str, "16Bit"); break;
			default:strcat(audiotag_str, "UNKNOWN"); break;
			}
			strcat(audiotag_str, "| ");
			x = tagdata_first_byte & 0x01;
			//确认声道数,单声道,双声道还是未知
			switch (x)
			{
			case 0:strcat(audiotag_str, "Mono"); break;
			case 1:strcat(audiotag_str, "Stereo"); break;
			default:strcat(audiotag_str, "UNKNOWN"); break;
			}
			fprintf(myout, "%s", audiotag_str);

			//if the output file hasn't been opened, open it.
			if (output_a != 0 && afh == NULL) {
				afh = fopen("output.mp3", "wb");
			}

			//TagData - First Byte Data
			int data_size = reverse_bytes((byte*)&tagheader.DataSize,
				sizeof(tagheader.DataSize)) - 1;
			//这里-1应该指的是mp3的数据不需要flv部分的文件头
			//可参阅这个博客https://blog.csdn.net/chgaowei/article/details/51243345
			if (output_a != 0) {
				//TagData+1
		//fgetc(ifh)读取一个字节数据,光标后移一个字节,fgetc返回值是得到的字符值

				for (int i = 0; i < data_size; i++)
					fputc(fgetc(ifh), afh);
				//然后通过fputc函数,把读到的字符值传给afh

			}
			else {
				for (int i = 0; i < data_size; i++)
					fgetc(ifh);
			}
			break;
		}
		case TAG_TYPE_VIDEO: {
			char videotag_str[100] = { 0 };
			strcat(videotag_str, "| ");
			char tagdata_first_byte;
			tagdata_first_byte = fgetc(ifh);
			int x = tagdata_first_byte & 0xF0;
			x = x >> 4;
			//表明帧类型
			switch (x)
			{
			case 1:strcat(videotag_str, "key frame  "); break;
			case 2:strcat(videotag_str, "inter frame"); break;
			case 3:strcat(videotag_str, "disposable inter frame"); break;
			case 4:strcat(videotag_str, "generated keyframe"); break;
			case 5:strcat(videotag_str, "video info/command frame"); break;
			default:strcat(videotag_str, "UNKNOWN"); break;
			}
			strcat(videotag_str, "| ");
			x = tagdata_first_byte & 0x0F;
			//视频编码类型
			switch (x)
			{
			case 1:strcat(videotag_str, "JPEG (currently unused)"); break;
			case 2:strcat(videotag_str, "Sorenson H.263"); break;
			case 3:strcat(videotag_str, "Screen video"); break;
			case 4:strcat(videotag_str, "On2 VP6"); break;
			case 5:strcat(videotag_str, "On2 VP6 with alpha channel"); break;
			case 6:strcat(videotag_str, "Screen video version 2"); break;
			case 7:strcat(videotag_str, "AVC"); break;
			default:strcat(videotag_str, "UNKNOWN"); break;
			}
			fprintf(myout, "%s", videotag_str);

			fseek(ifh, -1, SEEK_CUR);
			//if the output file hasn't been opened, open it.
			if (vfh == NULL && output_v != 0) {
		//write the flv header (reuse the original file's hdr) and first previoustagsize
				vfh = fopen("output.flv", "wb");
				fwrite((char*)&flv, 1, sizeof(flv), vfh);
				fwrite((char*)&previoustagsize_z, 1, sizeof(previoustagsize_z), vfh);
			}
#if 0
			//Change Timestamp
			//Get Timestamp
			ts = reverse_bytes((byte*)&tagheader.Timestamp, sizeof(tagheader.Timestamp));
			ts = ts * 2;
			//Writeback Timestamp
			ts_new = reverse_bytes((byte*)&ts, sizeof(ts));
			memcpy(&tagheader.Timestamp, ((char*)&ts_new) + 1, sizeof(tagheader.Timestamp));
#endif


			//TagData + Previous Tag Size
			int data_size = reverse_bytes((byte*)&tagheader.DataSize,
				sizeof(tagheader.DataSize)) + 4;

			//+4会导致把下一个Previous Tag Size也读到输出文件中了,
			//所以后面代码把光标-4,fseek(ifh, -4, SEEK_CUR);
			if (output_v != 0) {
				//TagHeader
				//先把视频的头写到视频的输出部分
				fwrite((char*)&tagheader, 1, sizeof(tagheader), vfh);
				//TagData
				for (int i = 0; i < data_size; i++)
					fputc(fgetc(ifh), vfh);
			}
			else {
				for (int i = 0; i < data_size; i++)
					fgetc(ifh);
			}
			//rewind 4 bytes, because we need to read the previoustagsize 
			//again for the loop's sake
			fseek(ifh, -4, SEEK_CUR);

			break;
		}
		default:

			//skip the data of this tag
			fseek(ifh, reverse_bytes((byte*)&tagheader.DataSize,
				sizeof(tagheader.DataSize)), SEEK_CUR);

		}

		fprintf(myout, "\n");

	} while (!feof(ifh));


	_fcloseall();

	return 0;
}

/*
 * Program entry point. Parses the FLV file named by the first command-line
 * argument, or "cuc_ieschool.flv" when no argument is given, and turns a
 * parser failure into a non-zero exit status.
 */
int main(int argc, char* argv[])
{
	const char* path = (argc > 1) ? argv[1] : "cuc_ieschool.flv";

	return simplest_flv_parser(path) == 0 ? 0 : 1;
}

运行结果:

[Video and Audio Data Processing] FLV封装格式解析_第6张图片

2. 重点代码介绍

previoustagsize = _getw(ifh);

此句必不可少:每个Tag之前都有一个4字节的PreviousTagSize字段,记录前一个Tag的长度(第一个Tag之前的该字段值为0)。必须先把这4个字节读掉,文件指针才会正好指向接下来的Tag Header。

可以用二进制软件打开flv源文件查看,previoustagsize是int型,占四个字节,要把它去掉。
[Video and Audio Data Processing] FLV封装格式解析_第7张图片

int _getw(
   FILE *stream
);

_getw返回读取的整数值,并且递增关联的文件指针。注意:_getw是MSVC运行库特有的函数,并且按本机字节序读取;而FLV中的PreviousTagSize是大端存储的,所以这里读到的数值本身没有意义,代码只是借它跳过这4个字节。

参考链接:

  1. https://blog.csdn.net/leixiaohua1020/article/details/17934487
  2. https://blog.csdn.net/leixiaohua1020/article/details/50535230
  3. https://blog.csdn.net/u013010310/article/details/52415147

你可能感兴趣的:(视音频数据处理)