音频编解码基础(wav/aac/pcma/pcmu)

技术在于交流、沟通,转载请注明出处并保持作品的完整性。

原文:https://blog.csdn.net/hiwubihe/article/details/81258879

[音频编解码系列文章]

  1. 音频编解码基础
  2. FFMPEG实现音频重采样
  3. FFMPEG实现PCM编码(采用封装格式实现)
  4. FFMPEG实现PCM编码(不采用封装格式实现)
  5. FAAC库实现PCM编码
  6. FAAD库实现RAW格式AAC解码
  7. FAAD库实现RAW格式AAC封装成ADTS格式
  8. FAAD库实现ADTS格式解码
  9. FFMPEG实现对AAC解码(采用封装格式实现)
  10. FFMPEG实现对AAC解码(不采用封装格式实现)

本文介绍音频处理基础知识,介绍常见的音频问题处理。主要包含以下内容

  • WAV头解析并保存PCM
  • PCM文件加WAV头
  • ADTS格式AAC帧获取
  • PCM转G711A/G711U


WAV格式介绍

WAV是微软RIFF文件格式的一个特例,通常由一个文件头和若干个chunk组成:RIFF文件头(类型为“WAVE”)之后是“fmt ”和“data”两个子chunk,以及若干可选的chunk。格式如下:

                                            音频编解码基础(wav/aac/pcma/pcmu)_第1张图片

一段WAV格式实例

		52 49 46 46 24 08 00 00 57 41 56 45 66 6d 74 20 10 00 00 00 01 00 02 00 
		22 56 00 00 88 58 01 00 04 00 10 00 64 61 74 61 00 08 00 00 00 00 00 00 
		24 17 1e f3 3c 13 3c 14 16 f9 18 f9 34 e7 23 a6 3c f2 24 f2 11 ce 1a 0d 

结构分析如下图

                             音频编解码基础(wav/aac/pcma/pcmu)_第2张图片

 

注意事项

       1.二进制数据都是"小端"存储方式。

       2.8位样本为无符号数,取值范围为0~255;16位样本为有符号数,取值范围为-32768~32767。

WAV头解析并保存PCM

WAV文件是一种Windows RIFF文件,只是给PCM数据加了一个文件头,没有做压缩处理;加上文件头后,一般的播放器就能直接播放。DEMO代码如下:

/*******************************************************************************
Copyright (c) wubihe Tech. Co., Ltd. All rights reserved.
--------------------------------------------------------------------------------

Date Created:	2014-10-25
Author:			wubihe QQ:1269122125 Email:[email protected]
Description:	本例子解析WAV头,并打印音频相关信息
--------------------------------------------------------------------------------
Modification History
DATE          AUTHOR          DESCRIPTION
--------------------------------------------------------------------------------

********************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#endif

/*
 * Fixed-width integer aliases used by the WAV structures below.
 * Each alias is only declared when the build has not announced (through the
 * matching HAVE_* macro) that the platform already provides it.
 */
#ifndef HAVE_INT32_T
typedef int int32_t;              /* signed 32-bit */
#endif

#ifndef HAVE_INT16_T
typedef short int16_t;            /* signed 16-bit */
#endif

#ifndef HAVE_U_INT32_T
typedef unsigned u_int32_t;       /* unsigned 32-bit */
#endif

#ifndef HAVE_U_INT16_T
typedef unsigned short u_int16_t; /* unsigned 16-bit */
#endif

/*
 * Byte-order helpers for the little-endian fields of a RIFF/WAVE file.
 *
 * SWAP16/SWAP32 reverse the byte order of a 16-/32-bit value. They are
 * defined here because the big-endian branch below expands to them and no
 * other definition exists in this file. The argument is evaluated several
 * times, so pass an expression without side effects.
 *
 * UINT16/UINT32 convert a value read from the file to host order: a byte swap
 * when the build defines WORDS_BIGENDIAN, the identity otherwise.
 */
#ifndef SWAP32
# define SWAP32(x) ((((x) & 0x000000ffU) << 24) | \
                    (((x) & 0x0000ff00U) << 8)  | \
                    (((x) & 0x00ff0000U) >> 8)  | \
                    (((x) & 0xff000000U) >> 24))
#endif
#ifndef SWAP16
# define SWAP16(x) ((unsigned short)((((x) & 0x00ffU) << 8) | \
                                     (((x) & 0xff00U) >> 8)))
#endif

#ifdef WORDS_BIGENDIAN
# define UINT32(x) SWAP32(x)
# define UINT16(x) SWAP16(x)
#else
# define UINT32(x) (x)
# define UINT16(x) (x)
#endif

/*
 * Handle for an opened PCM source, created by wav_open_read() and released
 * by wav_close().
 */
typedef struct
{
	FILE *f;          /* input stream (a regular file or stdin) */
	int channels;     /* channel count from the fmt chunk; stays 0 for raw input */
	int samplebytes;  /* bytes per sample (wBitsPerSample / 8); stays 0 for raw input */
	int samplerate;   /* nSamplesPerSec from the fmt chunk; stays 0 for raw input */
	int samples;      /* WAV: data length / (samplebytes * channels); raw file: size in bytes; raw stdin: 0 */
	int bigendian;    /* set to 1 for raw input and to 0 for WAV input */
	int isfloat;      /* non-zero when the format tag or the SubFormat tag byte is WAVE_FORMAT_FLOAT */
} pcmfile_t;


/*
 * First 12 bytes of a RIFF/WAVE file. wav_open_read() fills this struct with
 * a single fread(), so the member order and sizes mirror the file layout.
 */
typedef struct
{
	u_int32_t label;           /* four-character tag, expected to be 'RIFF' */
	u_int32_t length;        /* Length of rest of file */
	u_int32_t chunk_type;      /* four-character tag, expected to be 'WAVE' */
}
riff_t;

/*
 * Header of one RIFF sub-chunk ("fmt ", "data", ...), read straight from the
 * file with fread(); the chunk payload follows it in the stream.
 */
typedef struct
{
	u_int32_t label;  /* four-character chunk tag */
	u_int32_t len;    /* payload length in bytes, little-endian in the file */
}
riffsub_t;

/*
 * On-disk layout of the WAV "fmt " chunk.
 *
 * Both structs are filled by a raw fread(), so they must contain no padding:
 * WAVEFORMATEX is 18 bytes and WAVEFORMATEXTENSIBLE is 40 bytes, with
 * SubFormat at offset 24. MSVC gets that from #pragma pack; GCC-compatible
 * compilers get it from the packed attribute. The attribute is guarded by
 * __GNUC__ (with trailing underscores), which is the macro those compilers
 * actually define.
 */
#ifdef _MSC_VER
#pragma pack(push, 1)
#endif

/* wFormatTag values accepted by wav_open_read(). */
#define WAVE_FORMAT_PCM		1
#define WAVE_FORMAT_FLOAT	3
#define WAVE_FORMAT_EXTENSIBLE	0xfffe
/* File names used by the demo main(). */
#define INPUT_FILE ("huangdun.wav")
#define OUTPUT_FILE ("huangdun_r48000_FMT_S16_c2.pcm")
struct WAVEFORMATEX
{
	u_int16_t wFormatTag;		/* one of the WAVE_FORMAT_* tags above */
	u_int16_t nChannels;		/* number of channels */
	u_int32_t nSamplesPerSec;	/* sample rate */
	u_int32_t nAvgBytesPerSec;	/* average byte rate */
	u_int16_t nBlockAlign;		/* block alignment in bytes */
	u_int16_t wBitsPerSample;	/* bits per sample */
	u_int16_t cbSize;		/* size of the extension that follows; checked to be >= 22 for the extensible form */
}
#ifdef __GNUC__
__attribute__((packed))
#endif
;

struct WAVEFORMATEXTENSIBLE
{
	struct WAVEFORMATEX Format;
	union {
		u_int16_t wValidBitsPerSample;	// bits of precision
		u_int16_t wSamplesPerBlock;		// valid if wBitsPerSample==0
		u_int16_t wReserved;		// If neither applies, set to zero.
	} Samples;
	u_int32_t dwChannelMask;		// which channels are present in stream
	unsigned char SubFormat[16];		// guid
}
#ifdef __GNUC__
__attribute__((packed))
#endif
;

#ifdef _MSC_VER
#pragma pack(pop)
#endif

/*
 * 16-byte reference value for WAVEFORMATEXTENSIBLE.SubFormat.
 * Byte 0 carries the format tag (WAVE_FORMAT_PCM here). wav_open_read()
 * matches SubFormat against these bytes as-is for PCM data, and against the
 * same bytes with byte 0 replaced by WAVE_FORMAT_FLOAT for float data.
 */
static unsigned char waveformat_pcm_guid[16] =
{
	WAVE_FORMAT_PCM,0,0,0,
	0x00, 0x00,
	0x10, 0x00,
	0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71
};

/* Report on stderr that the input called `name` uses a format this reader does not support. */
static void unsuperr(const char *name)
{
	FILE *const sink = stderr;

	fprintf(sink, "%s: file format not supported\n", name);
}


pcmfile_t *wav_open_read(const char *name, int rawinput)
{
	int i;
	int skip;
	FILE *wave_f;
	riff_t riff;
	riffsub_t riffsub;
	struct WAVEFORMATEXTENSIBLE wave;
	char *riffl = "RIFF";
	char *wavel = "WAVE";
	char *bextl = "BEXT";
	char *fmtl = "fmt ";
	char *datal = "data";
	int fmtsize;
	pcmfile_t *sndf;
	int dostdin = 0;

	if (!strcmp(name, "-"))
	{
#ifdef _WIN32
		_setmode(_fileno(stdin), O_BINARY);
#endif
		wave_f = stdin;
		dostdin = 1;
	}
	else if (!(wave_f = fopen(name, "rb")))
	{
		perror(name);
		return NULL;
	}

	if (!rawinput) // header input
	{
		if (fread(&riff, 1, sizeof(riff), wave_f) != sizeof(riff))
			return NULL;
		if (memcmp(&(riff.label), riffl, 4))
			return NULL;
		if (memcmp(&(riff.chunk_type), wavel, 4))
			return NULL;

		// handle broadcast extensions. added by pro-tools,otherwise it must be fmt chunk.
		if (fread(&riffsub, 1, sizeof(riffsub), wave_f) != sizeof(riffsub))
			return NULL;
		riffsub.len = UINT32(riffsub.len);

		if (!memcmp(&(riffsub.label), bextl, 4))
		{
			fseek(wave_f, riffsub.len, SEEK_CUR);

			if (fread(&riffsub, 1, sizeof(riffsub), wave_f) != sizeof(riffsub))
				return NULL;
			riffsub.len = UINT32(riffsub.len);
		}

		if (memcmp(&(riffsub.label), fmtl, 4))
			return NULL;
		memset(&wave, 0, sizeof(wave));

		fmtsize = (riffsub.len < sizeof(wave)) ? riffsub.len : sizeof(wave);
		if (fread(&wave, 1, fmtsize, wave_f) != fmtsize)
			return NULL;

		for (skip = riffsub.len - fmtsize; skip > 0; skip--)
			fgetc(wave_f);

		for (i = 0;; i++)
		{
			if (fread(&riffsub, 1, sizeof(riffsub), wave_f) != sizeof(riffsub))
				return NULL;
			riffsub.len = UINT32(riffsub.len);
			if (!memcmp(&(riffsub.label), datal, 4))
				break;
			if (i > 10)
				return NULL;

			for (skip = riffsub.len; skip > 0; skip--)
				fgetc(wave_f);
		}
		if (UINT16(wave.Format.wFormatTag) != WAVE_FORMAT_PCM && UINT16(wave.Format.wFormatTag) != WAVE_FORMAT_FLOAT)
		{
			if (UINT16(wave.Format.wFormatTag) == WAVE_FORMAT_EXTENSIBLE)
			{
				if (UINT16(wave.Format.cbSize) < 22) // struct too small
					return NULL;
				if (memcmp(wave.SubFormat, waveformat_pcm_guid, 16))
				{
					waveformat_pcm_guid[0] = WAVE_FORMAT_FLOAT;
					if (memcmp(wave.SubFormat, waveformat_pcm_guid, 16))
					{          
						unsuperr(name);
						return NULL;
					}
				}
			}
			else
			{
				unsuperr(name);
				return NULL;
			}
		}
	}

	sndf = (pcmfile_t *)malloc(sizeof(*sndf));
	memset(sndf, 0, sizeof(*sndf));
	sndf->f = wave_f;

	if (UINT16(wave.Format.wFormatTag) == WAVE_FORMAT_FLOAT) {
		sndf->isfloat = 1;
	} else {
		sndf->isfloat = (wave.SubFormat[0] == WAVE_FORMAT_FLOAT);
	}
	if (rawinput)
	{
		sndf->bigendian = 1;
		if (dostdin)
			sndf->samples = 0;
		else
		{
			fseek(sndf->f, 0 , SEEK_END);
			sndf->samples = ftell(sndf->f);
			rewind(sndf->f);
		}
	}
	else
	{
		sndf->bigendian = 0;
		sndf->channels = UINT16(wave.Format.nChannels);
		sndf->samplebytes = UINT16(wave.Format.wBitsPerSample) / 8;
		sndf->samplerate = UINT32(wave.Format.nSamplesPerSec);
		sndf->samples = riffsub.len / (sndf->samplebytes * sndf->channels);
	}
	return sndf;
}

/*
 * Close the stream held by `sndf` and free the handle itself.
 * Returns the result of fclose().
 */
int wav_close(pcmfile_t *sndf)
{
	FILE *stream = sndf->f;
	int status;

	status = fclose(stream);
	free(sndf);

	return status;
}


/*
 * Demo: parse the header of INPUT_FILE, print the stream parameters and copy
 * everything after the header into OUTPUT_FILE as raw PCM.
 * Returns 0 on success and -1 on any open, read or write failure.
 */
int main()
{
	unsigned char buf[4096];
	size_t got;
	int rc = 0;
	FILE *fpout;
	pcmfile_t *pPcmFile;

	/* Open the input first so that a missing or invalid WAV leaves no empty output file. */
	pPcmFile = wav_open_read(INPUT_FILE, 0);
	if (pPcmFile == NULL)
	{
		printf("Open wav file error\n");
		return -1;
	}

	fpout = fopen(OUTPUT_FILE, "wb+");
	if (fpout == NULL)
	{
		printf("Create pcm file error\n");
		wav_close(pPcmFile);
		return -1;
	}

	printf("channels:%1d\t samplebytes:%1d\t samplerate:%06d\t samples:%07d\t bigendian:%1d\n",pPcmFile->channels,pPcmFile->samplebytes,pPcmFile->samplerate,pPcmFile->samples,pPcmFile->bigendian);

	/*
	 * Let the fread() result drive the loop. Testing feof() before reading
	 * would write the final sample a second time.
	 */
	while ((got = fread(buf, 1, sizeof(buf), pPcmFile->f)) > 0)
	{
		if (fwrite(buf, 1, got, fpout) != got)
		{
			printf("Write pcm file error\n");
			rc = -1;
			break;
		}
	}
	if (ferror(pPcmFile->f))
	{
		printf("Read wav file error\n");
		rc = -1;
	}

	wav_close(pPcmFile);
	/* fclose() flushes buffered output, so a failure here means lost data. */
	if (fclose(fpout) != 0)
		rc = -1;

	if (rc == 0)
		printf("Parser WAV Success!!");
	getchar();

	return rc;
}

运行结果保存PCM文件

 

PCM文件加WAV头

ADTS格式AAC帧获取

ADTS结构

ADTSHeader结构

/*
 * ADTS Header: 
 *  MPEG-2 version 56 bits (byte aligned) 
 *  MPEG-4 version 56 bits (byte aligned) - note - changed for 0.99 version
 *
 * syncword						12 bits
 * id							1 bit
 * layer						2 bits
 * protection_absent			1 bit
 * profile						2 bits
 * sampling_frequency_index		4 bits
 * private						1 bit
 * channel_configuration			3 bits
 * original						1 bit
 * home							1 bit
 * copyright_id					1 bit
 * copyright_id_start			1 bit
 * aac_frame_length				13 bits
 * adts_buffer_fullness			11 bits
 * num_raw_data_blocks			2 bits
 *
 * if (protection_absent == 0)
 *	crc_check					16 bits
 */

解析ADTS DEMO代码:

/*******************************************************************************
Copyright (c) wubihe Tech. Co., Ltd. All rights reserved.
--------------------------------------------------------------------------------

Date Created:	2014-10-25
Author:			wubihe QQ:1269122125 Email:[email protected]
Description:	本例子解析ADTS封装的AAC结构,获取AAC一帧数据,并打印帧信息
--------------------------------------------------------------------------------
Modification History
DATE          AUTHOR          DESCRIPTION
--------------------------------------------------------------------------------

********************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>


/* ADTS file read by the demo main(). */
#define INPUT_FILE_NAME		 ("huangdun.aac")
/* Length in bytes of an ADTS header without CRC. */
#define MIN_ADTS_HEAD_LEN    (7)
/* Size in bytes of the read buffer and of the single-frame scratch buffer. */
#define AAC_BUFFER_SIZE		 (8192)
/* Entry count of the table below: one per value of the 4-bit sampling_frequency_index. */
#define NUM_ADTS_SAMPLING_RATES (16)
/* Sample rate for each sampling_frequency_index; the last three entries are 0. */
unsigned int AdtsSamplingRates[NUM_ADTS_SAMPLING_RATES] = 
{
	96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 
	16000, 12000, 11025, 8000, 7350, 0, 0, 0
};
/* Sliding read window over the AAC input file. */
typedef struct 
{
	/* number of bytes currently held in the buffer */
	long bytes_into_buffer;
	/* number of buffered bytes already consumed; set by advance_buffer(), cleared by fill_buffer() */
	long bytes_consumed;
	/* running total of bytes consumed from the file */
	long file_offset;
	/* the buffer itself */
	unsigned char *buffer;
	/* end-of-file flag, set once a read returns fewer bytes than requested */
	int  at_eof;
	/* handle of the input file */
	FILE *infile;
} aac_buffer;

/* AAC data buffer shared by the demo */
aac_buffer g_AacBuffer;


/*
 * Compact and refill the read window after data has been consumed.
 *
 * When b->bytes_consumed is positive, the bytes still held are moved to the
 * front of the buffer, the freed space is refilled from b->infile (unless the
 * end of file was already seen) and b->bytes_consumed is reset to 0. A short
 * read sets b->at_eof. If the buffer then starts with one of the tag markers
 * "TAG", "LYRICSBEGIN" or "APETAGEX", the buffered data is dropped.
 *
 * Always returns 1.
 */
static int fill_buffer(aac_buffer *b)
{
	static const struct
	{
		const char *magic;
		long length;
	} trailers[] = {
		{ "TAG", 3 },
		{ "LYRICSBEGIN", 11 },
		{ "APETAGEX", 8 },
	};
	size_t k;
	int got;

	/* Nothing was consumed since the last fill: the window is still valid. */
	if (b->bytes_consumed <= 0)
		return 1;

	/* Slide the remaining bytes down to the start of the buffer. */
	if (b->bytes_into_buffer != 0)
	{
		memmove((void*)b->buffer, (void*)(b->buffer + b->bytes_consumed),
			b->bytes_into_buffer * sizeof(unsigned char));
	}

	/* Top the buffer up with as many bytes as were consumed. */
	if (b->at_eof == 0)
	{
		got = fread((void*)(b->buffer + b->bytes_into_buffer), 1,
			b->bytes_consumed, b->infile);

		if (got != b->bytes_consumed)
			b->at_eof = 1;

		b->bytes_into_buffer += got;
	}

	b->bytes_consumed = 0;

	/* A tag marker at the front means there is no audio left in the buffer. */
	for (k = 0; k < sizeof(trailers) / sizeof(trailers[0]); k++)
	{
		if (b->bytes_into_buffer > trailers[k].length &&
			memcmp(b->buffer, trailers[k].magic, (size_t)trailers[k].length) == 0)
		{
			b->bytes_into_buffer = 0;
		}
	}

	return 1;
}

/*
 * Mark `bytes` bytes of the window as consumed: advance the file offset,
 * record the amount for the next fill_buffer() call and shrink the count of
 * buffered bytes, which never drops below 0.
 */
static void advance_buffer(aac_buffer *b, int bytes)
{
	long remaining = b->bytes_into_buffer - bytes;

	b->file_offset += bytes;
	b->bytes_consumed = bytes;
	b->bytes_into_buffer = (remaining < 0) ? 0 : remaining;
}

/*
 * Locate the next ADTS frame in `buffer` and copy it to `data`.
 *
 * buffer     bytes to scan.
 * buf_size   number of valid bytes in `buffer`.
 * ipConsume  out: number of bytes the caller may drop from the front of
 *            `buffer` (skipped junk plus, on success, the frame itself).
 * data       out: receives the complete frame, header included; it must have
 *            room for the 13-bit maximum frame length (8191 bytes).
 * data_size  out: length of the copied frame in bytes.
 *
 * Returns -1 on a NULL argument, 1 when the buffer does not yet hold a
 * complete frame, and 0 when one complete frame was copied.
 *
 * A sync word whose length field is below 7 cannot be a real frame, because
 * the header alone is 7 bytes. Such a match is treated as junk and scanning
 * continues; returning it would report success with nothing consumed and
 * stall the caller.
 */
int getADTSframe(unsigned char* buffer, int buf_size,int *ipConsume, unsigned char* data ,int* data_size)
{
	enum { ADTS_MIN_FRAME = 7 };
	int size = 0;
	int iConsume = 0;

	if (!buffer || !ipConsume || !data || !data_size)
	{
		return -1;
	}

	for (;;)
	{
		/* Too few bytes left to hold even a header: ask for more data. */
		if (buf_size < ADTS_MIN_FRAME)
		{
			*ipConsume = iConsume;
			return 1;
		}
		/* Sync word: 12 set bits. */
		if ((buffer[0] == 0xff) && ((buffer[1] & 0xf0) == 0xf0))
		{
			/* aac_frame_length is 13 bits spread over bytes 3..5. */
			size = ((buffer[3] & 0x03) << 11)      /* high 2 bits */
				| (buffer[4] << 3)                 /* middle 8 bits */
				| ((buffer[5] & 0xe0) >> 5);       /* low 3 bits */
			if (size >= ADTS_MIN_FRAME)
				break;
		}
		iConsume++;
		--buf_size;
		++buffer;
	}

	/* Header found but the frame body is not fully buffered yet. */
	if (buf_size < size)
	{
		*ipConsume = iConsume;
		return 1;
	}

	memcpy(data, buffer, size);
	*data_size = size;
	*ipConsume = iConsume + size;

	return 0;
}

/* Fields decoded from one ADTS header by ParserAdtsHead(). */
typedef struct _AdtsHeadInfo
{
	int iProfile;     /* 2-bit profile field (top two bits of header byte 2) */
	int iChans;       /* 3-bit channel configuration field */
	int iSampleRate;  /* AdtsSamplingRates[] entry selected by the 4-bit sampling frequency index */
	int iHeadLen;     /* header length in bytes */
	int iFrameSize;   /* 13-bit frame length field */
}AdtsHeadInfo;


/*
 * Decode the header of one ADTS frame.
 *
 * pData      start of the frame (the sync word).
 * iDataLen   number of valid bytes at pData.
 * pHeadInfo  out: decoded header fields.
 *
 * Returns false when an argument is NULL or fewer bytes than the header
 * length are available; true otherwise. The sync word itself is not
 * validated here.
 */
bool ParserAdtsHead(unsigned char*pData,int iDataLen,AdtsHeadInfo*pHeadInfo)
{
	int iHeadLen;
	unsigned char ucSampleRateIndex;

	if (pData == NULL || pHeadInfo == NULL || iDataLen < MIN_ADTS_HEAD_LEN)
	{
		return false;
	}

	/* protection_absent == 0 means a 16-bit CRC follows the 7-byte header. */
	if ((pData[1] & 0x1) == 0)
	{
		iHeadLen = 9;
	}
	else
	{
		iHeadLen = MIN_ADTS_HEAD_LEN;
	}
	if (iDataLen < iHeadLen)
	{
		return false;
	}
	pHeadInfo->iHeadLen = iHeadLen;

	/* Frame length: 13 bits spread over bytes 3..5. */
	pHeadInfo->iFrameSize = (((unsigned short)(pData[3] & 0x3)) << 11) 
		| (((unsigned short)pData[4]) << 3) | (pData[5] >> 5); 
	/* AAC profile: top 2 bits of byte 2. */
	pHeadInfo->iProfile = ((pData[2] & 0xc0) >> 6);
	/* Channel configuration: lowest bit of byte 2 plus top 2 bits of byte 3. */
	pHeadInfo->iChans   = ((pData[2] & 0x1) << 2) | ((pData[3] & 0xc0) >> 6);

	/* Sampling frequency index: 4 bits of byte 2; valid table indices are 0..NUM-1. */
	ucSampleRateIndex = (pData[2] & 0x3c) >> 2;
	if (ucSampleRateIndex >= NUM_ADTS_SAMPLING_RATES)
	{
		ucSampleRateIndex = 0;
	}
	pHeadInfo->iSampleRate = AdtsSamplingRates[ucSampleRateIndex];

	return true;
}


/*
 * Demo: walk INPUT_FILE_NAME frame by frame and print the header fields of
 * every ADTS frame found.
 * Returns 0 on success and 1 when the file cannot be opened, memory cannot
 * be allocated, or the frame scanner reports an error.
 */
int main()
{
	unsigned char *pTempBuffer = NULL;
	int iFrameSize = 0;
	size_t sRealRead;
	long lOffset = 0;
	int iResult = 0;

	memset(&g_AacBuffer, 0, sizeof(aac_buffer));
	g_AacBuffer.infile = fopen(INPUT_FILE_NAME, "rb");
	if (g_AacBuffer.infile == NULL)
	{
		/* unable to open file */
		fprintf(stderr, "Error opening file: %s\n", INPUT_FILE_NAME);
		return 1;
	}

	/* Read window plus a scratch buffer that receives one frame at a time. */
	g_AacBuffer.buffer = (unsigned char*)malloc(AAC_BUFFER_SIZE);
	pTempBuffer = (unsigned char*)malloc(AAC_BUFFER_SIZE);
	if (g_AacBuffer.buffer == NULL || pTempBuffer == NULL)
	{
		fprintf(stderr, "Memory allocation error\n");
		free(g_AacBuffer.buffer);
		free(pTempBuffer);
		fclose(g_AacBuffer.infile);
		return 1;
	}
	memset(g_AacBuffer.buffer, 0, AAC_BUFFER_SIZE);
	g_AacBuffer.at_eof = 0;

	sRealRead = fread(g_AacBuffer.buffer, 1, AAC_BUFFER_SIZE, g_AacBuffer.infile);
	g_AacBuffer.bytes_into_buffer = (long)sRealRead;
	g_AacBuffer.bytes_consumed = 0;
	g_AacBuffer.file_offset	   = 0;

	if (sRealRead != AAC_BUFFER_SIZE)
	{
		g_AacBuffer.at_eof = 1;
	}

	do
	{
		/* Drop the bytes used in the previous round */
		advance_buffer(&g_AacBuffer, (int)lOffset);
		/* and refill the freed space with new data. */
		fill_buffer(&g_AacBuffer);
		lOffset=0;
		while(1)
		{
			int iConsumNum=0;
			int iRet = getADTSframe(g_AacBuffer.buffer+lOffset, (int)(g_AacBuffer.bytes_into_buffer-lOffset),&iConsumNum, pTempBuffer ,&(iFrameSize));
			if(iRet == 0)
			{
				AdtsHeadInfo stHeadInfo;

				/* A "frame" that consumes nothing would repeat forever. */
				if (iConsumNum <= 0)
				{
					printf("ADTS Error Frame!\n");
					iResult = 1;
					break;
				}
				lOffset+=iConsumNum;
				if(ParserAdtsHead(pTempBuffer ,iFrameSize,&stHeadInfo))
				{
					printf("Profile:%02d\tChans:%02d\tSampleRate:%05d\tHeadSize:%01d\tFrameSize:%06d\t\n",stHeadInfo.iProfile,stHeadInfo.iChans,stHeadInfo.iSampleRate,stHeadInfo.iHeadLen,stHeadInfo.iFrameSize);
				}
				else
				{
					printf("ADTS Error Frame!\n");
				}
			}
			else if(iRet > 0)
			{
				/* Incomplete frame: keep the skipped junk count and refill. */
				lOffset+=iConsumNum;
				break;
			}
			else
			{
				/* Invalid arguments will not fix themselves on a retry. */
				iResult = 1;
				break;
			}
		}

		/*
		 * Nothing consumed in this round means the next refill is a no-op
		 * and the same bytes would be scanned again, so stop here.
		 */
		if (iResult != 0 || lOffset == 0)
		{
			break;
		}

	}while(g_AacBuffer.at_eof == 0);

	free(pTempBuffer);
	free(g_AacBuffer.buffer);
	g_AacBuffer.buffer = NULL;
	fclose(g_AacBuffer.infile);
	g_AacBuffer.infile = NULL;

	if (iResult == 0)
	{
		printf("Adts Parser Success!!\n");
	}
	getchar();

	return iResult;
}


程序运行结果

音频编解码基础(wav/aac/pcma/pcmu)_第3张图片

PCM转G711A/G711U

音频编码分为波形编码和参数编码,常见的编码方式如AAC等则介于两者之间。波形编码直接对声波波形的采样数据进行编码,完全不考虑波形内部的信息,如时域或者频域上的冗余。参数编码则不同:例如对于一个正弦波,我们不需要知道各个采样时刻的数值,只要知道振幅、频率、相位等信息即可;编码时只保存这些参数,接收方按照这些参数重新建立波形即可播放。G711A/G711U属于波形编码,编码比较简单,只是把每个样本值从PCM的16Bit存储压缩成8Bit,在安防和电话中有应用。下面的DEMO把PCM编码成G711A,代码如下:

#include <stdio.h>
#include "g711.h"

#define INPUT_FILE_NAME			 ("huangdun_r48000_FMT_S16_c2.pcm")
#define OUTPUT_FILE_NAME		 ("huangdun_r48000_FMT_S16_c2.g711a")
/*
 * Demo: read 16-bit PCM samples from INPUT_FILE_NAME, encode each one with
 * ALaw_Encode() and write the resulting bytes to OUTPUT_FILE_NAME.
 * Returns 0 on success, 1 when the input cannot be opened and -1 when the
 * output cannot be created or written.
 */
int main()
{
	FILE*pInputFile = fopen(INPUT_FILE_NAME, "rb");
	if (pInputFile == NULL)
	{
		/* unable to open file */
		fprintf(stderr, "Error opening file: %s\n", INPUT_FILE_NAME);
		return 1;
	}
	FILE*pOutputFile=fopen(OUTPUT_FILE_NAME,"wb+");
	if(pOutputFile == NULL) 
	{
		printf("Create g711a file error\n");
		fclose(pInputFile);
		return -1;
	}
	signed short sSample;
	unsigned char ucG711Sample;
	int iReadCnt=0;
	int iWriteCnt=0;
	int iResult=0;

	/*
	 * Let the fread() result drive the loop. Testing feof() before reading
	 * would encode the final sample a second time after the last real read.
	 */
	while(fread(&sSample,sizeof(sSample),1,pInputFile) == 1)
	{
		iReadCnt+=2;
		ucG711Sample = ALaw_Encode(sSample);
		if(fwrite(&ucG711Sample,1,1,pOutputFile) != 1)
		{
			printf("Write g711a file error\n");
			iResult = -1;
			break;
		}
		iWriteCnt+=1;
	}

	fclose(pInputFile);
	/* fclose() flushes buffered output, so a failure here means lost data. */
	if(fclose(pOutputFile) != 0)
	{
		iResult = -1;
	}

	if(iResult == 0)
	{
		printf("ReadCnt:%d WriteCnt:%d PCM TO G711A Success!!!",iReadCnt,iWriteCnt);
	}
	getchar();

	return iResult;
}

 

你可能感兴趣的:(音频处理)