技术在于交流、沟通,转载请注明出处并保持作品的完整性。
原文:https://blog.csdn.net/hiwubihe/article/details/81258879
[音频编解码系列文章]
本文介绍音频处理基础知识,介绍常见的音频问题处理。主要包含以下内容
PCM转G711A/G711U
WAV格式介绍
WAV是微软RIFF文件格式的一个特例,通常由一个文件头和若干个chunk组成:RIFF文件头(类型为“WAVE”)、“fmt ”子chunk、“data”子chunk,以及数量可选的其他chunk。格式如下:
一段WAV格式实例
52 49 46 46 24 08 00 00 57 41 56 45 66 6d 74 20 10 00 00 00 01 00 02 00 22 56 00 00 88 58 01 00 04 00 10 00 64 61 74 61 00 08 00 00 00 00 00 00 24 17 1e f3 3c 13 3c 14 16 f9 18 f9 34 e7 23 a6 3c f2 24 f2 11 ce 1a 0d
结构分析如下图
注意事项
1.二进制数据都是"小端"存储方式。
2.8位样本的取值范围为0~255(无符号),16位样本的取值范围为-32768~32767(有符号)。
WAV头解析并保存PCM
WAV文件是一种Windows RIFF文件,只是在PCM数据前加了一个头,没有做压缩处理;PCM数据加上WAV头后,一般的播放器就能直接播放。DEMO代码如下:
/*******************************************************************************
Copyright (c) wubihe Tech. Co., Ltd. All rights reserved.
--------------------------------------------------------------------------------
Date Created: 2014-10-25
Author: wubihe QQ:1269122125 Email:[email protected]
Description: 本例子解析WAV头,并打印音频相关信息
--------------------------------------------------------------------------------
Modification History
DATE AUTHOR DESCRIPTION
--------------------------------------------------------------------------------
********************************************************************************/
#include
#include
#include
#include
#include
/* Fallback fixed-width integer typedefs. Each one is skipped when the build
 * defines the matching HAVE_* macro. They assume `int` is 32 bits wide and
 * `short` is 16 bits wide. */
#ifndef HAVE_INT32_T
typedef signed int int32_t;
#endif
#ifndef HAVE_INT16_T
typedef signed short int16_t;
#endif
#ifndef HAVE_U_INT32_T
typedef unsigned int u_int32_t;
#endif
#ifndef HAVE_U_INT16_T
typedef unsigned short u_int16_t;
#endif
/* UINT32/UINT16 are applied to multi-byte header fields after they are read
 * from the file. They are the identity unless WORDS_BIGENDIAN is defined.
 * NOTE(review): SWAP32/SWAP16 are not defined anywhere in the visible source,
 * so a WORDS_BIGENDIAN build needs them supplied from elsewhere. */
#ifdef WORDS_BIGENDIAN
# define UINT32(x) SWAP32(x)
# define UINT16(x) SWAP16(x)
#else
# define UINT32(x) (x)
# define UINT16(x) (x)
#endif
/* Handle for an opened audio input, as returned by wav_open_read(). */
typedef struct
{
FILE *f;          /* stream positioned at the audio data (stdin or an fopen'ed file) */
int channels;     /* channel count taken from the WAV header */
int samplebytes;  /* bytes per sample: header bits-per-sample divided by 8 */
int samplerate;   /* samples per second taken from the WAV header */
int samples;      /* header mode: data chunk length / (samplebytes * channels);
                     raw mode: file size in bytes, or 0 when reading stdin */
int bigendian;    /* 1 for raw input, 0 for WAV input */
int isfloat;      /* non-zero when the format is identified as floating point */
} pcmfile_t;
/* The 12-byte header at the start of a RIFF file; filled directly by fread(). */
typedef struct
{
u_int32_t label; /* 'RIFF' */
u_int32_t length; /* Length of rest of file */
u_int32_t chunk_type; /* 'WAVE' */
}
riff_t;
/* The 8-byte header that precedes every RIFF sub-chunk; filled directly by fread(). */
typedef struct
{
u_int32_t label; /* four-character chunk id, e.g. "fmt " or "data" */
u_int32_t len;   /* length in bytes of the chunk payload that follows */
}
riffsub_t;
/*
 * On-disk layout of the WAV "fmt " chunk.
 *
 * These structures are filled straight from the file with fread(), so they
 * must contain no compiler-inserted padding: WAVEFORMATEX is exactly 18 bytes
 * and SubFormat sits at byte offset 24 of WAVEFORMATEXTENSIBLE. MSVC gets this
 * from #pragma pack; GCC-compatible compilers get it from the packed
 * attribute. The guard macro for the latter is __GNUC__ (with trailing
 * underscores). The previous spelling, __GNUC, is never defined, which left
 * the structures padded on GCC/Clang.
 */
#ifdef _MSC_VER
#pragma pack(push, 1)
#endif
/* Values of WAVEFORMATEX.wFormatTag handled by this reader. */
#define WAVE_FORMAT_PCM 1
#define WAVE_FORMAT_FLOAT 3
#define WAVE_FORMAT_EXTENSIBLE 0xfffe
/* File names used by the demo's main(). */
#define INPUT_FILE ("huangdun.wav")
#define OUTPUT_FILE ("huangdun_r48000_FMT_S16_c2.pcm")
struct WAVEFORMATEX
{
    u_int16_t wFormatTag;       /* one of the WAVE_FORMAT_* values */
    u_int16_t nChannels;        /* number of channels */
    u_int32_t nSamplesPerSec;   /* sample rate */
    u_int32_t nAvgBytesPerSec;
    u_int16_t nBlockAlign;
    u_int16_t wBitsPerSample;   /* bits per sample */
    u_int16_t cbSize;           /* size of the extension that follows this struct */
}
#ifdef __GNUC__
__attribute__((packed))
#endif
;
struct WAVEFORMATEXTENSIBLE
{
    struct WAVEFORMATEX Format;
    union {
        u_int16_t wValidBitsPerSample; // bits of precision
        u_int16_t wSamplesPerBlock; // valid if wBitsPerSample==0
        u_int16_t wReserved; // If neither applies, set to zero.
    } Samples;
    u_int32_t dwChannelMask; // which channels are present in stream
    unsigned char SubFormat[16]; // guid
}
#ifdef __GNUC__
__attribute__((packed))
#endif
;
#ifdef _MSC_VER
#pragma pack(pop)
#endif
/* 16-byte SubFormat value that identifies PCM in a WAVE_FORMAT_EXTENSIBLE
 * header, in the byte order in which it is stored in the file. The first byte
 * carries the format tag; substituting WAVE_FORMAT_FLOAT there yields the
 * value used to recognise float data. */
static unsigned char waveformat_pcm_guid[16] =
{
WAVE_FORMAT_PCM,0,0,0,
0x00, 0x00,
0x10, 0x00,
0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71
};
/* Print a "file format not supported" diagnostic for `name` on stderr. */
static void unsuperr(const char *name)
{
    static const char message_format[] = "%s: file format not supported\n";

    (void)fprintf(stderr, message_format, name);
}
pcmfile_t *wav_open_read(const char *name, int rawinput)
{
int i;
int skip;
FILE *wave_f;
riff_t riff;
riffsub_t riffsub;
struct WAVEFORMATEXTENSIBLE wave;
char *riffl = "RIFF";
char *wavel = "WAVE";
char *bextl = "BEXT";
char *fmtl = "fmt ";
char *datal = "data";
int fmtsize;
pcmfile_t *sndf;
int dostdin = 0;
if (!strcmp(name, "-"))
{
#ifdef _WIN32
_setmode(_fileno(stdin), O_BINARY);
#endif
wave_f = stdin;
dostdin = 1;
}
else if (!(wave_f = fopen(name, "rb")))
{
perror(name);
return NULL;
}
if (!rawinput) // header input
{
if (fread(&riff, 1, sizeof(riff), wave_f) != sizeof(riff))
return NULL;
if (memcmp(&(riff.label), riffl, 4))
return NULL;
if (memcmp(&(riff.chunk_type), wavel, 4))
return NULL;
// handle broadcast extensions. added by pro-tools,otherwise it must be fmt chunk.
if (fread(&riffsub, 1, sizeof(riffsub), wave_f) != sizeof(riffsub))
return NULL;
riffsub.len = UINT32(riffsub.len);
if (!memcmp(&(riffsub.label), bextl, 4))
{
fseek(wave_f, riffsub.len, SEEK_CUR);
if (fread(&riffsub, 1, sizeof(riffsub), wave_f) != sizeof(riffsub))
return NULL;
riffsub.len = UINT32(riffsub.len);
}
if (memcmp(&(riffsub.label), fmtl, 4))
return NULL;
memset(&wave, 0, sizeof(wave));
fmtsize = (riffsub.len < sizeof(wave)) ? riffsub.len : sizeof(wave);
if (fread(&wave, 1, fmtsize, wave_f) != fmtsize)
return NULL;
for (skip = riffsub.len - fmtsize; skip > 0; skip--)
fgetc(wave_f);
for (i = 0;; i++)
{
if (fread(&riffsub, 1, sizeof(riffsub), wave_f) != sizeof(riffsub))
return NULL;
riffsub.len = UINT32(riffsub.len);
if (!memcmp(&(riffsub.label), datal, 4))
break;
if (i > 10)
return NULL;
for (skip = riffsub.len; skip > 0; skip--)
fgetc(wave_f);
}
if (UINT16(wave.Format.wFormatTag) != WAVE_FORMAT_PCM && UINT16(wave.Format.wFormatTag) != WAVE_FORMAT_FLOAT)
{
if (UINT16(wave.Format.wFormatTag) == WAVE_FORMAT_EXTENSIBLE)
{
if (UINT16(wave.Format.cbSize) < 22) // struct too small
return NULL;
if (memcmp(wave.SubFormat, waveformat_pcm_guid, 16))
{
waveformat_pcm_guid[0] = WAVE_FORMAT_FLOAT;
if (memcmp(wave.SubFormat, waveformat_pcm_guid, 16))
{
unsuperr(name);
return NULL;
}
}
}
else
{
unsuperr(name);
return NULL;
}
}
}
sndf = (pcmfile_t *)malloc(sizeof(*sndf));
memset(sndf, 0, sizeof(*sndf));
sndf->f = wave_f;
if (UINT16(wave.Format.wFormatTag) == WAVE_FORMAT_FLOAT) {
sndf->isfloat = 1;
} else {
sndf->isfloat = (wave.SubFormat[0] == WAVE_FORMAT_FLOAT);
}
if (rawinput)
{
sndf->bigendian = 1;
if (dostdin)
sndf->samples = 0;
else
{
fseek(sndf->f, 0 , SEEK_END);
sndf->samples = ftell(sndf->f);
rewind(sndf->f);
}
}
else
{
sndf->bigendian = 0;
sndf->channels = UINT16(wave.Format.nChannels);
sndf->samplebytes = UINT16(wave.Format.wBitsPerSample) / 8;
sndf->samplerate = UINT32(wave.Format.nSamplesPerSec);
sndf->samples = riffsub.len / (sndf->samplebytes * sndf->channels);
}
return sndf;
}
/* Close the stream owned by `sndf`, free the handle itself, and hand back
 * whatever fclose() reported. */
int wav_close(pcmfile_t *sndf)
{
    FILE *stream = sndf->f;
    int status;

    status = fclose(stream);
    free(sndf);
    return status;
}
/*
 * Demo entry point: parse the header of INPUT_FILE, print the audio
 * parameters, and copy everything from the start of the data chunk to the end
 * of the file into OUTPUT_FILE as 16-bit units.
 *
 * Returns 0 on success and -1 if a file cannot be opened or an I/O error
 * occurs. Waits for a key press before returning, as the original demo did.
 */
int main()
{
    unsigned short ausSamples[2048];
    size_t sRead;
    pcmfile_t *pPcmFile;
    FILE *fpout;
    int iRet = 0;

    /* Open the input first so a missing WAV does not leave an empty output file. */
    pPcmFile = wav_open_read(INPUT_FILE, 0);
    if (pPcmFile == NULL)
    {
        printf("Open wav file error\n");
        return -1;
    }
    fpout = fopen(OUTPUT_FILE, "wb+");
    if (fpout == NULL)
    {
        printf("Create pcm file error\n");
        wav_close(pPcmFile);
        return -1;
    }
    printf("channels:%1d\t samplebytes:%1d\t samplerate:%06d\t samples:%07d\t bigendian:%1d\n",pPcmFile->channels,pPcmFile->samplebytes,pPcmFile->samplerate,pPcmFile->samples,pPcmFile->bigendian);

    /* Loop on fread()'s result. feof() only turns true after a read has
     * already failed, so testing it first would write the last sample twice. */
    while ((sRead = fread(ausSamples, sizeof(ausSamples[0]),
                          sizeof(ausSamples) / sizeof(ausSamples[0]), pPcmFile->f)) > 0)
    {
        if (fwrite(ausSamples, sizeof(ausSamples[0]), sRead, fpout) != sRead)
        {
            printf("Write pcm file error\n");
            iRet = -1;
            break;
        }
    }
    if (ferror(pPcmFile->f))
    {
        printf("Read wav file error\n");
        iRet = -1;
    }

    wav_close(pPcmFile);
    if (fclose(fpout) != 0)
        iRet = -1;
    if (iRet == 0)
        printf("Parser WAV Success!!");
    getchar();
    return iRet;
}
运行结果保存PCM文件
PCM文件加WAV头
ADTS格式AAC帧获取
ADTS结构
ADTSHeader结构
/*
* ADTS Header:
* MPEG-2 version 56 bits (byte aligned)
* MPEG-4 version 56 bits (byte aligned) - note - changed for 0.99 version
*
* syncword 12 bits
* id 1 bit
* layer 2 bits
* protection_absent 1 bit
* profile 2 bits
* sampling_frequency_index 4 bits
* private 1 bit
* channel_configuration 3 bits
* original 1 bit
* home 1 bit
* copyright_id 1 bit
* copyright_id_start 1 bit
* aac_frame_length 13 bits
* adts_buffer_fullness 11 bits
* num_raw_data_blocks 2 bits
*
* if (protection_absent == 0)
* crc_check 16 bits
*/
解析ADTS DEMO代码:
/*******************************************************************************
Copyright (c) wubihe Tech. Co., Ltd. All rights reserved.
--------------------------------------------------------------------------------
Date Created: 2014-10-25
Author: wubihe QQ:1269122125 Email:[email protected]
Description: 本例子解析ADTS封装的AAC结构,获取AAC一帧数据,并打印帧信息
--------------------------------------------------------------------------------
Modification History
DATE AUTHOR DESCRIPTION
--------------------------------------------------------------------------------
********************************************************************************/
#include
#include
#include
/* ADTS file parsed by the demo's main(). */
#define INPUT_FILE_NAME ("huangdun.aac")
/* Smallest ADTS header length in bytes. */
#define MIN_ADTS_HEAD_LEN (7)
/* Size in bytes of the read buffer and of the per-frame scratch buffer. */
#define AAC_BUFFER_SIZE (8192)
/* Number of entries in AdtsSamplingRates (the header's index field is 4 bits). */
#define NUM_ADTS_SAMPLING_RATES (16)
/* Sample rate for each sampling-frequency index read from the header.
 * The last three entries are 0. */
unsigned int AdtsSamplingRates[NUM_ADTS_SAMPLING_RATES] =
{
96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
16000, 12000, 11025, 8000, 7350, 0, 0, 0
};
/* Read buffer over the input file, refilled by fill_buffer() and advanced by
 * advance_buffer(). */
typedef struct
{
// number of valid bytes currently held in the buffer
long bytes_into_buffer;
// number of buffered bytes consumed since the last refill
long bytes_consumed;
// total number of bytes of the file consumed so far
long file_offset;
// the buffer storage
unsigned char *buffer;
// set to 1 once a read returned fewer bytes than requested
int at_eof;
// handle of the input file
FILE *infile;
} aac_buffer;
// buffer instance shared by the demo
aac_buffer g_AacBuffer;
/*
 * Compact the buffer and top it up from the file.
 *
 * Does nothing unless bytes were consumed since the last call. Otherwise the
 * unconsumed bytes are moved to the front, as many bytes as were consumed are
 * read from the file behind them (a short read sets at_eof), and the consumed
 * counter is reset. If the buffer then starts with "TAG", "LYRICSBEGIN" or
 * "APETAGEX" (presumably trailing tag data rather than audio), its contents
 * are discarded. Always returns 1.
 */
static int fill_buffer(aac_buffer *b)
{
    static const struct
    {
        const char *text;
        long len;
    } markers[] = {
        { "TAG", 3 },
        { "LYRICSBEGIN", 11 },
        { "APETAGEX", 8 }
    };
    size_t k;
    int got;

    if (b->bytes_consumed <= 0)
        return 1;

    /* Slide whatever is still unread down to the start of the buffer. */
    if (b->bytes_into_buffer)
        memmove(b->buffer, b->buffer + b->bytes_consumed,
                b->bytes_into_buffer * sizeof(unsigned char));

    /* Refill the space freed by the consumed bytes. */
    if (!b->at_eof)
    {
        got = fread(b->buffer + b->bytes_into_buffer, 1, b->bytes_consumed, b->infile);
        if (got != b->bytes_consumed)
            b->at_eof = 1;
        b->bytes_into_buffer += got;
    }
    b->bytes_consumed = 0;

    for (k = 0; k < sizeof(markers) / sizeof(markers[0]); k++)
    {
        if (b->bytes_into_buffer > markers[k].len
            && memcmp(b->buffer, markers[k].text, (size_t)markers[k].len) == 0)
            b->bytes_into_buffer = 0;
    }
    return 1;
}
/* Mark `bytes` bytes of the buffer as used: add them to the running file
 * offset, record them as consumed for the next fill_buffer() call, and shrink
 * the count of valid bytes, never letting it drop below zero. */
static void advance_buffer(aac_buffer *b, int bytes)
{
    long left = b->bytes_into_buffer - bytes;

    b->file_offset += bytes;
    b->bytes_consumed = bytes;
    b->bytes_into_buffer = (left < 0) ? 0 : left;
}
/*
 * Locate the next ADTS frame in `buffer` and copy it out.
 *
 * buffer     Bytes to scan.
 * buf_size   Number of valid bytes in `buffer`.
 * ipConsume  Receives the number of bytes the caller may discard: any bytes
 *            skipped before the sync word, plus the frame itself on success.
 * data       Receives the whole frame, header included. The caller must
 *            provide room for the largest frame (the 13-bit length field
 *            allows up to 8191 bytes).
 * data_size  Receives the frame length in bytes on success.
 *
 * Returns 0 when a complete frame was copied, 1 when more input is needed
 * (fewer than 7 bytes left, or the frame is longer than what remains), and -1
 * when a pointer argument is NULL.
 */
int getADTSframe(unsigned char* buffer, int buf_size,int *ipConsume, unsigned char* data ,int* data_size)
{
    enum { ADTS_HEADER_BYTES = 7 };
    int size = 0;
    int iConsume = 0;

    if (!buffer || !ipConsume || !data || !data_size)
    {
        return -1;
    }
    for (;;)
    {
        if (buf_size < ADTS_HEADER_BYTES)
        {
            *ipConsume = iConsume;
            return 1;
        }
        //Sync words
        if ((buffer[0] == 0xff) && ((buffer[1] & 0xf0) == 0xf0))
        {
            size = ((buffer[3] & 0x03) << 11)   //high 2 bit
                 | (buffer[4] << 3)             //middle 8 bit
                 | ((buffer[5] & 0xe0) >> 5);   //low 3bit
            /* A frame can never be shorter than its own header. Accepting such
             * a length would report success while consuming nothing, and the
             * caller would loop forever, so treat it as a false sync. */
            if (size >= ADTS_HEADER_BYTES)
                break;
        }
        iConsume++;
        --buf_size;
        ++buffer;
    }
    if (buf_size < size)
    {
        /* Frame is not fully buffered yet: report only the skipped bytes. */
        *ipConsume = iConsume;
        return 1;
    }
    memcpy(data, buffer, (size_t)size);
    *data_size = size;
    *ipConsume = iConsume + size;
    return 0;
}
/* Fields decoded from one ADTS frame header by ParserAdtsHead(). */
typedef struct _AdtsHeadInfo
{
int iProfile;    /* 2-bit profile field (top two bits of header byte 2) */
int iChans;      /* 3-bit channel configuration field */
int iSampleRate; /* sample rate looked up in AdtsSamplingRates */
int iHeadLen;    /* header length in bytes: 9 or MIN_ADTS_HEAD_LEN */
int iFrameSize;  /* 13-bit frame length field */
}AdtsHeadInfo;
/*
 * Decode the fixed fields of an ADTS frame header.
 *
 * pData      Start of the frame (first byte of the sync word).
 * iDataLen   Number of valid bytes at pData.
 * pHeadInfo  Receives the decoded fields.
 *
 * Returns false when an argument is NULL, the data is shorter than the header
 * or does not start with the 12-bit sync word; true otherwise.
 *
 * NOTE(review): the opening lines of this function were garbled in the source
 * (text between '<' and '>' was lost). The length check, sync check and
 * protection_absent test below are a reconstruction based on the ADTS header
 * layout documented above and on the surviving "iHeadLen = 9" / else branch.
 */
bool ParserAdtsHead(unsigned char*pData,int iDataLen,AdtsHeadInfo*pHeadInfo)
{
    unsigned char ucSampleRateIndex;
    int iHeadLen;

    if (pData == NULL || pHeadInfo == NULL || iDataLen < MIN_ADTS_HEAD_LEN)
    {
        return false;
    }
    if ((pData[0] != 0xff) || ((pData[1] & 0xf0) != 0xf0))
    {
        return false;
    }
    /* protection_absent == 0 means a 16-bit CRC follows the 7-byte header. */
    if ((pData[1] & 0x01) == 0)
    {
        iHeadLen = 9;
    }
    else
    {
        iHeadLen = MIN_ADTS_HEAD_LEN;
    }
    if (iDataLen < iHeadLen)
    {
        return false;
    }
    pHeadInfo->iHeadLen = iHeadLen;
    /* frame length: 13 bits spread over bytes 3..5 */
    pHeadInfo->iFrameSize = (((unsigned short)(pData[3] & 0x3)) << 11)
        | (((unsigned short)pData[4]) << 3) | (pData[5] >> 5);
    /* AAC profile */
    pHeadInfo->iProfile = ((pData[2] & 0xc0) >> 6);
    /* channel configuration: 1 bit in byte 2, 2 bits in byte 3 */
    pHeadInfo->iChans = ((pData[2] & 0x1) << 2) | ((pData[3] & 0xc0) >> 6);
    ucSampleRateIndex = (pData[2] & 0x3c) >> 2;
    /* Valid indices are 0 .. NUM_ADTS_SAMPLING_RATES - 1, so the bound must be
     * ">=". A 4-bit field cannot exceed it, but keep the guard correct. */
    if (ucSampleRateIndex >= NUM_ADTS_SAMPLING_RATES)
    {
        ucSampleRateIndex = 0;
    }
    pHeadInfo->iSampleRate = AdtsSamplingRates[ucSampleRateIndex];
    return true;
}
/*
 * Demo entry point: walk INPUT_FILE_NAME frame by frame and print the header
 * fields of every ADTS frame found.
 *
 * Returns 0 on success and 1 when the file cannot be opened, memory cannot be
 * allocated, or the frame scanner reports an error. Waits for a key press
 * before returning, as the original demo did.
 */
int main()
{
    unsigned char *pTempBuffer = NULL;
    size_t sRealRead;
    long lOffset = 0;
    int iFrameSize = 0;
    int iFailed = 0;

    memset(&g_AacBuffer, 0, sizeof(aac_buffer));
    g_AacBuffer.infile = fopen(INPUT_FILE_NAME, "rb");
    if (g_AacBuffer.infile == NULL)
    {
        /* unable to open file */
        fprintf(stderr, "Error opening file: %s\n", INPUT_FILE_NAME);
        return 1;
    }

    g_AacBuffer.buffer = (unsigned char*)malloc(AAC_BUFFER_SIZE);
    pTempBuffer = (unsigned char*)malloc(AAC_BUFFER_SIZE);
    if (g_AacBuffer.buffer == NULL || pTempBuffer == NULL)
    {
        fprintf(stderr, "Memory allocation error\n");
        free(pTempBuffer);
        free(g_AacBuffer.buffer);
        g_AacBuffer.buffer = NULL;
        fclose(g_AacBuffer.infile);
        return 1;
    }
    memset(g_AacBuffer.buffer, 0, AAC_BUFFER_SIZE);

    /* Prime the buffer; a short first read means the whole file is buffered. */
    sRealRead = fread(g_AacBuffer.buffer, 1, AAC_BUFFER_SIZE, g_AacBuffer.infile);
    g_AacBuffer.bytes_into_buffer = (long)sRealRead;
    g_AacBuffer.bytes_consumed = 0;
    g_AacBuffer.file_offset = 0;
    g_AacBuffer.at_eof = (sRealRead != AAC_BUFFER_SIZE) ? 1 : 0;

    do
    {
        /* drop the bytes used in the previous round */
        advance_buffer(&g_AacBuffer, (int)lOffset);
        /* refill the freed space with new data */
        fill_buffer(&g_AacBuffer);
        lOffset = 0;
        for (;;)
        {
            int iConsumNum = 0;
            int iRet = getADTSframe(g_AacBuffer.buffer + lOffset,
                                    (int)(g_AacBuffer.bytes_into_buffer - lOffset),
                                    &iConsumNum, pTempBuffer, &iFrameSize);
            if (iRet < 0)
            {
                /* Previously an empty branch, which spun forever. */
                fprintf(stderr, "ADTS parse error\n");
                iFailed = 1;
                break;
            }
            if (iRet > 0)
            {
                /* need more data: keep only what the scanner skipped */
                lOffset += iConsumNum;
                break;
            }
            if (iConsumNum <= 0)
            {
                /* A "frame" that consumes nothing would never advance; step
                 * over one byte and resynchronise. */
                printf("ADTS Error Frame!\n");
                lOffset++;
                continue;
            }
            lOffset += iConsumNum;
            {
                AdtsHeadInfo stHeadInfo;
                if (ParserAdtsHead(pTempBuffer, iFrameSize, &stHeadInfo))
                {
                    printf("Profile:%02d\tChans:%02d\tSampleRate:%05d\tHeadSize:%01d\tFrameSize:%06d\t\n",stHeadInfo.iProfile,stHeadInfo.iChans,stHeadInfo.iSampleRate,stHeadInfo.iHeadLen,stHeadInfo.iFrameSize);
                }
                else
                {
                    printf("ADTS Error Frame!\n");
                }
            }
        }
        /* If nothing was consumed, fill_buffer() will not read anything new
         * and the next round would be identical; stop instead of looping. */
        if (iFailed || lOffset == 0)
        {
            break;
        }
    } while (g_AacBuffer.at_eof == 0);

    free(pTempBuffer);
    free(g_AacBuffer.buffer);
    g_AacBuffer.buffer = NULL;
    fclose(g_AacBuffer.infile);
    g_AacBuffer.infile = NULL;
    if (!iFailed)
    {
        printf("Adts Parser Success!!\n");
    }
    getchar();
    return iFailed ? 1 : 0;
}
程序运行结果
PCM转G711A/G711U
音频编码分为波形编码和参数编码,常见的编码方式如AAC等是介于两者之间的编码方式。波形编码就是对声波波形的采样数据进行编码,完全不考虑波形内部的信息,如时域或者频域上的冗余。参数编码则不同,例如对一个正弦波,我们不需要知道它在不同时刻的采样数值,只需要知道振幅、频率、相位等信息,编码时只保存这些参数,接收方按照这些参数重新建立波形即可播放。G711A/G711U就是波形编码,编码比较简单,只是把样本值从PCM的16Bit存储方式压缩成8Bit,在安防和电话中有应用。DEMO实现把PCM编码成G711A,代码如下:
#include
#include "g711.h"
/* PCM input read by the demo and the G.711 A-law output it writes. */
#define INPUT_FILE_NAME ("huangdun_r48000_FMT_S16_c2.pcm")
#define OUTPUT_FILE_NAME ("huangdun_r48000_FMT_S16_c2.g711a")
/*
 * Demo entry point: read 16-bit samples from INPUT_FILE_NAME, encode each one
 * with ALaw_Encode(), and write the resulting bytes to OUTPUT_FILE_NAME.
 *
 * Returns 0 on success, 1 if the input cannot be opened or an I/O error
 * occurs, and -1 if the output cannot be created. Waits for a key press
 * before returning, as the original demo did.
 */
int main()
{
    FILE *pInputFile;
    FILE *pOutputFile;
    signed short sSample;
    unsigned char ucG711Sample;
    int iReadCnt = 0;
    int iWriteCnt = 0;
    int iRet = 0;

    pInputFile = fopen(INPUT_FILE_NAME, "rb");
    if (pInputFile == NULL)
    {
        /* unable to open file */
        fprintf(stderr, "Error opening file: %s\n", INPUT_FILE_NAME);
        return 1;
    }
    pOutputFile = fopen(OUTPUT_FILE_NAME, "wb+");
    if (pOutputFile == NULL)
    {
        printf("Create g711a file error\n");
        fclose(pInputFile);
        return -1;
    }

    /* Loop on fread()'s result. feof() only turns true after a read has
     * already failed, so testing it first would encode the last sample twice. */
    while (fread(&sSample, sizeof(sSample), 1, pInputFile) == 1)
    {
        iReadCnt += 2;
        ucG711Sample = ALaw_Encode(sSample);
        if (fwrite(&ucG711Sample, 1, 1, pOutputFile) != 1)
        {
            fprintf(stderr, "Error writing file: %s\n", OUTPUT_FILE_NAME);
            iRet = 1;
            break;
        }
        iWriteCnt += 1;
    }
    if (ferror(pInputFile))
    {
        fprintf(stderr, "Error reading file: %s\n", INPUT_FILE_NAME);
        iRet = 1;
    }

    fclose(pInputFile);
    if (fclose(pOutputFile) != 0)
    {
        iRet = 1;
    }
    if (iRet == 0)
    {
        printf("ReadCnt:%d WriteCnt:%d PCM TO G711A Success!!!",iReadCnt,iWriteCnt);
    }
    getchar();
    return iRet;
}