项目中遇到一个问题:集成的语音识别模块对采样率有要求,所以识别结束后保存的PCM音频文件用的是16K的采样率,而原先的项目中,包括安卓端的语音播报,都是使用8K的采样率。这样就出现了一个问题:是把16K的PCM转换成16K的AMR,还是转换成8K的AMR?
方案一:16K PCM 转换成 16K AMR
由于对音频转换相关方面知识的欠缺,只好上谷歌、百度,也成功地找到一篇关于转换的文章:《使用opencore_amr实现WAV 转 AMR (8Khz,16Khz)(更新版)》
解决方案也很简单,先将PCM转换成WAV,其实PCM就是缺少信息头的WAV,所以转换WAV就是给PCM添加头信息。
这里有现成解决方案,请参考别人实现的代码,git地址如下:https://github.com/codemonkeybulucck/opencore-amrDemo-iOS
方案二:16K PCM转换成8K PCM,再转换成8K AMR
通过方案一,我已经成功转换出16K的AMR,但上传到TFS服务器后,发现H5无法正常播报;安卓端则还没有转换成功,于是我向安卓端推荐了方案二。结果他们找到了一个网上的现成解决方案:JSSRC,并且顺利地把16K PCM转换成了8K AMR,也解决了后续H5播放的问题。
通过代码了解到,JSSRC是基于SSRC的C语言版本翻译成JAVA,于是在github上找到了C语言版的源码:https://github.com/ronalde/ssrc-packaging
通过研究JAVA版和C语言版,最终在ssrc.c中新增了一个转换方法:
/*
 * Resample / re-quantise raw PCM read from `fpi` and write the result to `fpo`.
 *
 * Parameters:
 *   fpi, fpo  Input and output streams, already opened by the caller. This
 *             function does not close them.
 *   sfrq      Source sampling rate in Hz (must be > 0).
 *   dfrq      Destination sampling rate in Hz; -1 means "same as sfrq".
 *   bps       Bytes per input sample (1, 2, 3 or 4).
 *   dbps      Bytes per output sample; -1 derives it from bps
 *             (1 -> 2, 2 -> 2, 3 -> 3, 4 -> 3).
 *   nch       Number of channels (must be > 0).
 *   length    Input length in bytes; length/bps/nch is handed to the converter.
 *   att       Attenuation in dB, applied as a linear gain of 10^(-att/20).
 *   dither    Dither type, 0 (none) .. 4.
 *   quiet_    Non-zero suppresses the informational output; the value is
 *             stored in the file-level `quiet` flag.
 *
 * Returns 0 on success, -1 when an argument is rejected.
 */
int tansfer(FILE *fpi,FILE *fpo,int sfrq,int dfrq,int bps, int dbps, int nch,unsigned int length, double att, int dither, int quiet_) {
    int twopass = 0;
    int pdf = 0;
    double noiseamp = 0.18;
    double peak;
    double gain;
    unsigned int nframes;

    if (dither < 0 || dither > 4) {
        fprintf(stderr,"unrecognized dither type : %d\n", dither);
        return -1;
    }

    quiet = quiet_;
    if (!quiet) printf("Shibatch sampling rate converter version " VERSION "\n\n");

    if (bps != 1 && bps != 2 && bps != 3 && bps != 4) {
        /* The check accepts 4-byte samples, so the message lists 32bit too. */
        fprintf(stderr,"Error: Only 8bit, 16bit, 24bit and 32bit PCM are supported.\n");
        return -1;
    }

    if (fpi == NULL || fpo == NULL) {
        fprintf(stderr,"Error: input and output streams must not be NULL.\n");
        return -1;
    }

    /* nch and sfrq are used as divisors below; reject values that would
       divide by zero or describe a meaningless stream. */
    if (nch <= 0) {
        fprintf(stderr,"Error: invalid number of channels : %d\n", nch);
        return -1;
    }
    if (sfrq <= 0) {
        fprintf(stderr,"Error: invalid source sampling rate : %d\n", sfrq);
        return -1;
    }

    if (dbps == -1) {
        dbps = (bps != 1) ? bps : 2;
        if (dbps == 4) {
            dbps = 3;
        }
    }
    if (dbps < 1 || dbps > 4) {
        fprintf(stderr,"Error: invalid output sample width : %d bytes\n", dbps);
        return -1;
    }

    if (dfrq == -1) {
        dfrq = sfrq;
    }
    if (dfrq <= 0) {
        fprintf(stderr,"Error: invalid destination sampling rate : %d\n", dfrq);
        return -1;
    }

    /* NOTE(review): this automatic selection is currently unreachable because
       the range check at the top rejects dither == -1. It is kept as-is until
       shaper initialisation is confirmed (see the note before quit_shaper). */
    if (dither == -1) {
        if (dbps < bps) {
            dither = (dbps == 1) ? 4 : 3;
        } else {
            dither = 1;
        }
    }

    if (!quiet) {
        const char *dtype[] = {
            "none","no noise shaping","triangular spectral shape","ATH based noise shaping","ATH based noise shaping(less amplitude)"
        };
        const char *ptype[] = {
            "rectangular","triangular","gaussian"
        };
        printf("frequency : %d -> %d\n",sfrq,dfrq);
        printf("attenuation : %gdB\n",att);
        printf("bits per sample : %d -> %d\n",bps*8,dbps*8);
        printf("nchannels : %d\n",nch);
        /* length is unsigned, so it is printed with %u. */
        printf("length : %u bytes, %g secs\n",length,(double)length/bps/nch/sfrq);
        if (dither == 0) {
            printf("dither type : none\n");
        } else {
            printf("dither type : %s, %s p.d.f, amp = %g\n",dtype[dither],ptype[pdf],noiseamp);
        }
        printf("\n");
    }

    /* Turn the requested attenuation (dB) into the linear gain handed to the
       converters; att == 0 yields exactly 1. */
    gain = pow(10.0, -att / 20.0);
    nframes = length/bps/nch;

    if (sfrq < dfrq) {
        peak = upsample(fpi,fpo,nch,bps,dbps,sfrq,dfrq,gain,nframes,twopass,dither);
    } else if (sfrq > dfrq) {
        peak = downsample(fpi,fpo,nch,bps,dbps,sfrq,dfrq,gain,nframes,twopass,dither);
    } else {
        peak = no_src(fpi,fpo,nch,bps,dbps,gain,nframes,twopass,dither);
    }

    if (!quiet) {
        printf("\n");
    }

    /* NOTE(review): no matching shaper initialisation is visible in this
       function; confirm the shaper is set up elsewhere before calling with a
       non-zero dither. */
    if (dither != 0) {
        quit_shaper(nch);
    }

    if (!twopass && peak > 1) {
        if (!quiet) printf("clipping detected : %gdB\n",20*log10(peak));
    }

    return 0;
}
通过下述的使用方式,即可完成16k PCM到8k PCM的转换;至于8k PCM到8k AMR的转换,通过大家常用的opencore-amr库就可以实现了。
// Locate the bundled 16 kHz PCM input. Bail out when the resource is missing
// so a NULL path is never handed to fopen.
NSString *pcm16kPath = [[NSBundle mainBundle] pathForResource:@"uscvoice16k" ofType:@"pcm"];
if (pcm16kPath == nil)
{
    return NO;
}
const char *pcm16k = pcm16kPath.fileSystemRepresentation;
FILE *fpi = fopen(pcm16k, "rb");
if (fpi == NULL)
{
    return NO;
}

NSString *documentDir = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES).firstObject;
if (documentDir == nil)
{
    fclose(fpi);
    return NO;
}
NSString *pcm8kPath = [documentDir stringByAppendingPathComponent:@"pcm8k.pcm"];
const char *pcm8k = pcm8kPath.fileSystemRepresentation;

// Create (or truncate) the 8 kHz PCM output file.
FILE *fpo = fopen(pcm8k, "wb");
if (fpo == NULL)
{
    fclose(fpi);
    return NO;
}

// 16 kHz -> 8 kHz, 2 bytes per sample in and out, mono, no attenuation,
// no dither, quiet. INT_MAX is passed as the input length in bytes.
int status = tansfer(fpi, fpo, 16000, 8000, 2, 2, 1, INT_MAX, 0, 0, true);

// tansfer itself does not close the streams; close them here so the output
// is flushed to disk and the file handles are not leaked.
fclose(fpi);
int closeStatus = fclose(fpo);
if (status != 0 || closeStatus != 0)
{
    return NO;
}