百度和讯飞和其他厂都提供了语音识别的接口,这里使用讯飞的识别将本地手机录的音频文件转成文字
以下注意事项:
1.X_Param 参数的 JSON 字符串里如果带有空格会导致签名验证不通过,原因是讯飞后台会对该字符串做 replace(去空格)处理,所以本地在做 base64 和计算 CheckSum 之前也要先去掉空格
2.讯飞后台设置白名单IP
3.本地的文件请使用ffmpeg转码成pcm标准格式,
参考:http://ai.baidu.com/docs#/ASR-Tool-convert/top
// Shared client: HttpClient is designed to be reused; creating one per request leaks sockets.
// Per-request headers are attached to the request content, so sharing the client is safe.
private static readonly HttpClient XfHttpClient = new HttpClient();

/// <summary>
/// Speech dictation: saves the first uploaded audio file under wwwroot/upload, converts it to
/// raw PCM with ffmpeg, posts it to the iFlytek IAT web API and returns the response data.
/// </summary>
/// <param name="fileCollection">Uploaded form files; only the first file is processed.</param>
/// <returns>
/// A result whose <c>data</c> is the response <c>data</c> field when the service answers with code "0",
/// the response code when it answers with any other code, or a fixed fallback message when the HTTP
/// call does not succeed or the body cannot be deserialized.
/// </returns>
/// <exception cref="InvalidDataException">No file was uploaded.</exception>
public PostResult<string> Speech(IFormFileCollection fileCollection)
{
    #region Save the uploaded audio
    if (fileCollection == null || fileCollection.Count == 0) throw new InvalidDataException("没找到需要上传的文件。");

    // Save to local disk first; the converted copy is what gets sent to the cloud API.
    var folder = IO.Path.Combine(IO.Directory.GetCurrentDirectory(), "wwwroot", "upload");
    IO.Directory.CreateDirectory(folder); // no-op when the directory already exists

    var file = fileCollection[0];
    // FileName is client-controlled: keep only the file-name part so it cannot escape the upload folder.
    var filename = IO.Path.GetFileName(file.FileName) ?? string.Empty;
    // GetExtension returns "" (instead of throwing) when the name has no extension.
    var extension = IO.Path.GetExtension(filename).ToLowerInvariant();
    var key = IO.Path.GetFileNameWithoutExtension(filename) + "_" + DateTime.Now.ToString("yyyyMMddHHmmssfff") + extension;
    var sourcePath = IO.Path.Combine(folder, key);
    using (var fs = new IO.FileStream(sourcePath, IO.FileMode.Create, IO.FileAccess.ReadWrite))
    {
        file.CopyTo(fs);
    }
    #endregion

    #region Call the recognition API
    // Transcode with ffmpeg. Only the extension is swapped; the rest of the path is left untouched.
    var pcmPath = IO.Path.ChangeExtension(sourcePath, ".pcm");
    if (!string.Equals(sourcePath, pcmPath, StringComparison.OrdinalIgnoreCase))
    {
        // Paths may contain spaces, so they are passed wrapped in quotes.
        ConvertToPcm("E:\\ffmpeg\\bin", $"\"{sourcePath}\"", $"\"{pcmPath}\"");
    }
    // else: the upload is already a .pcm file; ffmpeg cannot use the same file as input and output,
    // so it is sent as-is.

    // Placeholder credentials; load real values from configuration rather than source code.
    var appid = "xx";
    var apikey = "xxxxxxxx";

    var data = File.ReadAllBytes(pcmPath);
    string base64_audio = Convert.ToBase64String(data); // binary audio -> base64
    string body = "audio=" + System.Web.HttpUtility.UrlEncode(base64_audio);
    string url = "http://api.xfyun.cn/v1/service/v1/iat";
    // ToBase64String strips spaces before encoding; per the author's notes the iFlytek backend
    // strips them too, so the checksum only matches when the local copy has none.
    string X_Param = ToBase64String("{ \"engine_type\": \"sms16k\", \"aue\": \"raw\"}");
    string X_Time = GetTimeStamp(true); // Unix timestamp in seconds

    var content = "对不起,识别不出来...";
    using (HttpContent httpContent = new StringContent(body, Encoding.UTF8, "application/x-www-form-urlencoded"))
    {
        httpContent.Headers.Add("X-Param", X_Param);
        httpContent.Headers.Add("X-CurTime", X_Time);
        httpContent.Headers.Add("X-Appid", appid);
        httpContent.Headers.Add("X-CheckSum", MD5String(apikey, X_Time, X_Param));

        // The method signature is synchronous, so the async calls are awaited by blocking.
        using (HttpResponseMessage response = XfHttpClient.PostAsync(url, httpContent).Result)
        {
            if (response.IsSuccessStatusCode)
            {
                var responseJson = JsonHelper.Deserialize<XfResponse>(response.Content.ReadAsStringAsync().Result);
                // Guard against an empty/unparseable body instead of failing with a NullReferenceException.
                if (responseJson != null)
                {
                    content = responseJson.code == "0" ? responseJson.data : responseJson.code;
                }
            }
        }
    }
    return new PostResult<string> { data = content };
    #endregion
}
/// <summary>
/// Converts an audio file to headerless PCM by running ffmpeg with
/// <c>-acodec pcm_s16le -f s16le -ac 1 -ar 16000</c> (signed 16-bit little-endian, 1 channel, 16000 Hz).
/// An existing target file is overwritten (<c>-y</c>). Failures are not reported to the caller.
/// </summary>
/// <param name="applicationPath">Directory that contains ffmpeg.exe. When null or empty, "ffmpeg.exe" is resolved by the shell.</param>
/// <param name="fileName">Path of the source audio file (including file name). It is inserted into the command line verbatim, so the caller must wrap it in quotes if it contains spaces.</param>
/// <param name="targetFilName">Path of the PCM file to generate (including file name). Inserted verbatim; quote it if it contains spaces.</param>
public void ConvertToPcm(string applicationPath, string fileName, string targetFilName)
{
    // Tolerate a caller that already quoted the directory, then build a single-separator path.
    string ffmpegDirectory = (applicationPath ?? string.Empty).Trim('"');
    string ffmpegExe = System.IO.Path.Combine(ffmpegDirectory, "ffmpeg.exe");

    // Quote the executable so an install directory containing spaces still works.
    string command = "\"" + ffmpegExe + "\" -y -i " + fileName + " -acodec pcm_s16le -f s16le -ac 1 -ar 16000 " + targetFilName;
    Cmd(command);
}
/// <summary>
/// Runs a command line through a hidden cmd.exe process and returns everything the process wrote
/// to standard output.
/// </summary>
/// <param name="c">The command line to execute; "&amp;exit" is appended so the shell terminates afterwards.</param>
/// <returns>
/// The captured standard output. Best effort: if the process cannot be started or fails while
/// being read, the error is traced and whatever was captured so far (possibly empty) is returned.
/// </returns>
private string Cmd(string c)
{
    string strOuput = string.Empty;
    try
    {
        using (Process p = new Process())
        {
            // Program to launch
            p.StartInfo.FileName = "cmd.exe";
            // Stream redirection requires starting without the OS shell
            p.StartInfo.UseShellExecute = false;
            // Commands are fed through standard input
            p.StartInfo.RedirectStandardInput = true;
            // Capture standard output
            p.StartInfo.RedirectStandardOutput = true;
            // Capture standard error
            p.StartInfo.RedirectStandardError = true;
            // Do not show a console window
            p.StartInfo.CreateNoWindow = true;

            p.Start();

            // stderr is redirected, so it must be drained: if its pipe buffer fills up while we are
            // blocked reading stdout, the child process stalls and ReadToEnd never returns.
            p.BeginErrorReadLine();

            // Send the command, then close stdin so nothing in the child can wait for more input.
            p.StandardInput.AutoFlush = true;
            p.StandardInput.WriteLine(c + "&exit");
            p.StandardInput.Close();

            // Read all output before waiting for exit
            strOuput = p.StandardOutput.ReadToEnd();
            p.WaitForExit();
        }
    }
    catch (Exception ex)
    {
        // Keep the best-effort contract (no exception reaches the caller) but leave a trace
        // instead of silently discarding the failure.
        System.Diagnostics.Trace.TraceError("Cmd failed: " + ex);
    }
    return strOuput;
}
/// <summary>
/// Shape of the JSON body returned by the speech recognition endpoint; Speech deserializes the
/// HTTP response into this type. Member names are lower-case, presumably to match the JSON keys.
/// </summary>
public class XfResponse
{
    /// <summary>Result code. Speech treats "0" as success and returns any other code to the caller.</summary>
    public string code { get; set; }
    /// <summary>Not read anywhere in this file; presumably a textual description of the result (confirm against the API documentation).</summary>
    public string desc { get; set; }
    /// <summary>Payload returned to the caller by Speech when <see cref="code"/> is "0".</summary>
    public string data { get; set; }
    /// <summary>Not read anywhere in this file; presumably a session identifier (confirm against the API documentation).</summary>
    public string sid { get; set; }
}
/// <summary>
/// Removes every space character from <paramref name="value"/> and returns the Base64 encoding of
/// the remaining text's UTF-8 bytes.
/// </summary>
/// <param name="value">Text to encode; may be null.</param>
/// <returns>The Base64 string, or an empty string when the input is null, empty, or only spaces.</returns>
public static string ToBase64String(string value)
{
    // Check for null before touching the string; calling Replace first would throw on null input.
    if (string.IsNullOrEmpty(value))
    {
        return "";
    }

    string withoutSpaces = value.Replace(" ", "");
    // An empty byte array encodes to "", so an all-spaces input also yields "".
    return Convert.ToBase64String(Encoding.UTF8.GetBytes(withoutSpaces));
}
/// <summary>
/// Computes the request checksum: the MD5 hash of <paramref name="api_key"/> + <paramref name="x_time"/> +
/// <paramref name="x_param"/> (UTF-8 encoded), rendered as a 32-character lower-case hexadecimal string.
/// </summary>
/// <param name="api_key">API key; a null value is concatenated as an empty string.</param>
/// <param name="x_time">Timestamp string sent in the X-CurTime header.</param>
/// <param name="x_param">Base64 parameter string sent in the X-Param header.</param>
/// <returns>Lower-case hex MD5 digest of the concatenated inputs.</returns>
public static string MD5String(string api_key, string x_time, string x_param)
{
    // Signature input is the plain concatenation of the three parts.
    byte[] payload = Encoding.UTF8.GetBytes(api_key + x_time + x_param);

    // The hash algorithm is IDisposable; release it deterministically.
    using (var md5 = System.Security.Cryptography.MD5.Create())
    {
        byte[] digest = md5.ComputeHash(payload);
        var hex = new StringBuilder(digest.Length * 2);
        foreach (byte b in digest)
        {
            // Emit lower-case hex directly instead of upper-casing and lowering afterwards.
            hex.Append(b.ToString("x2"));
        }
        return hex.ToString();
    }
}
/// <summary>
/// Returns the time elapsed since 1970-01-01 00:00:00 (measured against the current UTC time)
/// as a whole-number string.
/// </summary>
/// <param name="bflag">true for a value in seconds; false for a value in milliseconds.</param>
/// <returns>The elapsed time converted to a 64-bit integer and formatted as text.</returns>
public static string GetTimeStamp(bool bflag)
{
    DateTime unixEpoch = new DateTime(1970, 1, 1, 0, 0, 0, 0);
    TimeSpan elapsed = DateTime.UtcNow - unixEpoch;
    double amount = bflag ? elapsed.TotalSeconds : elapsed.TotalMilliseconds;
    return Convert.ToInt64(amount).ToString();
}
感谢峰_e901 https://www.jianshu.com/p/089a2d466188