现今语音识别已经做得十分成熟,微软的Cortana,苹果的Siri,还有讯飞、百度语音等。但是说到不花钱、单机运行,还是Microsoft的Speech_Note语音识别系统比较省事。因为项目原因,最近做了个小的语音识别软件,一路辛酸记录下来,以资备用。
开发平台:Visual Studio 2015
首先,得下载两个包:
1)SpeechSDK51.exe (67.0 MB)
2)SpeechSDK51LangPack.exe (81.0 MB)
先安装第一个,再安装第二个。
然后配置引用:一是SpeechLib,在“引用”中直接浏览导入C:\Windows\System32\Speech\Common\sapi.dll;二是System.Speech,在“引用—>程序集”中搜索“speech”并勾选。
配置好后,可以写一个小的用例来测试一下模块是否安装成功。(这段是网上其他朋友的博客里抄来的,具体出处忘了,知道的朋友可以给我留言一下,我标注个参考文献,谢谢~)
using System.Speech.Recognition;
// Builds a two-part command grammar ("选择"/"Choose" followed by a colour word),
// loads it into the recognizer and starts continuous asynchronous recognition.
private void Form1_Load(object sender, EventArgs e)
{
    // NOTE(review): SRE is not declared in this snippet; presumably it is a
    // SpeechRecognitionEngine field of the form — confirm.
    // Use the default audio input device; the engine can also be pointed at a WAV file.
    SRE.SetInputToDefaultAudioDevice();

    GrammarBuilder GB = new GrammarBuilder();
    GB.Append(new Choices(new string[] { "选择", "Choose" }));
    GB.Append(new Choices(new string[] { "Green", "红色", "绿色" }));
    Grammar G = new Grammar(GB);

    // Grammar.SpeechRecognized is an EventHandler<SpeechRecognizedEventArgs>;
    // the non-generic EventHandler cannot wrap G_SpeechRecognized and does not compile.
    G.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(G_SpeechRecognized);
    SRE.LoadGrammar(G);

    // Multiple mode keeps recognizing after the first phrase; otherwise the
    // program would respond to a single utterance only.
    SRE.RecognizeAsync(RecognizeMode.Multiple);
}
// Shows the recognized phrase in a message box, then recolours the form
// according to the spoken command. Unrecognized phrases leave the colour unchanged.
void G_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
{
    string phrase = e.Result.Text;
    MessageBox.Show(phrase);

    if (phrase == "选择红色")
    {
        BackColor = Color.Red;
    }
    else if (phrase == "选择绿色" || phrase == "Choose Green")
    {
        // The Chinese and the English command both select green.
        BackColor = Color.Green;
    }
}
在开发过程中,发现Speech_Note模块可以有两种辨识模式。一种是单词辨识,也就是上文代码中的做法:首先给它初始化一个单词库,这样语音识别可以对每个词逐一匹配,正确率挺高的。问题在于,初始化库比较烦人;其次,要是想拼接短语,得做多次GB.Append(new Choices(new string[] { "选择", "Choose" }))这种操作,自适应性不好,而且辨识一整句话比较困难。另一种是整句(听写)辨识,即下文类中用DictationGrammar加载的自然语法。
为了解决初始化单词库的问题,我的做法是将整篇英文文章导入到单词库中,这样就省得人工输入。话不多说,直接贴代码。
/* Cleans the article text: removes punctuation and turns line breaks into spaces. */
private void btn_TextProcessing_Click(object sender, EventArgs e)
{
    string t = tbx_Article.Text;
    if (string.IsNullOrEmpty(t))
    {
        MessageBox.Show("文本不可为空,请重新输入。");
        return;
    }

    tbx_Article.Text = StripPunctuation(t);
}

/* Removes the ASCII punctuation set and replaces every line break with one space. */
private static string StripPunctuation(string text)
{
    // One pass over the same characters the original removed one Replace at a time:
    // , . ; : ' " ? < > / | [ ] { } - _ + = ( ) $ ! @ # % ^ & *
    string stripped = System.Text.RegularExpressions.Regex.Replace(
        text, @"[,.;:'""?<>/|\[\]{}\-_+=()$!@#%^&*]", "");

    // Treat "\r\n", a lone "\r" and a lone "\n" alike. Replacing only "\r" left the
    // "\n" attached to the following word, which broke the later split on spaces.
    return System.Text.RegularExpressions.Regex.Replace(stripped, @"\r\n|\r|\n", " ");
}
/* Splits the article into words and stores each new, lower-cased word in the keystb table. */
private void btn_InputDataBase_Click(object sender, EventArgs e)
{
    // Split on every whitespace character so that stray line breaks never become part of a key.
    string[] words = tbx_Article.Text.Split(
        new char[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    // Words already handled in this run; saves two database round trips per repeated word.
    var seen = new System.Collections.Generic.HashSet<string>();

    foreach (string word in words)
    {
        string key = word.ToLowerInvariant();
        if (!seen.Add(key))
        {
            continue;
        }

        // NOTE(review): if MySqlHelper accepts command parameters (the trailing null argument
        // suggests it does), prefer a parameterized query over escaping — confirm.
        string literal = EscapeSqlLiteral(key);

        // Run the filtered existence check. The original built this query but then executed
        // an unfiltered "select * from keystb" for every word and ignored the result.
        string selectCmd = "SELECT * FROM `keystb` WHERE `Keys`='" + literal + "'";
        var existing = MySqlHelper.GetDataSet(MySqlHelper.Conn, CommandType.Text, selectCmd, null);
        if (existing.Tables.Count > 0 && existing.Tables[0].Rows.Count > 0)
        {
            continue;
        }

        try
        {
            string insertCmd = "INSERT INTO keystb VALUES ('" + literal + "')";
            MySqlHelper.ExecuteNonQuery(MySqlHelper.Conn, CommandType.Text, insertCmd, null);
        }
        catch (Exception ex)
        {
            // Best effort: one failed word must not abort the import, but leave a trace
            // instead of swallowing the error silently.
            System.Diagnostics.Debug.WriteLine("Insert into keystb failed for '" + key + "': " + ex.Message);
        }
    }

    Common.getKeys();
    Form_Keys d = new Form_Keys();
    d.Show();
}

/* Escapes backslashes and single quotes so the value can sit inside a quoted MySQL string literal. */
private static string EscapeSqlLiteral(string value)
{
    return value.Replace("\\", "\\\\").Replace("'", "''");
}
这块处理得比较笨,貌似可以用正则表达式处理这个问题,奈何不会,知道的朋友求指教。
这块引用了网上的一个类,具体出处知情者麻烦告知哈。谢谢
// Wraps a System.Speech SpeechRecognitionEngine that combines a keyword grammar with
// free dictation and appends every recognized phrase to a WinForms control.
// Call Dispose() when finished so the engine is released.
public class SRecognition : IDisposable
{
    // Value of Common.t that this class treats as "no phrase recognized yet".
    // NOTE(review): presumably Common.t is initialized to this value elsewhere — confirm.
    private static readonly DateTime NoRecognitionYet = new DateTime(2000, 1, 1, 21, 21, 21);

    public SpeechRecognitionEngine recognizer = null; // speech recognition engine
    public DictationGrammar dictationGrammar = null;  // free-dictation grammar
    public System.Windows.Forms.Control cDisplay;     // control that receives the recognized text

    // Creates a recognizer for zh-CN (engine id "MS-2052-80-DESK") with the keyword list fg.
    public SRecognition(string[] fg)
    {
        CreateRecognizer(new CultureInfo("zh-CN"), "MS-2052-80-DESK", fg);
    }

    // Creates a recognizer for en-US with the keyword list fg.
    // The int parameter is unused; it only selects this overload.
    public SRecognition(string[] fg, int i)
    {
        CreateRecognizer(new CultureInfo("en-US"), null, fg);
    }

    // Picks the first installed recognizer with the given culture (and id, when requiredId
    // is not null) and initializes it; shows a message box when none is installed.
    private void CreateRecognizer(CultureInfo culture, string requiredId, string[] fg)
    {
        foreach (RecognizerInfo config in SpeechRecognitionEngine.InstalledRecognizers())
        {
            bool idMatches = requiredId == null || config.Id == requiredId;
            if (config.Culture.Equals(culture) && idMatches)
            {
                recognizer = new SpeechRecognitionEngine(config);
                break;
            }
        }

        if (recognizer != null)
        {
            InitializeSpeechRecognitionEngine(fg);
            dictationGrammar = new DictationGrammar();
        }
        else
        {
            MessageBox.Show("创建语音识别失败");
        }
    }

    // Binds the engine to the default microphone, loads the keyword grammar and
    // subscribes the result handler.
    private void InitializeSpeechRecognitionEngine(string[] fg)
    {
        recognizer.SetInputToDefaultAudioDevice();
        Grammar customGrammar = CreateCustomGrammar(fg);
        recognizer.UnloadAllGrammars();
        recognizer.LoadGrammar(customGrammar);

        // The event is an EventHandler<SpeechRecognizedEventArgs>; the non-generic
        // EventHandler cannot wrap recognizer_SpeechRecognized and does not compile.
        recognizer.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(recognizer_SpeechRecognized);
    }

    // Starts recognition and sends the recognized text to tbResult.
    public void BeginRec(Control tbResult)
    {
        // Assign the target first so that an early recognition event never sees a null control,
        // and load the dictation grammar before the engine starts running.
        cDisplay = tbResult;
        TurnDictationOn();
        TurnSpeechRecognitionOn();
    }

    // Stops recognition and unloads the dictation grammar.
    public void over()
    {
        TurnSpeechRecognitionOff();
    }

    // Builds a grammar that matches any single entry of fg.
    public virtual Grammar CreateCustomGrammar(string[] fg)
    {
        GrammarBuilder grammarBuilder = new GrammarBuilder();
        if (recognizer != null)
        {
            // A grammar's culture must match the recognizer's. The builder defaults to the
            // current thread culture, which differs e.g. for the en-US engine on a zh-CN system.
            grammarBuilder.Culture = recognizer.RecognizerInfo.Culture;
        }
        grammarBuilder.Append(new Choices(fg));
        return new Grammar(grammarBuilder);
    }

    // Starts continuous asynchronous recognition.
    private void TurnSpeechRecognitionOn()
    {
        if (recognizer != null)
        {
            recognizer.RecognizeAsync(RecognizeMode.Multiple);
        }
        else
        {
            MessageBox.Show("创建语音识别失败");
        }
    }

    // Stops asynchronous recognition and unloads the dictation grammar.
    private void TurnSpeechRecognitionOff()
    {
        if (recognizer != null)
        {
            recognizer.RecognizeAsyncStop();
            TurnDictationOff();
        }
        else
        {
            MessageBox.Show("创建语音识别失败");
        }
    }

    // Appends the recognized text to cDisplay, followed by a separator chosen from the
    // time elapsed since the previous recognition: " " (< 5 s), ", " (< 10 s) or ". ".
    private void recognizer_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
    {
        DateTime now = System.DateTime.Now;
        TimeSpan ts = Common.t.Equals(NoRecognitionYet) ? TimeSpan.Zero : now.Subtract(Common.t);

        // Remember this recognition so the next pause is measured from here. Previously
        // Common.t was written only once, so every phrase arriving more than 10 s after
        // the first one was followed by ". ".
        Common.t = now;

        string text = e.Result.Text;
        if (ts.TotalSeconds < 5)
        {
            text += " ";
        }
        else if (ts.TotalSeconds < 10)
        {
            text += ", ";
        }
        else
        {
            text += ". ";
        }

        if (cDisplay != null)
        {
            cDisplay.Text += text;
        }
    }

    // Loads the dictation grammar unless it is already loaded.
    private void TurnDictationOn()
    {
        if (recognizer != null && dictationGrammar != null)
        {
            // Loading an already loaded grammar throws, so check first.
            if (!dictationGrammar.Loaded)
            {
                recognizer.LoadGrammar(dictationGrammar);
            }
        }
        else
        {
            MessageBox.Show("创建语音识别失败");
        }
    }

    // Unloads the dictation grammar if it is currently loaded.
    private void TurnDictationOff()
    {
        if (recognizer != null && dictationGrammar != null)
        {
            // Unloading a grammar that was never loaded throws, so check first.
            if (dictationGrammar.Loaded)
            {
                recognizer.UnloadGrammar(dictationGrammar);
            }
        }
        else
        {
            MessageBox.Show("创建语音识别失败");
        }
    }

    // Releases the recognition engine.
    public void Dispose()
    {
        if (recognizer != null)
        {
            recognizer.SpeechRecognized -= recognizer_SpeechRecognized;
            recognizer.Dispose();
            recognizer = null;
        }
    }
}
具体调用过程是这样的~其中button1是开始,button2是结束。注意sr需要先实例化(例如 sr = new SRecognition(关键词数组);),否则点击按钮时会出现空引用异常。
// Recognizer wrapper shared by the start and stop buttons.
// NOTE(review): nothing in this snippet assigns it; it must be constructed
// elsewhere before either button is clicked — confirm.
private SRecognition sr;

// Start button: begins recognition into textBox1, then hands control to the stop button.
private void button1_Click(object sender, EventArgs e)
{
    this.sr.BeginRec(this.textBox1);

    this.button1.Enabled = false;
    this.button2.Enabled = true;
}
// Stop button: ends recognition, then hands control back to the start button.
private void button2_Click(object sender, EventArgs e)
{
    this.sr.over();

    this.button1.Enabled = true;
    this.button2.Enabled = false;
}
这样可以大概完成一句话的辨识。但是从类的定义中 CultureInfo myCIintl = new CultureInfo(“zh-CN”); 这句话可以看出,它是分语言的,所以要保证装有相应的微软语音库。我这边据说是因为中文系统的原因,只有中文的语音库,英文的库需要有英文的系统,这块我还没舍得卸了系统重新装,所以就先放下了。
整个项目完成,基本上功能大概如此。还是开篇一句,记录一下,以资备用。