前奏,浑浑噩噩已经工作一年多,这一年多收获还是挺多的。逛园子应该有两年多了,工作后基本上是天天都会来园子逛逛,园子 里还是有很多牛人写了一些不错的博客,帮我解决很多问题。但是一直没写过博客,归根到底一个字“懒”,还有就是不知道该写 些什么...
今天把我最近研究讯飞语音东东,分享一下,不过有些还是前辈们提供的。之前公司让我做一个小的语音识别功能,一开始我就建议使用讯飞语音,个人觉得讯飞识别正确率还是可观的。可是老总说不能考 虑联网,还有就是钱的问题。想到微软自带语音识别引擎(基于win7)。第一次接触到语音识别,没什么头绪,只有收集相关资料 。最后成品出来了,但是识别效果不是那么满意,老总说那就将就用吧,我想他都那样说了,我也没多大意见...开年后老总老总 买了个iphone5玩了siri后,觉得我们现在那个语音太丑陋了,让换一个解决方案,识别率要达到90%。国内有好几家公司做语音识 别的比如科大讯飞、云知声、捷通华声以及紫冬锐意语音都做了一定开放。市面上我知道语音助手有百度语音助手、虫洞语音助手 、360语音助手以及科大讯飞语点;前两个我不知道采用那家公司的还是自己研发的不过都没开开放,360语音助手采用讯飞的。然 后我就继续提议使用讯飞语音SDK,于是乎同事们都下载讯飞语点来玩玩,老总说那就试试讯飞语音SDK。
先做一个简单demo,看看识别效果,感觉识别率上能够满足要求。一般要的结果不光只是要把所说的话翻译成文字,而是需要的是 语义的理解:例如我要去北京,直接返回北京这个关键。目前讯飞还没把这个接口开放出来,公司负责人说今年会把这个开放出来 。那现在只能使用关键词识别语法,一种是直接是文本词库;另一种是ABNF语法。ABNF写法有点烦杂(语法文件里使用到的词句都是指定的,对于无法枚举的词句暂时没有很好的解决办法。),就直接采用文本词库(因为关键词有点多)。讯飞还有专门人负责关于讯飞语音问题解答(QQ群:153789256)。
讯飞提供msc.dll这个DLL,调用DLL的封装:
/// <summary>}
说明一下:“QISRUploadData”(上传词库)这个函数在开发文档里面没的,讯飞遗漏了。
具体实现(关键词识别,文本词库):
类:MscNet
#region 定义字段
如果采用ABNF语法,只是与文本词库加载语法方式有点不一样:
/// <summary>
/// 加载语法
/// </summary>
private void Loadgrammar()
{
try
{
//sess_id = PtrToStr(MscDll.QISRSessionBegin(grammar, param, ref ret));
/*ABNF语法*/
sess_id = PtrToStr(MscDll.QISRSessionBegin(null, "rst=plain,sub=asr,ssm=1,aue=speex,auf=audio/L16;rate=16000,cfd=350", ref ret));
em.GetError(ret);
string grammar1 = "#ABNF 1.0 GB2312;\n mode voice;\n language zh-cn;\n\n\n root $main;\n public $main = 我[想要]看$place1;\n $place1=足球;\n";
ret = MscDll.QISRGrammarActivate(sess_id, grammar1, "abnf", 0);//将语法ID传入QISRSessionBegin
/*end */
em.GetError(ret);
}
catch (MscException ex)
{
RaiseError(ex.Message.ToString());
}
}
常量的枚举:
#region 错误代号
enum ErrorCode
{
MSP_SUCCESS= 0,
MSP_ERROR_FAIL = -1,
MSP_ERROR_EXCEPTION = -2,
/* General errors 10100(0x2774) */
MSP_ERROR_GENERAL = 10100, /* 0x2774 */
MSP_ERROR_OUT_OF_MEMORY = 10101, /* 0x2775 */
MSP_ERROR_FILE_NOT_FOUND = 10102, /* 0x2776 */
MSP_ERROR_NOT_SUPPORT = 10103, /* 0x2777 */
MSP_ERROR_NOT_IMPLEMENT = 10104, /* 0x2778 */
MSP_ERROR_ACCESS = 10105, /* 0x2779 */
MSP_ERROR_INVALID_PARA = 10106, /* 0x277A */
MSP_ERROR_INVALID_PARA_VALUE = 10107, /* 0x277B */
MSP_ERROR_INVALID_HANDLE = 10108, /* 0x277C */
MSP_ERROR_INVALID_DATA = 10109, /* 0x277D */
MSP_ERROR_NO_LICENSE = 10110, /* 0x277E */
MSP_ERROR_NOT_INIT = 10111, /* 0x277F */
MSP_ERROR_NULL_HANDLE = 10112, /* 0x2780 */
MSP_ERROR_OVERFLOW = 10113, /* 0x2781 */
MSP_ERROR_TIME_OUT = 10114, /* 0x2782 */
MSP_ERROR_OPEN_FILE = 10115, /* 0x2783 */
MSP_ERROR_NOT_FOUND = 10116, /* 0x2784 */
MSP_ERROR_NO_ENOUGH_BUFFER = 10117, /* 0x2785 */
MSP_ERROR_NO_DATA = 10118, /* 0x2786 */
MSP_ERROR_NO_MORE_DATA = 10119, /* 0x2787 */
MSP_ERROR_SKIPPED = 10120, /* 0x2788 */
MSP_ERROR_ALREADY_EXIST = 10121, /* 0x2789 */
MSP_ERROR_LOAD_MODULE = 10122, /* 0x278A */
MSP_ERROR_BUSY = 10123, /* 0x278B */
MSP_ERROR_INVALID_CONFIG = 10124, /* 0x278C */
MSP_ERROR_VERSION_CHECK = 10125, /* 0x278D */
MSP_ERROR_CANCELED = 10126, /* 0x278E */
MSP_ERROR_INVALID_MEDIA_TYPE = 10127, /* 0x278F */
MSP_ERROR_CONFIG_INITIALIZE = 10128, /* 0x2790 */
MSP_ERROR_CREATE_HANDLE = 10129, /* 0x2791 */
MSP_ERROR_CODING_LIB_NOT_LOAD = 10130, /* 0x2792 */
/* Error codes of network 10200(0x27D8)*/
MSP_ERROR_NET_GENERAL = 10200, /* 0x27D8 */
MSP_ERROR_NET_OPENSOCK = 10201, /* 0x27D9 */ /* Open socket */
MSP_ERROR_NET_CONNECTSOCK = 10202, /* 0x27DA */ /* Connect socket */
MSP_ERROR_NET_ACCEPTSOCK = 10203, /* 0x27DB */ /* Accept socket */
MSP_ERROR_NET_SENDSOCK = 10204, /* 0x27DC */ /* Send socket data */
MSP_ERROR_NET_RECVSOCK = 10205, /* 0x27DD */ /* Recv socket data */
MSP_ERROR_NET_INVALIDSOCK = 10206, /* 0x27DE */ /* Invalid socket handle */
MSP_ERROR_NET_BADADDRESS = 10207, /* 0x27EF */ /* Bad network address */
MSP_ERROR_NET_BINDSEQUENCE = 10208, /* 0x27E0 */ /* Bind after listen/connect */
MSP_ERROR_NET_NOTOPENSOCK = 10209, /* 0x27E1 */ /* Socket is not opened */
MSP_ERROR_NET_NOTBIND = 10210, /* 0x27E2 */ /* Socket is not bind to an address */
MSP_ERROR_NET_NOTLISTEN = 10211, /* 0x27E3 */ /* Socket is not listenning */
MSP_ERROR_NET_CONNECTCLOSE = 10212, /* 0x27E4 */ /* The other side of connection is closed */
MSP_ERROR_NET_NOTDGRAMSOCK = 10213, /* 0x27E5 */ /* The socket is not datagram type */
MSP_ERROR_NET_DNS=10214,
/* Error codes of mssp message 10300(0x283C) */
MSP_ERROR_MSG_GENERAL = 10300, /* 0x283C */
MSP_ERROR_MSG_PARSE_ERROR = 10301, /* 0x283D */
MSP_ERROR_MSG_BUILD_ERROR = 10302, /* 0x283E */
MSP_ERROR_MSG_PARAM_ERROR = 10303, /* 0x283F */
MSP_ERROR_MSG_CONTENT_EMPTY = 10304, /* 0x2840 */
MSP_ERROR_MSG_INVALID_CONTENT_TYPE = 10305, /* 0x2841 */
MSP_ERROR_MSG_INVALID_CONTENT_LENGTH = 10306, /* 0x2842 */
MSP_ERROR_MSG_INVALID_CONTENT_ENCODE = 10307, /* 0x2843 */
MSP_ERROR_MSG_INVALID_KEY = 10308, /* 0x2844 */
MSP_ERROR_MSG_KEY_EMPTY = 10309, /* 0x2845 */
MSP_ERROR_MSG_SESSION_ID_EMPTY = 10310, /* 0x2846 */
MSP_ERROR_MSG_LOGIN_ID_EMPTY = 10311, /* 0x2847 */
MSP_ERROR_MSG_SYNC_ID_EMPTY = 10312, /* 0x2848 */
MSP_ERROR_MSG_APP_ID_EMPTY = 10313, /* 0x2849 */
MSP_ERROR_MSG_EXTERN_ID_EMPTY = 10314, /* 0x284A */
MSP_ERROR_MSG_INVALID_CMD = 10315, /* 0x284B */
MSP_ERROR_MSG_INVALID_SUBJECT = 10316, /* 0x284C */
MSP_ERROR_MSG_INVALID_VERSION = 10317, /* 0x284D */
MSP_ERROR_MSG_NO_CMD = 10318, /* 0x284E */
MSP_ERROR_MSG_NO_SUBJECT = 10319, /* 0x284F */
MSP_ERROR_MSG_NO_VERSION = 10320, /* 0x2850 */
MSP_ERROR_MSG_MSSP_EMPTY = 10321, /* 0x2851 */
MSP_ERROR_MSG_NEW_RESPONSE = 10322, /* 0x2852 */
MSP_ERROR_MSG_NEW_CONTENT = 10323, /* 0x2853 */
MSP_ERROR_MSG_INVALID_SESSION_ID = 10324, /* 0x2854 */
/* Error codes of DataBase 10400(0x28A0)*/
MSP_ERROR_DB_GENERAL = 10400, /* 0x28A0 */
MSP_ERROR_DB_EXCEPTION = 10401, /* 0x28A1 */
MSP_ERROR_DB_NO_RESULT = 10402, /* 0x28A2 */
MSP_ERROR_DB_INVALID_USER = 10403, /* 0x28A3 */
MSP_ERROR_DB_INVALID_PWD = 10404, /* 0x28A4 */
MSP_ERROR_DB_CONNECT = 10405, /* 0x28A5 */
MSP_ERROR_DB_INVALID_SQL = 10406, /* 0x28A6 */
MSP_ERROR_DB_INVALID_APPID = 10407, /* 0x28A7 */
/* Error codes of Resource 10500(0x2904)*/
MSP_ERROR_RES_GENERAL = 10500, /* 0x2904 */
MSP_ERROR_RES_LOAD = 10501, /* 0x2905 */ /* Load resource */
MSP_ERROR_RES_FREE = 10502, /* 0x2906 */ /* Free resource */
MSP_ERROR_RES_MISSING = 10503, /* 0x2907 */ /* Resource File Missing */
MSP_ERROR_RES_INVALID_NAME = 10504, /* 0x2908 */ /* Invalid resource file name */
MSP_ERROR_RES_INVALID_ID = 10505, /* 0x2909 */ /* Invalid resource ID */
MSP_ERROR_RES_INVALID_IMG = 10506, /* 0x290A */ /* Invalid resource image pointer */
MSP_ERROR_RES_WRITE = 10507, /* 0x290B */ /* Write read-only resource */
MSP_ERROR_RES_LEAK = 10508, /* 0x290C */ /* Resource leak out */
MSP_ERROR_RES_HEAD = 10509, /* 0x290D */ /* Resource head currupt */
MSP_ERROR_RES_DATA = 10510, /* 0x290E */ /* Resource data currupt */
MSP_ERROR_RES_SKIP = 10511, /* 0x290F */ /* Resource file skipped */
/* Error codes of TTS 10600(0x2968)*/
MSP_ERROR_TTS_GENERAL = 10600, /* 0x2968 */
MSP_ERROR_TTS_TEXTEND = 10601, /* 0x2969 */ /* Meet text end */
MSP_ERROR_TTS_TEXT_EMPTY = 10602, /* 0x296A */ /* no synth text */
/* Error codes of Recognizer 10700(0x29CC) */
MSP_ERROR_REC_GENERAL = 10700, /* 0x29CC */
MSP_ERROR_REC_INACTIVE = 10701, /* 0x29CD */
MSP_ERROR_REC_GRAMMAR_ERROR = 10702, /* 0x29CE */
MSP_ERROR_REC_NO_ACTIVE_GRAMMARS = 10703, /* 0x29CF */
MSP_ERROR_REC_DUPLICATE_GRAMMAR = 10704, /* 0x29D0 */
MSP_ERROR_REC_INVALID_MEDIA_TYPE = 10705, /* 0x29D1 */
MSP_ERROR_REC_INVALID_LANGUAGE = 10706, /* 0x29D2 */
MSP_ERROR_REC_URI_NOT_FOUND = 10707, /* 0x29D3 */
MSP_ERROR_REC_URI_TIMEOUT = 10708, /* 0x29D4 */
MSP_ERROR_REC_URI_FETCH_ERROR = 10709, /* 0x29D5 */
/* Error codes of Speech Detector 10800(0x2A30) */
MSP_ERROR_EP_GENERAL = 10800, /* 0x2A30 */
MSP_ERROR_EP_NO_SESSION_NAME = 10801, /* 0x2A31 */
MSP_ERROR_EP_INACTIVE = 10802, /* 0x2A32 */
MSP_ERROR_EP_INITIALIZED = 10803, /* 0x2A33 */
/* Error codes of TUV */
MSP_ERROR_TUV_GENERAL = 10900, /* 0x2A94 */
MSP_ERROR_TUV_GETHIDPARAM = 10901, /* 0x2A95 */ /* Get Busin Param huanid*/
MSP_ERROR_TUV_TOKEN = 10902, /* 0x2A96 */ /* Get Token */
MSP_ERROR_TUV_CFGFILE = 10903, /* 0x2A97 */ /* Open cfg file */
MSP_ERROR_TUV_RECV_CONTENT = 10904, /* 0x2A98 */ /* received content is error */
MSP_ERROR_TUV_VERFAIL = 10905, /* 0x2A99 */ /* Verify failure */
/* Error codes of IMTV */
MSP_ERROR_IMTV_SUCCESS = 11000, /* 0x2AF8 */ /* 成功 */
MSP_ERROR_IMTV_NO_LICENSE = 11001, /* 0x2AF9 */ /* 试用次数结束,用户需要付费 */
MSP_ERROR_IMTV_SESSIONID_INVALID = 11002, /* 0x2AFA */ /* SessionId失效,需要重新登录通行证 */
MSP_ERROR_IMTV_SESSIONID_ERROR = 11003, /* 0x2AFB */ /* SessionId为空,或者非法 */
MSP_ERROR_IMTV_UNLOGIN = 11004, /* 0x2AFC */ /* 未登录通行证 */
MSP_ERROR_IMTV_SYSTEM_ERROR = 11005, /* 0x2AFD */ /* 系统错误 */
/* Error codes of HCR */
MSP_ERROR_HCR_GENERAL = 11100,
MSP_ERROR_HCR_RESOURCE_NOT_EXIST = 11101,
/* Error codes of http 12000(0x2EE0) */
MSP_ERROR_HTTP_BASE = 12000, /* 0x2EE0 */
/*Error codes of ISV */
MSP_ERROR_ISV_NO_USER = 13000, /* 32C8 */ /* the user doesn't exist */
}
#endregion
#region ISR枚举常量
public enum AudioStatus
{
ISR_AUDIO_SAMPLE_INIT = 0x00,
ISR_AUDIO_SAMPLE_FIRST = 0x01,
ISR_AUDIO_SAMPLE_CONTINUE = 0x02,
ISR_AUDIO_SAMPLE_LAST = 0x04,
ISR_AUDIO_SAMPLE_SUPPRESSED = 0x08,
ISR_AUDIO_SAMPLE_LOST = 0x10,
ISR_AUDIO_SAMPLE_NEW_CHUNK = 0x20,
ISR_AUDIO_SAMPLE_END_CHUNK = 0x40,
ISR_AUDIO_SAMPLE_VALIDBITS = 0x7f /* to validate the value of sample->status */
}
public enum EpStatus
{
ISR_EP_NULL = -1,
ISR_EP_LOOKING_FOR_SPEECH = 0, ///还没有检测到音频的前端点
ISR_EP_IN_SPEECH = 1, ///已经检测到了音频前端点,正在进行正常的音频处理。
ISR_EP_AFTER_SPEECH = 3, ///检测到音频的后端点,后继的音频会被MSC忽略。
ISR_EP_TIMEOUT = 4, ///超时
ISR_EP_ERROR = 5, ///出现错误
ISR_EP_MAX_SPEECH = 6 ///音频过大
}
public enum RecogStatus
{
ISR_REC_NULL = -1,
ISR_REC_STATUS_SUCCESS = 0, ///识别成功,此时用户可以调用QISRGetResult来获取(部分)结果。
ISR_REC_STATUS_NO_MATCH = 1, ///识别结束,没有识别结果
ISR_REC_STATUS_INCOMPLETE = 2, ///正在识别中
ISR_REC_STATUS_NON_SPEECH_DETECTED = 3, ///保留
ISR_REC_STATUS_SPEECH_DETECTED = 4, ///发现有效音频
ISR_REC_STATUS_SPEECH_COMPLETE = 5, ///识别结束
ISR_REC_STATUS_MAX_CPU_TIME = 6, ///保留
ISR_REC_STATUS_MAX_SPEECH = 7, ///保留
ISR_REC_STATUS_STOPPED = 8, ///保留
ISR_REC_STATUS_REJECTED = 9, ///保留
ISR_REC_STATUS_NO_SPEECH_FOUND = 10 ///没有发现音频
}
#endregion
}
自定义异常:
[Serializable] //声明为可序列化的 因为要写入文件中
public class MscException : ApplicationException//由用户程序引发,用于派生自定义的异常类型
{
/// <summary>
/// 默认构造函数
/// </summary>
public MscException() { }
public MscException(string message)
: base(message) { }
public MscException(string message, Exception inner)
: base(message, inner) { }
}
/// <summary>
/// 是否出错
/// </summary>
internal class RecoErrors
{
public RecoErrors() { }
/// <summary>
/// 是否发生错误.
/// </summary>
/// <param name="id">错误ID</param>
public virtual void GetError(int id)
{
if (id != 0)
{
var ex = new MscException(((ErrorCode)id).ToString("G"));
// var ex = new MscException(Enum.GetName(typeof(ErrorCode),id));
throw ex;
}
}
}
讯飞语音支持边录边上传,不过我这里采用是一次性上传。起初我采用的是边录边上传,不过感觉有数字混合后识别正常率不好(还没跟讯飞那边沟通。),最后才使用一次性上传,毕竟语音文件也不是大就200KB一下。讯飞语音识别不支持多线程识别。
我做的这个语音识别产品,做成服务端与客户端。服务端:放在一个能连接外网机子上提供语音识别(做了一个简单队列),客户端:将音频数据采集后发送到局域网内的语音识别服务端进行识别。
以上有些代码是借助别人的,第一次写大家尽量不要吐槽,不过可以给点意见。大家相互学习...