Windows 7 语音识别开发(SAPI)

参考:http://msdn.microsoft.com/en-us/library/ee125663(v=vs.85).aspx    (sapi5.4 reference)

http://msdn.microsoft.com/zh-cn/library/ms723634    Grammar Format Tags (SAPI 5.3)
http://blog.csdn.net/zhubenfulovepoem/article/details/6803505  语音控制

http://hi.baidu.com/bxybao/item/693fc8098aa36c17acdc704f  sapi5.1介绍


开发步骤:

1 sapi 是基于com的接口,所以应用程序开发需要遵循com调用规则

hr = ::CoInitialize(NULL);

.........

::CoUninitialize();


2 sapi 语音识别主要接口

(1)   语音识别引擎(ISpRecognizer)接口:用于创建语音识别引擎的实例。语音识别引擎对象有两种:独占(InProcRecognizer)的引擎和共享(SharedRecognizer)的引擎。独占的引擎对象只能由创建它的应用程序使用,而共享的引擎可以供多个应用程序共同使用。

(2)   语音识别上下文(ISpRecoContext)接口:主要用于发送和接收与语音识别相关的消息通知,创建语法规则对象。

(3)   语法规则(ISpRecoGrammar)接口:定义引擎需要识别的具体内容,用于创建、载入和激活识别用的语法规则。语法规则定义了期望识别的单词、短语和句子,通常有两种语法规则:听写语法(Dictation Grammar)和命令控制语法(Command and Control Grammar)。命令控制语法主要用于识别用户在语法文件里自定义的一些特定的命令词汇和句子,这些语法规则以XML文件的格式编写,通过ISpRecoGrammar接口载入并激活。

(4)   识别结果(ISpPhrase)接口:用于获取识别的结果,包括识别的文字,识别的语法规则等。

(5)   语音合成(ISpVoice)接口:主要功能是通过访问TTS引擎实现文本到语音的转换,从而使电脑会说话。


CComPtr<ISpRecoContext> cpRecoCtxt;   //语音识别上下文接口
CComPtr<ISpRecoGrammar> cpGrammar;    //语法规则接口
CComPtr<ISpVoice> cpVoice;            //语音合成接口 主要功能是通过访问TTS引擎实现文本到语音的转换,从而使电脑会说话。
CComPtr<ISpRecognizer> cpRecognizer;  // 语音识别引擎
CComPtr<ISpAudio> m_pAudio;           // 创建进程内语音识别引擎需要的音频接口
CComPtr<ISpRecoResult> cpResult;      // 识别结果接口(用于获取识别出的文本)
                 

3 example

// cpp_Aes.cpp : 定义控制台应用程序的入口点。
//

#include "stdafx.h"
/*
#include "aes.h"
#include <string.h>

using namespace std;
void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
	size_t len, const AES_KEY *key,
	unsigned char *ivec, const int enc) 
int _tmain(int argc, _TCHAR* argv[])
{
	unsigned char iv[16] ;
	strncpy((char*)iv,"0102030405060708",16);
	const  char* intext = "http://www.baidu.com";
	AES_KEY key ={0};
	AES_set_encrypt_key((unsigned char*)"0102030405060708",128,&key);
	unsigned char out[1024] ={0}; 
	unsigned char in[1024] ={0};
	memset(in,0x0c,1024);
	memcpy(in,intext,20);
	AES_cbc_encrypt((unsigned char*)in,out,32,&key,iv,1);

	for (int i=0; i < 32 ;i ++)
	{
		printf("%02X ", out[i]);
	}
	return 0;


}
*/
#include <windows.h>
#include <sapi.h>
#include <stdio.h>
#include <string.h>
#include <atlbase.h>
#include "sphelper.h"
//Copyright (c) Microsoft Corporation. All rights reserved.

/**
 * Waits until an event can be pulled from the recognition context and hands
 * back the recognition result carried by that event.
 *
 * @param pRecoCtxt  Recognition context to read events from and to wait on.
 * @param ppResult   Receives the event's recognition result, or NULL when the
 *                   event carries none. A non-NULL result has been AddRef'd
 *                   here, so the caller is responsible for releasing it.
 * @return The HRESULT of the last GetFrom / WaitForNotifyEvent call.
 */
inline HRESULT BlockForResult(ISpRecoContext * pRecoCtxt, ISpRecoResult ** ppResult)
{
	CSpEvent spEvent;
	HRESULT hrStatus = S_OK;

	for (;;)
	{
		// Anything other than S_FALSE ends the loop: either an event was
		// fetched or the call failed.
		hrStatus = spEvent.GetFrom(pRecoCtxt);
		if (hrStatus != S_FALSE)
		{
			break;
		}

		// S_FALSE: no event yet, so block until the context signals one.
		hrStatus = pRecoCtxt->WaitForNotifyEvent(INFINITE);
		if (FAILED(hrStatus))
		{
			break;
		}
	}

	ISpRecoResult * const pRecoResult = spEvent.RecoResult();
	*ppResult = pRecoResult;
	if (pRecoResult != NULL)
	{
		// The event still holds its own reference; take one for the caller.
		pRecoResult->AddRef();
	}

	return hrStatus;
}

/**
 * Returns the phrase that ends the recognition loop, chosen from the user's
 * default UI language as reported by SpGetUserDefaultUILanguage().
 *
 * @return A pointer to a static wide-string literal; never NULL.
 */
const WCHAR * StopWord()
{
	const LANGID uiLanguage = ::SpGetUserDefaultUILanguage();

	if (uiLanguage == MAKELANGID(LANG_JAPANESE, SUBLANG_DEFAULT))
	{
		// NOTE(review): this literal looks like a run of wide-character
		// escapes (\x7d42\x4e86...) whose backslashes were lost when the
		// code was pasted; kept byte-for-byte here. Confirm against the
		// original sample before relying on the Japanese stop word.
		return L"}42N86/0b70e50fc0ea0e70fc/05708504608a087046";
	}

	// Every other language falls back to the English stop word.
	return L"Stop";
}



/**
 * Placeholder notification callback; it deliberately does nothing.
 * Nothing in this listing registers it.
 *
 * @param wParam  Unused.
 * @param lParam  Unused.
 */
void __stdcall SPNOTIFYCALLBACK1(WPARAM wParam, LPARAM lParam)
{
	// Silence unused-parameter warnings; the body is intentionally empty.
	(void)wParam;
	(void)lParam;
}

int main(int argc, char* argv[])
{
	HRESULT hr = E_FAIL;
	bool fUseTTS = true;            // turn TTS play back on or off
	bool fReplay = true;            // turn Audio replay on or off

	// Process optional arguments
	if (argc > 1)
	{
		int i;

		for (i = 1; i < argc; i++)
		{
			if (_stricmp(argv[i], "-noTTS") == 0)
			{
				fUseTTS = false;
				continue;
			}
			if (_stricmp(argv[i], "-noReplay") == 0)
			{
				fReplay = false;
				continue;
			}       
			printf ("Usage: %s [-noTTS] [-noReplay]  ", argv[0]);
			return hr;
		}
	}

	if (SUCCEEDED(hr = ::CoInitialize(NULL)))
	{
		{
			CComPtr<ISpRecoContext> cpRecoCtxt;
			CComPtr<ISpRecoGrammar> cpGrammar;
			CComPtr<ISpVoice> cpVoice;
			CComPtr<ISpRecognizer> cpRecognizer;
			CComPtr<ISpAudio> m_pAudio;

			//hr = cpRecoCtxt.CoCreateInstance(CLSID_SpSharedRecoContext);
			hr = cpRecoCtxt.CoCreateInstance(CLSID_SpInProcRecoContext);
			
			if(SUCCEEDED(hr))
			{
				hr = cpRecoCtxt->GetVoice(&cpVoice);
			}
			hr = cpRecoCtxt->GetRecognizer(&cpRecognizer);

			hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN,&m_pAudio);
			hr = cpRecognizer->SetInput(m_pAudio,TRUE);
			
			cpRecognizer->SetRecoState(SPRST_ACTIVE);

			hr = cpRecoCtxt->SetNotifyWin32Event();
			hr = cpRecoCtxt->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION));
			hr = cpRecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO, NULL, NULL);
			hr = cpRecoCtxt->CreateGrammar(0, &cpGrammar);
			hr = cpGrammar->LoadCmdFromFile(L"cmd.xml",SPLO_DYNAMIC);
			int err = FAILED(hr);
			hr = cpGrammar->SetRuleState( NULL,NULL,SPRS_ACTIVE );

			//hr = cpRecoCtxt->SetNotifyCallbackFunction(SPNOTIFYCALLBACK,)


			/*if (cpRecoCtxt && cpVoice &&
				SUCCEEDED(hr = cpRecoCtxt->SetNotifyWin32Event()) &&
				SUCCEEDED(hr = cpRecoCtxt->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION))) &&
				SUCCEEDED(hr = cpRecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO, NULL, NULL)) &&
				SUCCEEDED(hr = cpRecoCtxt->CreateGrammar(0, &cpGrammar)) &&
				SUCCEEDED(hr = cpGrammar->LoadCmdFromFile(L"cmd.xml",SPLO_DYNAMIC)) &&
				SUCCEEDED(hr = cpGrammar->SetRuleState( NULL,NULL,SPRS_ACTIVE )))*/
				//SUCCEEDED(hr = cpRecoCtxt->CreateGrammar(0, &cpGrammar)) &&
				//SUCCEEDED(hr = cpGrammar->LoadDictation(NULL, SPLO_STATIC)) &&
				//SUCCEEDED(hr = cpGrammar->SetDictationState(SPRS_ACTIVE)))
			{
				USES_CONVERSION;

				const WCHAR * const pchStop = StopWord();
				CComPtr<ISpRecoResult> cpResult;
				
				

				printf( "I will repeat everything you say. Say \" %s \" to exit. ", W2A(pchStop) );
				while(true)
				//while (SUCCEEDED(hr = BlockForResult(cpRecoCtxt, &cpResult)))
				{	
					//cpGrammar->SetDictationState( SPRS_INACTIVE );
					hr = cpRecoCtxt->WaitForNotifyEvent(INFINITE);
					cpGrammar->SetRuleState( NULL,NULL,SPRS_INACTIVE );
					CSpDynamicString dstrText;					
					hr = BlockForResult(cpRecoCtxt, &cpResult);
					if (SUCCEEDED(cpResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, 
						TRUE, &dstrText, NULL)))
					{
						printf("I heard:  %s ", W2A(dstrText));

						if (fUseTTS)
						{
							cpVoice->Speak( L"I heard", SPF_ASYNC, NULL);
							cpVoice->Speak( dstrText, SPF_ASYNC, NULL );
						}

						if (fReplay)
						{
							if (fUseTTS)
								cpVoice->Speak( L"when you said", SPF_ASYNC, NULL);
							else
								printf (" when you said... ");
							cpResult->SpeakAudio(NULL, 0, NULL, NULL);
						}

						cpResult.Release();
					}
					if (_wcsicmp(dstrText, pchStop) == 0)
					{
						break;
					}
					//cpGrammar->SetDictationState( SPRS_ACTIVE );
					cpGrammar->SetRuleState( NULL,NULL,SPRS_ACTIVE );
					
				} 
			}
		}
		::CoUninitialize();
	}
	return hr;
}

4 配置文件

<!--
  Command-and-control grammar with a single top-level rule listing the spoken
  phrases (in Chinese) to be recognized.
  NOTE(review): presumably saved as "cmd.xml" next to the executable, since
  that is the file name the sample passes to LoadCmdFromFile - confirm.
-->
<GRAMMAR>
		<!-- Symbolic name -> numeric value mappings usable as rule IDs below. -->
		<DEFINE>
			<ID NAME="TheNumberFive" VAL="5"/>
		</DEFINE>

		<!-- The rule ID is given by its symbolic name, which DEFINE maps to the number 5.
		     TOPLEVEL="ACTIVE" declares the rule as top-level and active. -->
		<RULE ID="TheNumberFive" TOPLEVEL="ACTIVE">
			<!-- Alternatives, one phrase per P element. In order they mean:
			     turn on the light source / turn off the light source /
			     turn on light no. 1 / turn on light no. 2 / turn off light no. 1 /
			     brighten light no. 1 / turn everything off / turn on the kitchen light. -->
			<List>
			<P>打开灯源</P>
                        <P>关闭灯源</P>
			<P>开一号灯</P>
                        <P>开二号灯</P>
                        <P>关闭一号灯</P>
                        <P>增亮一号灯</P>
                        <P>全部关闭</P>
                        <P>打开厨房灯</P>
                        
                      </List>
		</RULE>
	</GRAMMAR>


你可能感兴趣的:(window7 语音识别开发(sapi))