实现同步识别和转语音的micro.py
from win32com.client import constants as _constants
import win32com.client
import pythoncom
import time
from threading import Thread, Event
import os
import threading
# Make sure that we've got our COM wrappers generated.
from win32com.client import gencache
# Generate (or load) the pywin32 wrappers for the SAPI type library so that
# the `_constants` lookups used further down resolve.
gencache.EnsureModule('{C866CA3A-32F7-11D2-9602-00C04F8EE628}', 0, 5, 0)
# Event-sink base class for in-process recognition contexts.
_ListenerBase = win32com.client.getevents("SAPI.SpInProcRecoContext")
# Creating an SpInProcRecognizer here avoids the
# "CoInitialize has not been called" error.
# Guess (unverified): merely loading this object may switch the process to
# multi-threaded mode by default?
_recognizer = win32com.client.Dispatch("SAPI.SpInProcRecognizer")
class _ListenerCallback(_ListenerBase):
    """Event sink that forwards recognition-context events to a callback.

    The callback is invoked as
    ``callback(event, phrase, confidence, listener, audio_stream_position)``
    where ``event`` is ``'Recognition'``, ``'StartStream'`` or
    ``'EndStream'``.  For the two stream events the phrase, confidence and
    position arguments are ``None``.

    Once the owning listener reports that it is no longer listening, the sink
    closes itself, drops its reference to the listener and stops forwarding
    events.
    """

    def __init__(self, oobj, listener, callback):
        """Attach to ``oobj`` and remember whom to notify.

        Args:
            oobj: The recognition context whose events are handled.
            listener: Object exposing ``is_listening()``; it is handed back
                to ``callback`` on every event.
            callback: Callable receiving the events (may be ``None``).
        """
        _ListenerBase.__init__(self, oobj)
        print("444444")
        self._listener = listener
        self._callback = callback

    def _release_if_stopped(self):
        """Close the sink and forget the listener once it stopped listening."""
        if self._listener and not self._listener.is_listening():
            self.close()
            self._listener = None

    def OnRecognition(self, StreamNumber, StreamPosition, RecognitionType, Result):
        """Report a recognised phrase together with its confidence."""
        print('-------Recognition------pid: %d, thd: %s' % (os.getpid(), threading.current_thread()))
        self._release_if_stopped()
        if self._callback and self._listener:
            new_result = win32com.client.Dispatch(Result)
            phraseinfo = new_result.PhraseInfo
            phrase = phraseinfo.GetText()
            audio_stream_position = float(phraseinfo.AudioStreamPosition)
            # Confidence is taken from the first phrase element only.
            confidence = float(new_result.PhraseInfo.Elements.Item(0).EngineConfidence)
            self._callback('Recognition', phrase, confidence, self._listener, audio_stream_position)

    def OnStartStream(self, StreamNumber, StreamPosition):
        """Report that the audio input stream has started."""
        print('-------StartStream------')
        self._release_if_stopped()
        # Same guard as OnRecognition: never call a missing callback and never
        # report to a listener that has already been released.
        if self._callback and self._listener:
            self._callback('StartStream', None, None, self._listener, None)
        print(888888)

    def OnEndStream(self, StreamNumber, StreamPosition, StreamReleased):
        """Report that the audio input stream has ended, then detach."""
        print('-------EndStream------')
        self._release_if_stopped()
        if self._callback and self._listener:
            self._callback('EndStream', None, None, self._listener, None)
        print(999999)
        # The stream is finished, so this sink is of no further use.
        self.close()
        self._listener = None
def listen_for(word_list, inputtype, wavpath, memstream=b''):
    """Run one blocking recognition pass and return its results.

    Builds a ``Listener`` from the arguments and returns whatever its
    ``loop_listen()`` returns.

    Args:
        word_list: Phrases the recogniser should accept.
        inputtype: Audio source selector passed through to ``Listener``.
        wavpath: Path of the wave file (passed through to ``Listener``).
        memstream: Raw audio bytes (passed through to ``Listener``).
    """
    return Listener(word_list, inputtype, wavpath, memstream).loop_listen()
class Listener(object):
    """Blocking recogniser for a fixed list of phrases.

    ``inputtype`` selects the audio source:

    * ``0`` - an ``SAPI.SpMMAudioIn`` object,
    * ``1`` - the wave file ``wavname`` (opened with file mode ``0``),
    * anything else - an ``SAPI.SpMemoryStream`` filled with
      ``streamcontent``.

    Call ``loop_listen()`` to pump events until the end of the stream is
    reported; the collected ``[phrase, confidence, audio_stream_position]``
    entries are then returned.
    """

    def __init__(self, phrase_list, inputtype, wavname, streamcontent):
        """Create the recogniser, pick the audio input and load the grammar.

        Args:
            phrase_list: Iterable of phrases to recognise.
            inputtype: Audio source selector (see the class docstring).
            wavname: Path of the wave file, used when ``inputtype == 1``.
            streamcontent: Audio data, used for the memory-stream input.
        """
        print('!!!!!!!!!!!!!!!!!!')
        self.recognizer = win32com.client.Dispatch("SAPI.SpInProcRecognizer")
        self.listenerBase = win32com.client.getevents("SAPI.SpInProcRecoContext")
        print("id(recognizer)", id(self.recognizer))
        self.phrase_list = phrase_list
        self.inputtype = inputtype
        self.context = self.recognizer.CreateRecoContext()
        self.__is_shut_down = Event()
        self.__shutdown_listen = False
        self._filestream = None
        self._memorystream = None
        if self.inputtype == 0:
            self.recognizer.AudioInputStream = win32com.client.Dispatch("SAPI.SpMMAudioIn")
        elif self.inputtype == 1:
            filestream = win32com.client.Dispatch("SAPI.SpFileStream")
            filestream.Open(wavname, 0, False)
            self.recognizer.AudioInputStream = filestream
            self._filestream = filestream
        else:
            memstream = win32com.client.Dispatch("SAPI.SpMemoryStream")
            memstream.SetData(streamcontent)
            self.recognizer.AudioInputStream = memstream
            self._memorystream = memstream
        context = self.context
        grammar = context.CreateGrammar()
        grammar.DictationSetState(0)
        # dunno why we pass the constants that we do here
        rule = grammar.Rules.Add("rule",
                                 _constants.SRATopLevel + _constants.SRADynamic, 0)
        rule.Clear()
        for phrase in self.phrase_list:
            rule.InitialState.AddWordTransition(None, phrase)
        # not sure if this is needed - was here before but dupe is below
        grammar.Rules.Commit()
        # Commit the changes to the grammar
        grammar.CmdSetRuleState("rule", 1)  # active
        grammar.Rules.Commit()
        # Keep the grammar referenced while listening (attribute name kept
        # as-is, including its spelling, for compatibility).
        self._gramar = grammar
        self.result_list = []

    def loop_listen(self):
        """Pump COM messages until listening stops, then return the results.

        The previous code built ``Thread(target=new_thread(), ...)``, which
        *called* the pump loop on the current thread and then started a thread
        with no target.  The loop therefore always ran synchronously; this
        version does the same thing explicitly, so the event sink is still
        created and pumped on the calling thread.

        Returns:
            The list of ``[phrase, confidence, audio_stream_position]``
            entries collected by ``callback``.
        """
        print("pid", os.getpid())
        # The sink must stay referenced for as long as events are pumped.
        sink = _ListenerCallback(self.context, self, self.callback)
        print("id(a)", id(sink))
        try:
            while not self.__shutdown_listen:
                pythoncom.PumpWaitingMessages()  # dispatches pending events
                time.sleep(.2)
        finally:
            # Signal completion even when pumping raised.
            self.__is_shut_down.set()
        return self.get_result()

    def callback(self, event, phrase, confidence, listener, audio_stream_position):
        """Collect results; reset on stream start and stop on stream end."""
        if event == 'Recognition':
            self.result_list.append([phrase, confidence, audio_stream_position])
        elif event == 'StartStream':
            self.result_list = []
        elif event == 'EndStream':
            self.stop_listening()

    def get_result(self):
        """Return the list of results collected so far."""
        return self.result_list

    def is_listening(self):
        """Return True until ``stop_listening()`` has been called."""
        return not self.__shutdown_listen

    def stop_listening(self):
        """Stop the pump loop and release the grammar and the file stream.

        Safe to call more than once: the file stream is closed only the first
        time.
        """
        self.__shutdown_listen = True
        # Dropping the grammar reference lets the event handler die.
        self._gramar = None
        # Close the file stream if one was opened.
        if self._filestream:
            self._filestream.Close()
            self._filestream = None
def say_to_file(context, wavpath):
    """Synthesise ``context`` as speech and write it to ``wavpath``.

    A fresh voice object is created on every call, because sharing one global
    voice between threads causes conflicts.

    Args:
        context: The text to speak.
        wavpath: Path of the wave file to create.
    """
    new_voice = win32com.client.Dispatch("SAPI.SpVoice")
    filestream = win32com.client.Dispatch("SAPI.SpFileStream")
    # File modes: SSFMOpenForRead(0) SSFMOpenReadWrite(1) SSFMCreate(2)
    # SSFMCreateForWrite(3).  The last argument (generate voice events) is
    # False.
    filestream.Open(wavpath, _constants.SSFMCreateForWrite, False)
    try:
        new_voice.AudioOutputStream = filestream
        new_voice.Speak(context)
    finally:
        # Always release the file, even when speaking fails.
        filestream.Close()
def say_to_memory(context):
    """Synthesise ``context`` as speech and return the audio as ``bytes``.

    A fresh voice object is created on every call, because sharing one global
    voice between threads causes conflicts.
    """
    voice = win32com.client.Dispatch("SAPI.SpVoice")
    buffer = win32com.client.Dispatch("SAPI.SpMemoryStream")
    # Route the voice output into the in-memory stream instead of a device.
    voice.AudioOutputStream = buffer
    voice.Speak(context)
    return bytes(buffer.GetData())
原生的py
"""
speech recognition and voice synthesis module.
Please let me know if you like or use this module -- it would make my day!
speech.py: Copyright 2008 Michael Gundlach (gundlach at gmail)
License: Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
For this module to work, you'll need pywin32 (http://tinyurl.com/5ezco9
for Python 2.5 or http://tinyurl.com/5uzpox for Python 2.4) and
the Microsoft Speech kit (http://tinyurl.com/zflb).
Classes:
Listener: represents a command to execute when phrases are heard.
Functions:
say(phrase): Say the given phrase out loud.
input(prompt, phraselist): Block until input heard, then return text.
stoplistening(): Like calling stoplistening() on all Listeners.
islistening(): True if any Listener is listening.
listenforanything(callback): Run a callback when any text is heard.
listenfor(phraselist, callback): Run a callback when certain text is heard.
Very simple usage example:
import speech
speech.say("Say something.")
print "You said " + speech.input()
def L1callback(phrase, listener):
print phrase
def L2callback(phrase, listener):
if phrase == "wow":
listener.stoplistening()
speech.say(phrase)
# callbacks are executed on a separate events thread.
L1 = speech.listenfor(["hello", "good bye"], L1callback)
L2 = speech.listenforanything(L2callback)
assert speech.islistening()
assert L2.islistening()
L1.stoplistening()
assert not L1.islistening()
speech.stoplistening()
"""
from win32com.client import constants as _constants
import win32com.client
import pythoncom
import time
import thread
# Make sure that we've got our COM wrappers generated.
from win32com.client import gencache
# Generate (or load) the pywin32 wrappers for the SAPI type library so that
# the `_constants` lookups used further down resolve.
gencache.EnsureModule('{C866CA3A-32F7-11D2-9602-00C04F8EE628}', 0, 5, 0)
# Module-wide voice used by say().
_voice = win32com.client.Dispatch("SAPI.SpVoice")
# Module-wide recogniser from which _startlistening() creates contexts.
_recognizer = win32com.client.Dispatch("SAPI.SpSharedRecognizer")
# Not referenced by any code visible in this file.
_listeners = []
# Pending (context, listener, callback) requests; the event thread turns each
# one into a _ListenerCallback.
_handlerqueue = []
# Truthy while the event thread should keep running; None stops its loop.
_eventthread=None
class Listener(object):
    """Handle for one active speech subscription.

    Callbacks run on the shared event thread, not on the creating thread.
    """

    # Every Listener that is currently listening.
    _all = set()

    def __init__(self, context, grammar, callback):
        """
        Not meant to be called directly; speech.listenfor() and
        speech.listenforanything() create Listener objects.
        """
        self._grammar = grammar
        Listener._all.add(self)
        # Ask the event thread to build the event handler that will invoke
        # our callback when speech events arrive.
        request = (context, self, callback)
        _handlerqueue.append(request)
        _ensure_event_thread()

    def islistening(self):
        """Return True while this Listener is still registered as listening."""
        return self in Listener._all

    def stoplistening(self):
        """Stop listening; return True only if we were listening before."""
        global _eventthread
        if self not in Listener._all:
            return False
        Listener._all.remove(self)
        # Dropping the last reference to the grammar lets the event handler die.
        self._grammar = None
        if not Listener._all:
            # Nobody is listening any more: let the event thread finish.
            _eventthread = None
        return True
# Event-sink base class for shared-recogniser contexts.
_ListenerBase = win32com.client.getevents("SAPI.SpSharedRecoContext")


class _ListenerCallback(_ListenerBase):
    """Event sink that turns recognition events into callback invocations.

    Instances are expected to die on their own once their listener drops its
    grammar reference. TODO: we may need to call self.close() to release the
    COM object, and goaway() should probably become a method of this class
    rather than something callers do for us.
    """

    def __init__(self, oobj, listener, callback):
        """Attach to context ``oobj``; report to ``callback`` for ``listener``."""
        _ListenerBase.__init__(self, oobj)
        self._listener = listener
        self._callback = callback

    def OnRecognition(self, _1, _2, _3, Result):
        """Forward the recognised text as ``callback(phrase, listener)``."""
        listener = self._listener
        # A stopped listener should already have killed this object, but COM
        # can be funky, so close explicitly before letting go of it.
        if listener and not listener.islistening():
            self.close()
            self._listener = listener = None
        if not (self._callback and listener):
            return
        recognised = win32com.client.Dispatch(Result)
        self._callback(recognised.PhraseInfo.GetText(), listener)
def say(phrase):
    """Say the given phrase out loud.

    Passes ``phrase`` to the ``Speak`` method of the module-level ``_voice``
    object without any additional flags.
    """
    _voice.Speak(phrase)
def input(prompt=None, phraselist=None):
    """
    Print the prompt if it is not None, then listen for a string in phraselist
    (or anything, if phraselist is None.)  Returns the string response that is
    heard.  Note that this will block the thread until a response is heard or
    Ctrl-C is pressed.

    Returns None when the listener was stopped by someone else (for example
    through the module-level stoplistening()) before anything was heard.
    """
    def response(phrase, listener):
        # Only the first phrase heard is kept.
        if not hasattr(listener, '_phrase'):
            listener._phrase = phrase  # so outside caller can find it
        listener.stoplistening()

    if prompt:
        # A parenthesised single argument prints the same on Python 2 and 3.
        print(prompt)
    if phraselist:
        listener = listenfor(phraselist, response)
    else:
        listener = listenforanything(response)
    # Poll until the response callback (or anybody else) stops the listener.
    while listener.islistening():
        time.sleep(.1)
    # hacky way to pass back a response; tolerate the attribute never being
    # set instead of failing with AttributeError.
    return getattr(listener, '_phrase', None)
def stoplistening():
    """
    Stop every Listener that is currently listening.  Returns True when at
    least one of them had been listening.
    """
    stopped_any = False
    # Iterate over a snapshot: each stoplistening() call mutates Listener._all.
    for listener in set(Listener._all):
        if listener.stoplistening():
            stopped_any = True
    return stopped_any
def islistening():
    """Return True when at least one Listener is listening."""
    return bool(Listener._all)
def listenforanything(callback):
    """
    When anything resembling English is heard, callback(spoken_text, listener)
    is executed.  Returns a Listener object.

    The first argument to callback will be the string of text heard.
    The second argument will be the same listener object returned by
    listenforanything().

    Execution takes place on a single thread shared by all listener callbacks.
    """
    # A phrase list of None makes _startlistening() use dictation mode.
    return _startlistening(None, callback)
def listenfor(phraselist, callback):
    """
    If any of the phrases in the given list are heard,
    callback(spoken_text, listener) is executed.  Returns a Listener object.

    The first argument to callback will be the string of text heard.
    The second argument will be the same listener object returned by
    listenfor().

    Execution takes place on a single thread shared by all listener callbacks.
    """
    # Note: an empty phraselist is falsy, so _startlistening() treats it like
    # None and falls back to dictation mode.
    return _startlistening(phraselist, callback)
def _startlistening(phraselist, callback):
    """
    Create a recognition context and grammar and wrap them in a Listener.

    A non-empty phraselist selects Command-and-Control mode restricted to
    those phrases; otherwise dictation mode is switched on.  When a phrase is
    heard, callback(phrase_text, listener) is executed, where listener is the
    object returned from here.

    Execution takes place on a single thread shared by all listener callbacks.
    """
    context = _recognizer.CreateRecoContext()
    grammar = context.CreateGrammar()

    if not phraselist:
        # Free-form dictation.
        grammar.DictationSetState(1)
        return Listener(context, grammar, callback)

    # Command-and-control grammar: dictation off, one dynamic top-level rule.
    grammar.DictationSetState(0)
    # dunno why we pass the constants that we do here
    rule_flags = _constants.SRATopLevel + _constants.SRADynamic
    rule = grammar.Rules.Add("rule", rule_flags, 0)
    rule.Clear()
    for phrase in phraselist:
        rule.InitialState.AddWordTransition(None, phrase)
    # not sure if this first commit is needed - it was here before, and the
    # rules are committed again below
    grammar.Rules.Commit()
    grammar.CmdSetRuleState("rule", 1)  # active
    # Commit the changes to the grammar
    grammar.Rules.Commit()
    return Listener(context, grammar, callback)
def _ensure_event_thread():
    """
    Make sure the eventthread is running, which checks the handlerqueue
    for new eventhandlers to create, and runs the message pump.

    Does nothing when the module-level ``_eventthread`` is already truthy.
    """
    global _eventthread
    if not _eventthread:
        def loop():
            # Keeps running until the module-level _eventthread is reset to a
            # falsy value (Listener.stoplistening() sets it to None).
            while _eventthread:
                # Dispatch pending COM messages.
                pythoncom.PumpWaitingMessages()
                if _handlerqueue:
                    # list.pop() takes the most recently queued request; at
                    # most one request is handled per pass of the loop.
                    (context,listener,callback) = _handlerqueue.pop()
                    # Just creating a _ListenerCallback object makes events
                    # fire till listener loses reference to its grammar object
                    _ListenerCallback(context, listener, callback)
                time.sleep(.5)
        _eventthread = 1 # so loop doesn't terminate immediately
        # Replace the placeholder with the value returned by
        # start_new_thread().
        _eventthread = thread.start_new_thread(loop, ())
"""
Speech recognition and synthesis library for Windows - Python 2 and 3.
Based on the abandoned Python 2 library https://github.com/michaelgundlach/pyspeech.
This module adds a few more features, the most important being the ability to create
in-process speech recognizers.
Author: Areeb Beigh
GitHub: https://github.com/areebbeigh/winspeech
License:
Copyright 2016 Areeb Beigh
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from win32com.client import constants as _constants
import win32com.client
import pythoncom
import time
import sys
if sys.version_info.major == 3:
import _thread
else:
# Python 2
import thread as _thread
# Make sure that we've got our COM wrappers generated.
from win32com.client import gencache
__author__ = "Areeb Beigh [[email protected]]"
# Selector values accepted by initialize_recognizer().
INPROC_RECOGNIZER = "SAPI.SpInprocRecognizer"
SHARED_RECOGNIZER = "SAPI.SpSharedRecognizer"
# Generate (or load) the pywin32 wrappers for the SAPI type library so that
# the `_constants` lookups used further down resolve.
gencache.EnsureModule('{C866CA3A-32F7-11D2-9602-00C04F8EE628}', 0, 5, 0)
# Module-wide voice used by say(), say_wait() and stop_talking().
_voice = win32com.client.Dispatch("SAPI.SpVoice")
# Default to an in-process recogniser; initialize_recognizer() can rebind
# _recognizer and _ListenerBase later.
_recognizer = win32com.client.Dispatch("SAPI.SpInProcRecognizer")
_recognizer.AudioInputStream = win32com.client.Dispatch("SAPI.SpMMAudioIn")
_ListenerBase = win32com.client.getevents("SAPI.SpInProcRecoContext")
# Not referenced by any code visible in this file.
_listeners = []
# Pending (context, listener, callback) requests; the event thread turns each
# one into a _ListenerCallback.
_handlerqueue = []
# Truthy while the event thread should keep running; None stops its loop.
_event_thread = None
def initialize_recognizer(recognizer):
    """
    Initializes the speech recognizer.

    Parameters:
        recognizer: The type of recognizer to initialize:
            winspeech.INPROC_RECOGNIZER for an in-process recognizer or
            winspeech.SHARED_RECOGNIZER for the shared recognizer.

    Raises:
        ValueError: if ``recognizer`` is neither of the two constants.  This
            now also holds for non-string values, which used to fail with a
            TypeError while the message was being built.
    """
    global _recognizer, _ListenerBase

    # NOTE(review): a class that was already created with the previous
    # _ListenerBase as its base keeps that base; rebinding the global here
    # only affects classes defined afterwards - confirm this is intended.
    if recognizer == INPROC_RECOGNIZER:
        _recognizer = win32com.client.Dispatch("SAPI.SpInProcRecognizer")
        # The in-process recognizer is given an audio input explicitly.
        _recognizer.AudioInputStream = win32com.client.Dispatch("SAPI.SpMMAudioIn")
        _ListenerBase = win32com.client.getevents("SAPI.SpInProcRecoContext")
        return

    if recognizer == SHARED_RECOGNIZER:
        _recognizer = win32com.client.Dispatch("SAPI.SpSharedRecognizer")
        _ListenerBase = win32com.client.getevents("SAPI.SpSharedRecoContext")
        return

    # %-formatting yields the same text for strings and cannot raise a
    # TypeError for other argument types.
    raise ValueError("%s is not a valid recognizer" % (recognizer,))
class Listener(object):
    """Handle for one active speech subscription.

    Callbacks run on the shared event thread, not on the creating thread.
    """

    # Every Listener that is currently listening.
    _all = set()

    def __init__(self, context, grammar, callback):
        """
        Not meant to be called directly; speech.listen_for() and
        speech.listen_for_anything() create Listener objects.
        """
        self._grammar = grammar
        Listener._all.add(self)
        # Ask the event thread to build the event handler that will invoke
        # our callback when speech events arrive.
        request = (context, self, callback)
        _handlerqueue.append(request)
        _ensure_event_thread()

    def is_listening(self):
        """Return True while this Listener is still registered as listening."""
        return self in Listener._all

    def stop_listening(self):
        """Stop listening; return True only if we were listening before."""
        global _event_thread
        if self not in Listener._all:
            return False
        Listener._all.remove(self)
        # Dropping the last reference to the grammar lets the event handler die.
        self._grammar = None
        if not Listener._all:
            # Nobody is listening any more: let the event thread finish.
            _event_thread = None
        return True
class _ListenerCallback(_ListenerBase):
    """
    Event sink that turns recognition events into callback invocations.

    Instances are expected to die on their own once their listener drops its
    grammar reference. TODO: we may need to call self.close() to release the
    COM object, and goaway() should probably become a method of this class
    rather than something callers do for us.
    """

    def __init__(self, oobj, listener, callback):
        """Attach to context ``oobj``; report to ``callback`` for ``listener``."""
        _ListenerBase.__init__(self, oobj)
        self._listener = listener
        self._callback = callback

    def OnRecognition(self, _1, _2, _3, Result):
        """Forward the recognised text as ``callback(phrase, listener)``."""
        listener = self._listener
        # A stopped listener should already have killed this object, but COM
        # can be funky, so close explicitly before letting go of it.
        if listener and not listener.is_listening():
            self.close()
            self._listener = listener = None
        if not (self._callback and listener):
            return
        recognised = win32com.client.Dispatch(Result)
        self._callback(recognised.PhraseInfo.GetText(), listener)
def say(phrase):
    """ Say the given phrase out loud. This will run on a separate thread. """
    # Flag 1 is presumably SpeechVoiceSpeakFlags.SVSFlagsAsync (return
    # without waiting for the speech to finish) - confirm against SAPI docs.
    _voice.Speak(phrase, 1)
def say_wait(phrase):
    """ Say the given phrase out loud. This will run on the current thread. """
    # No flags are passed, unlike say() which passes 1.
    _voice.Speak(phrase)
def stop_talking():
    """ Stop the current utterance. """
    # Speaks an empty string with flags 3; presumably SVSFlagsAsync (1)
    # combined with SVSFPurgeBeforeSpeak (2), which discards whatever is
    # still queued - confirm against SAPI docs.
    _voice.Speak("", 3)
"""
def input(prompt=None, phrase_list=None):
'''
Print the prompt if it is not None, then listen for a string in phrase_list
(or anything, if phrase_list is None.) Returns the string response that is
heard. Note that this will block the thread until a response is heard or
Ctrl-C is pressed.
'''
def response(phrase, listener):
if not hasattr(listener, '_phrase'):
listener._phrase = phrase # so outside caller can find it
listener.stop_listening()
if prompt:
print(prompt)
if phrase_list:
listener = listen_for(phrase_list, response)
else:
listener = listen_for_anything(response)
while listener.is_listening():
time.sleep(.1)
return listener._phrase # hacky way to pass back a response...
"""
def stop_listening():
    """
    Stop every Listener that is currently listening.  Returns True when at
    least one of them had been listening.
    """
    stopped_any = False
    # Iterate over a snapshot: each stop_listening() call mutates
    # Listener._all.
    for listener in set(Listener._all):
        if listener.stop_listening():
            stopped_any = True
    return stopped_any
def is_listening():
    """Return True when at least one Listener is listening."""
    return bool(Listener._all)
def listen_for_anything(callback):
    """
    When anything resembling English is heard, callback(spoken_text, listener)
    is executed.  Returns a Listener object.

    The first argument to callback will be the string of text heard.
    The second argument will be the same listener object returned by
    listen_for_anything().

    Execution takes place on a single thread shared by all listener callbacks.
    """
    # A phrase list of None makes _start_listening() use dictation mode.
    return _start_listening(None, callback)
def listen_for(phrase_list, callback):
    """
    If any of the phrases in the given list are heard,
    callback(spoken_text, listener) is executed.  Returns a Listener object.

    The first argument to callback will be the string of text heard.
    The second argument will be the same listener object returned by
    listen_for().

    Execution takes place on a single thread shared by all listener callbacks.
    """
    # Note: an empty phrase_list is falsy, so _start_listening() treats it
    # like None and falls back to dictation mode.
    return _start_listening(phrase_list, callback)
def _start_listening(phrase_list, callback):
    """
    Create a recognition context and grammar and wrap them in a Listener.

    A non-empty phrase_list selects Command-and-Control mode restricted to
    those phrases; otherwise dictation mode is switched on.  When a phrase is
    heard, callback(phrase_text, listener) is executed, where listener is the
    object returned from here.

    Execution takes place on a single thread shared by all listener callbacks.
    """
    context = _recognizer.CreateRecoContext()
    grammar = context.CreateGrammar()

    if not phrase_list:
        # Free-form dictation.
        grammar.DictationSetState(1)
        return Listener(context, grammar, callback)

    # Command-and-control grammar: dictation off, one dynamic top-level rule.
    grammar.DictationSetState(0)
    # dunno why we pass the constants that we do here
    rule_flags = _constants.SRATopLevel + _constants.SRADynamic
    rule = grammar.Rules.Add("rule", rule_flags, 0)
    rule.Clear()
    for phrase in phrase_list:
        rule.InitialState.AddWordTransition(None, phrase)
    # not sure if this first commit is needed - it was here before, and the
    # rules are committed again below
    grammar.Rules.Commit()
    grammar.CmdSetRuleState("rule", 1)  # active
    # Commit the changes to the grammar
    grammar.Rules.Commit()
    return Listener(context, grammar, callback)
def _ensure_event_thread():
    """
    Make sure the event thread is running, which checks the handlerqueue
    for new event handlers to create, and runs the message pump.

    Does nothing when the module-level ``_event_thread`` is already truthy.
    """
    global _event_thread
    if not _event_thread:
        def loop():
            # Keeps running until the module-level _event_thread is reset to
            # a falsy value (Listener.stop_listening() sets it to None).
            while _event_thread:
                # Dispatch pending COM messages.
                pythoncom.PumpWaitingMessages()
                if _handlerqueue:
                    # list.pop() takes the most recently queued request; at
                    # most one request is handled per pass of the loop.
                    (context, listener, callback) = _handlerqueue.pop()
                    # Just creating a _ListenerCallback object makes events
                    # fire till listener loses reference to its grammar object
                    _ListenerCallback(context, listener, callback)
                time.sleep(.5)
        _event_thread = 1  # so loop doesn't terminate immediately
        # Replace the placeholder with the value returned by
        # start_new_thread().
        _event_thread = _thread.start_new_thread(loop, ())