CODE
- python端的绑定和本文一样,还需要将 cdef char* LANGUAGE = b'en' 改为中文 b'zh'(也可以在函数中增加一个参数来配置这个值)。
- ps:本来想尝试
cdef whisper_context* whisper_init_from_file_with_params_no_state(char*, whisper_full_params)
然后进行调用,但是发现最新版的whisper.h没有这个API了,所以先不加了。
import pyaudio
import wave
import struct
import sys
import numpy as np
import pyqtgraph as pg
from PyQt5 import QtWidgets
from PyQt5.QtCore import Qt
from whispercpp import Whisper
# --- Audio capture settings (check Audio MIDI Setup if on Mac) ---
FORMAT = pyaudio.paInt16   # 16-bit signed samples
RATE = 16000               # sample rate in Hz
CHANNELS = 2               # interleaved stereo capture

# Plot spans [-RANGE, RANGE] on the x axis; when unset, fall back to RATE/2.
RANGE = None
if not RANGE:
    RANGE = RATE / 2

# Amount of audio fed to each FFT update.
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE * INPUT_BLOCK_TIME)

# Which stereo channel to analyze: "l" (left) or "r" (right).
LR = "l"
class SpectrumAnalyzer():
    """PyQt5 window that plots a live FFT of microphone input.

    Buttons let the user record 5 seconds of audio to ``output.wav``,
    lazily load a whispercpp model, and transcribe the recording into
    the text field.

    NOTE(review): the model and wav paths in the button handlers are
    hard-coded; adjust them for your machine.
    """

    def __init__(self):
        # Open the audio backend first; initMicrophone() needs self.pa.
        self.pa = pyaudio.PyAudio()
        self.initMicrophone()
        self.initUI()

    def find_input_device(self):
        """Return the index of an input device whose name mentions
        'mic' or 'input', or None so PyAudio uses its default device.

        Bug fix: the original tested ``name in ["mic", "input"]``, an
        exact match against the whole device name that essentially
        never succeeds; a substring test was clearly intended.
        """
        device_index = None
        for i in range(self.pa.get_device_count()):
            name = self.pa.get_device_info_by_index(i)["name"].lower()
            if "mic" in name or "input" in name:
                device_index = i  # as in the original loop, the last match wins
        return device_index

    def initMicrophone(self):
        """Open the capture stream on the detected (or default) device."""
        device_index = self.find_input_device()
        self.stream = self.pa.open(format=FORMAT,
                                   channels=CHANNELS,
                                   rate=RATE,
                                   input=True,
                                   input_device_index=device_index,
                                   frames_per_buffer=INPUT_FRAMES_PER_BLOCK)

    def readData(self):
        """Read one block from the stream and return the selected channel
        as a numpy array of int16 samples."""
        block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
        count = len(block) // 2  # bytes -> int16 sample count (was float division)
        shorts = struct.unpack("%dh" % count, block)
        if CHANNELS == 1:
            return np.array(shorts)
        # Interleaved stereo: even indices are left, odd are right.
        if LR == 'l':
            return np.array(shorts[::2])
        return np.array(shorts[1::2])

    def initUI(self):
        """Build the Qt window: three buttons, a text field and the plot."""
        self.app = QtWidgets.QApplication([])
        # Bug fix: quitOnLastWindowClosed() is the getter; calling it was
        # a no-op. The setter is what was intended here.
        self.app.setQuitOnLastWindowClosed(True)
        self.mainWindow = QtWidgets.QMainWindow()
        self.mainWindow.setWindowFlags(Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)
        self.mainWindow.setWindowTitle("Spectrum Analyzer")
        self.mainWindow.setGeometry(100, 100, 300, 200)
        self.centralWid = QtWidgets.QWidget()
        self.mainWindow.setCentralWidget(self.centralWid)
        self.lay = QtWidgets.QVBoxLayout()
        self.centralWid.setLayout(self.lay)
        # Record button.
        self.button_start = QtWidgets.QPushButton("Start Record Audio")
        self.button_start.clicked.connect(self.Button_Start)
        self.lay.addWidget(self.button_start)
        # Whisper-init button; model loading is deferred until clicked.
        self.button_end = QtWidgets.QPushButton("whisper Init")
        self.whisper = None
        self.is_whisper_inited = False
        self.button_end.clicked.connect(self.Button_Whisper)
        self.lay.addWidget(self.button_end)
        # Transcribe button.
        self.button = QtWidgets.QPushButton("TRANS AUDIO")
        self.button.clicked.connect(self.Button_TransAudio)
        self.lay.addWidget(self.button)
        # Status label and result field.
        self.label = QtWidgets.QLabel("Text will appear here:")
        self.lay.addWidget(self.label)
        self.text_field = QtWidgets.QLineEdit()
        self.text_field.setFixedSize(280, 200)
        self.lay.addWidget(self.text_field)
        # Spectrum plot.
        self.specWid = pg.PlotWidget(name="spectrum")
        self.specItem = self.specWid.getPlotItem()
        self.specItem.setMouseEnabled(y=False)
        self.specItem.setYRange(0, 1000)
        self.specItem.setXRange(-RANGE, RANGE, padding=0)
        self.specAxis = self.specItem.getAxis("bottom")
        self.specAxis.setLabel("Frequency [Hz]")
        self.lay.addWidget(self.specWid)
        self.mainWindow.show()
        self.app.aboutToQuit.connect(self.close)

    def onButtonClick(self):
        """Demo handler; not connected to any widget in initUI()."""
        self.label.setText("Whisper res is:")
        self.text_field.setText("Hello")

    def Button_Whisper(self):
        """Load the whisper model (hard-coded path) and mark it ready."""
        # NOTE(review): model name 'large' vs. a *medium* ggml file looks
        # inconsistent — confirm which model is actually wanted.
        self.whisper = Whisper('large', model_path="/home/pdd/myassets/ggml-medium.bin")
        self.is_whisper_inited = True
        self.text_field.setText("Whisper INITED")

    def Button_TransAudio(self):
        """Transcribe the recorded wav file and display the text."""
        # Robustness fix: the original dereferenced self.whisper (still
        # None until Button_Whisper runs) and crashed with AttributeError.
        if not self.is_whisper_inited:
            self.text_field.setText("Whisper NOT inited")
            return
        result = self.whisper.transcribe("/home/pdd/le/pywhisper/output.wav")
        text = self.whisper.extract_text(result)
        self.text_field.setText(str(text))

    def Button_Start(self):
        """Record 5 seconds from the open stream into output.wav."""
        self.label.setText("Whisper res is:")
        self.text_field.setText("Start ---")
        frames = []
        sample_rate = 16000
        duration = 5  # seconds
        for _ in range(int(sample_rate / 1024 * duration)):
            frames.append(self.stream.read(1024))
        # Save the capture as 16-bit PCM at the stream's channel count.
        with wave.open("output.wav", 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(self.pa.get_sample_size(FORMAT))
            wf.setframerate(sample_rate)
            wf.writeframes(b''.join(frames))
        self.text_field.setText("保存为wav文件")

    def close(self):
        """Release audio resources and exit the process."""
        self.stream.close()
        # Resource fix: also shut down PortAudio, not just the stream.
        self.pa.terminate()
        sys.exit()

    def get_spectrum(self, data):
        """Return (frequencies, magnitudes) of the FFT of ``data``,
        fftshift-ed so zero frequency sits in the middle of the plot."""
        T = 1.0 / RATE
        N = data.shape[0]
        Pxx = (1. / N) * np.fft.fft(data)
        f = np.fft.fftfreq(N, T)
        Pxx = np.fft.fftshift(Pxx)
        f = np.fft.fftshift(f)
        return f.tolist(), (np.absolute(Pxx)).tolist()

    def mainLoop(self):
        """Poll the microphone and redraw the spectrum until the app quits."""
        while True:
            # Input sometimes overflows because of mouse events; skip that block.
            try:
                data = self.readData()
            except IOError:
                continue
            f, Pxx = self.get_spectrum(data)
            self.specItem.plot(x=f, y=Pxx, clear=True)
            QtWidgets.QApplication.processEvents()
if __name__ == "__main__":
    # Build the analyzer (opens the mic and the window) and start polling.
    SpectrumAnalyzer().mainLoop()