iFlytek's speech recognition handles at most 60 seconds of audio per recognition pass; a long stretch of silence in the middle will also make it stop recognizing automatically.
The iFlytek SDK does not support multi-threaded use of a session: the begin/end interfaces of one session must all be called from the same thread (a minimal single-thread sketch follows these notes).
MSPLogin(NULL, NULL, "appid = XXXXXXX"); login succeeds even without a username and password.
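For reference, a minimal login/logout sketch using the MSPLogin/MSPLogout interfaces from msp_cmn.h; "XXXXXXX" is only a placeholder appid and must be replaced with your own:
//minimal login sketch: the username and password may be NULL, only the appid matters
#include <stdio.h>
#include "include/msp_cmn.h"
#include "include/msp_errors.h"
int msc_login_demo(void)
{
    int ret = MSPLogin(NULL, NULL, "appid = XXXXXXX"); //placeholder appid
    if (MSP_SUCCESS != ret)
    {
        printf("MSPLogin failed, error code is %d\n", ret);
        return -1;
    }
    /* ...call the recognition / synthesis interfaces here... */
    ret = MSPLogout(); //log out when the SDK is no longer needed
    if (MSP_SUCCESS != ret)
    {
        printf("MSPLogout failed, error code is %d\n", ret);
        return -1;
    }
    return 0;
}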
Debugging: the code was tested stand-alone on the OK210 ARM board with no problems.
Access comes with a registered account; if you need a standalone development environment, you have to submit a request to iFlytek (科大讯飞) so they can compile a development library specifically for you.
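Putting the notes above together, here is a minimal single-thread dictation sketch; recognize_buffer is a hypothetical helper, and the parameter string, the 6400-byte chunking and the status codes are patterned on the vd_send() function further down:
//single-thread sketch: QISRSessionBegin ... QISRSessionEnd all run on one thread,
//and each utterance should stay under the 60-second limit
#include <stdio.h>
#include <unistd.h>
#include "include/qisr.h"
#include "include/msp_errors.h"
int recognize_buffer(const char *pcm, long pcm_size)
{
    int ret = 0, ep_status = 0, rslt_status = 0;
    const char *params = "sub=iat, aue=speex-wb;7, auf=audio/L16;rate=16000, ent=sms16k, rst=plain, rse=utf8";
    const char *sid = QISRSessionBegin(NULL, params, &ret); //begin one session
    if (ret != MSP_SUCCESS)
        return ret;
    long offset = 0;
    while (pcm_size > 0)
    {
        unsigned int len = pcm_size < 6400 ? (unsigned int)pcm_size : 6400;
        int aus = (offset == 0) ? 1 : 2; //1 = first audio block, 2 = continue
        ret = QISRAudioWrite(sid, (const void *)(pcm + offset), len, aus, &ep_status, &rslt_status);
        if (ret != 0)
            break;
        offset += len;
        pcm_size -= len;
        if (ep_status == MSP_EP_AFTER_SPEECH) //endpoint detected, stop sending
            break;
        usleep(150000);
    }
    ret = QISRAudioWrite(sid, NULL, 0, 4, &ep_status, &rslt_status); //4 = last block
    while (rslt_status != 5 && ret == 0) //5 = recognition complete
    {
        const char *rslt = QISRGetResult(sid, &rslt_status, 0, &ret);
        if (NULL != rslt)
            printf("%s", rslt);
        usleep(150000);
    }
    return QISRSessionEnd(sid, NULL); //end on the same thread that called begin
}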
qvoiceintercom.h file
#ifndef QVOICEINTERCOM_H
#define QVOICEINTERCOM_H
#include <QWidget>
#include <QThread>
#include <QFile>
#include <QTimer>
#include <QNetworkAccessManager>
#include <QNetworkReply>
#include <stdio.h>
#include "include/msp_cmn.h"
#include "include/msp_errors.h"
#include "include/msp_types.h"
#include "include/qisr.h"
#include "include/qtts.h"
#define LENGTH 1 /* seconds of audio per capture block */
#define RATE 16000 /* sampling rate */
#define SIZE 8 /* sample size in bits (quantization) */
#define CHANNELS 1 /* number of channels */
#define RSIZE 8 /* size of the playback buffer */
#define PCM_TEXT_TO_SPEECH "/home/SMARTHOME/info/text_to_speech.pcm"
#define PCM_SOUND "/home/SMARTHOME/info/sound.pcm"
#define PCM_SPEAKLOUNDLY "/home/SMARTHOME/info/speakloundly.pcm"
#define JSON_DIR "/home/SMARTHOME/info"
#define BLOCK_LEN (LENGTH * RATE * SIZE * CHANNELS / 8)
/*
typedef int SR_DWORD;
typedef short int SR_WORD ;
//audio header format
struct wave_pcm_hdr
{
char riff[4]; // = "RIFF"
SR_DWORD size_8; // = FileSize - 8
char wave[4]; // = "WAVE"
char fmt[4]; // = "fmt "
SR_DWORD dwFmtSize; // = size of the next structure : 16
SR_WORD format_tag; // = PCM : 1
SR_WORD channels; // = number of channels : 1
SR_DWORD samples_per_sec; // = sampling rate : 8000 | 6000 | 11025 | 16000
SR_DWORD avg_bytes_per_sec; // = bytes per second : dwSamplesPerSec * wBitsPerSample / 8
SR_WORD block_align; // = bytes per sample point : wBitsPerSample / 8
SR_WORD bits_per_sample; // = quantization bits : 8 | 16
char data[4]; // = "data";
SR_DWORD data_size; // = length of raw audio data : FileSize - 44
} ;
//default audio header values
static struct wave_pcm_hdr default_pcmwavhdr =
{
{ 'R', 'I', 'F', 'F' },
0,
{'W', 'A', 'V', 'E'},
{'f', 'm', 't', ' '},
16,
1,
1,
16000,
32000,
2,
16,
{'d', 'a', 't', 'a'},
0
};
*/
class rw_thread;
namespace Ui {
class qvoiceintercom;
class rw_thread;
class vd_thread;
}
class qvoiceintercom : public QWidget
{
Q_OBJECT
public:
explicit qvoiceintercom(QWidget *parent = 0);
~qvoiceintercom();
/************str2audio**/
private:
char appid[15] ; //e.g. 54b0856b, required
char username[20]; //the appid is not the username; may be NULL
char passwd[20]; //user's password; may be NULL
char login_configs[128]; //login_configs = " appid = 54b0856b, work_dir = . "
char wavfilename[128] ; //text_to_speech.pcm
char param[256]; //"vcn = xiaoyu,aue = speex-wb,auf = audio/L16;rate = 16000,spd = 5,vol = 5,tte = utf8"
public:
//set
void str2audioInit();
int setWavfilename(const char * filename);
void setParam(const char * vcn , int spd, int vol);
//functions
int textToSpeech(const char* src_text ,const char* des_path ,const char* params);
/************audio_http**/
private:
char *info;
QByteArray json_buf;
QString turing_url;
QString *base_url;
QNetworkAccessManager *manager;
QNetworkReply *reply;
QFile fjson;
QByteArray vh_code;
QTimer *timeOut;
public:
bool audioHttpInit();
void turingInput(QString str);
void rst_analyse(QByteArray qba,bool flag);
int turingRead(QByteArray j_name,QByteArray *j_data,QByteArray rst_json,int index);
private slots:
void replyFinished(QNetworkReply *reply);
void handleTimeOut();
/************audio2str**/
public:
void audio2strInit(); //audio2str Initial
void dspReadEntrance(); //the entrance of starting Record
void dspWriteEntrance(); //the entrance of starting Play
private slots:
void rst_manage();
/************voiceIntercom**/
private slots:
void on_recordButton_pressed();
void on_recordButton_released();
void on_pushButton_clicked();
private:
//com_serport *com1;
public:
/************voiceIntercom**********/
private:
Ui::qvoiceintercom *ui;
};
//read and write thread====record
class rw_thread : public QThread
{
Q_OBJECT
public:
void run();
explicit rw_thread(int stat);
signals:
void readend();
private:
int th_stat;
};
//send and get data from tuling....
class vd_thread : public QThread
{
Q_OBJECT
public:
void run();
explicit vd_thread();
int vd_send(const char * recog_param);
int vd_end();
signals:
void getresult();
public slots:
void vd_stop();
public:
char * results;
private:
int upload_user_vocabulary();
FILE* result_txt;
int audio_status;
const char *recog_params;
const char *gramer;
};
#endif // QVOICEINTERCOM_H
qvoiceintercom.cpp file
#include "qvoiceintercom.h"
#include "ui_qvoiceintercom.h"
#include <QDebug>
#include <QDir>
#include <QSemaphore>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/soundcard.h>
static bool vdFlag;
QSemaphore freeSpace(5);
QSemaphore usedSpace(0);
static long int count = 0; /* debounce the touch-screen button, so rapid repeated presses are not treated as extra touches */
rw_thread *th_read;
rw_thread *th_write;
vd_thread *th_vd;
static void dspInit(int fd,int rate,int sample_size , int sample_channel)
{
/* set the sample size (quantization bits) */
int arg; /* argument for the ioctl calls */
int status; /* return value of the system calls */
arg = sample_size;
status = ioctl(fd, SOUND_PCM_WRITE_BITS, &arg);
if (status == -1)
perror("SOUND_PCM_WRITE_BITS ioctl failed");
if (arg != sample_size)
perror("unable to set sample size");
/* set the number of channels */
arg = sample_channel;
status = ioctl(fd, SOUND_PCM_WRITE_CHANNELS, &arg);
if (status == -1)
perror("SOUND_PCM_WRITE_CHANNELS ioctl failed");
if (arg != sample_channel)
perror("unable to set number of channels");
/* set the sampling rate */
arg = rate;
status = ioctl(fd, SOUND_PCM_WRITE_RATE, &arg);
if (status == -1)
perror("SOUND_PCM_WRITE_WRITE ioctl failed");
status = ioctl(fd, SOUND_PCM_SYNC, 0);
if (status == -1)
perror("SOUND_PCM_SYNC ioctl failed");
}
static int rdOpen()
{
int fd;
fd = open("/dev/dsp", O_RDONLY);
if (fd < 0)
{
perror("open of /dev/dsp failed");
return -1;
}
printf("[rdOpen]fd_dsprd = %d\n",fd);
dspInit(fd ,RATE, (SIZE*2) , CHANNELS);
return fd;
}
static int wrOpen()
{
int fd;
fd = open("/dev/dsp", O_WRONLY);
if (fd < 0)
{
perror("[write]open of /dev/dsp failed");
return -1;
}
printf("[wrOpen]fd_dspwr = %d\n",fd);
dspInit(fd ,(RATE/2), (SIZE*2) , CHANNELS);
return fd;
}
static int pcmPlay(const char * pcmfilename)
{
int fd_f;
unsigned int i ;
unsigned char buf[RSIZE];
unsigned long filesize = -1;
int fDspwr;
struct stat statbuff;
if(NULL == pcmfilename)return -1;
if((fDspwr = wrOpen()) < 0)return -1;
if(stat(pcmfilename, &statbuff) < 0){
printf("Get pcmfilesize failed,please check!\n");
return -1;
}else{
filesize = statbuff.st_size;
}
printf("PlayPCM...filesize=%ld\n",filesize);
if(( fd_f = open(pcmfilename, O_RDONLY,0777))==-1)
{
perror("cannot open the sound file");
return -1;
}
lseek(fd_f,0,SEEK_SET);
for(i=0;i<(filesize)/RSIZE;i++)
{
if (read(fd_f, buf, sizeof(buf)) != sizeof(buf))
perror("pcmfile read wrong number of bytes");
if (write(fDspwr, buf, sizeof(buf)) != sizeof(buf))
perror("pcm write wrong number of bytes");
}
::close(fd_f);
ioctl(fDspwr, SOUND_PCM_SYNC, 0);
::close(fDspwr);
return 0;
}
qvoiceintercom::qvoiceintercom(QWidget *parent) :
QWidget(parent),
ui(new Ui::qvoiceintercom)
{
ui->setupUi(this);
this->setWindowFlags(Qt::FramelessWindowHint);
ui->textEdit->setFontPointSize(20);
ui->recordButton->setFlat(true);
ui->recordButton->setFocusPolicy(Qt::NoFocus);
ui->textEdit->setAttribute(Qt::WA_InputMethodEnabled, false);
ui->recordButton->setAttribute(Qt::WA_InputMethodEnabled, false);
audioHttpInit();
audio2strInit();
str2audioInit();
//com1 = new com_serport(&comfd,DEV_NAME,8,1,'N');
}
qvoiceintercom::~qvoiceintercom()
{
delete ui;
reply->deleteLater();
}
/********************************************str2audio-start*******************************/
void qvoiceintercom::str2audioInit()
{
setWavfilename(PCM_TEXT_TO_SPEECH);
setParam("xiaoyan",7,3);
}
int qvoiceintercom::setWavfilename(const char * filename)
{
strncpy(wavfilename,filename,sizeof(wavfilename));
return 0;
}
void qvoiceintercom::setParam(const char * vcn , int spd, int vol)
{
char tempParam[256] = "";
int num = 0;
num = sprintf(tempParam,"vcn = %s,aue = speex-wb,auf = audio/L16;rate = 16000,spd = %d,vol = %d,tte = utf8",vcn,spd,vol);
qDebug()<<"setParam=" << tempParam << endl;
memset(param,0,sizeof(param));
strncpy(param,tempParam,sizeof(param));
}
int qvoiceintercom::textToSpeech(const char *src_text, const char *des_path, const char *params)
{
const char* sess_id = NULL;
int ret = 0;
unsigned int text_len = 0;
unsigned int audio_len = 0;
int synth_status = 1;
FILE* fp = NULL;
printf("\nStart TTS!\n");
qDebug()<<"tts param =" << params << endl;
if (NULL == src_text || NULL == des_path || NULL == params)
{
printf("params is null!\n");
return -1;
}
text_len = (unsigned int)strlen(src_text);
fp = fopen(des_path,"wb");
if (NULL == fp)
{
printf("open file %s error\n",des_path);
return -1;
}
sess_id = QTTSSessionBegin(params, &ret);
if ( ret != MSP_SUCCESS )
{
printf("QTTSSessionBegin: qtts begin session failed Error code %d.\n",ret);
return ret;
}
ret = QTTSTextPut(sess_id, src_text, text_len, NULL );
if ( ret != MSP_SUCCESS )
{
printf("QTTSTextPut: qtts put text failed Error code %d.\n",ret);
QTTSSessionEnd(sess_id, "TextPutError");
return ret;
}
while (1)
{
const void *data = QTTSAudioGet(sess_id, &audio_len, &synth_status, &ret);
if (NULL != data)
{
fwrite(data, audio_len, 1, fp);
}
printf("\nget audio...\n");
usleep(150000); //a short sleep is recommended here: QTTSAudioGet only returns audio once the server has synthesized data ready.
if (synth_status == 2 || ret != 0)
break;
}
fclose(fp);
ret = QTTSSessionEnd(sess_id, NULL);
if ( ret != MSP_SUCCESS )
{
printf("QTTSSessionEnd: qtts end failed Error code %d.\n",ret);
}
printf("\nEnd TTS!\n");
return ret;
}
/********************************************audio_http-start*******************************/
bool qvoiceintercom::audioHttpInit()
{
base_url = new QString("http://www.tuling123.com/openapi/api?key=39a5a489acd12eeafaf8a0d7cc1b139a&info=");
manager = new QNetworkAccessManager(this);
timeOut = new QTimer(this);
connect(manager,SIGNAL(finished(QNetworkReply*)),this,SLOT(replyFinished(QNetworkReply*)));
connect(timeOut, SIGNAL(timeout()), this, SLOT(handleTimeOut()));
qDebug("Http Connected");
return true;
}
void qvoiceintercom::turingInput(QString str)
{
printf("[Enter tuiringInput]");
QDir::setCurrent(JSON_DIR);
fjson.setFileName("json.txt");
if(!fjson.open(QIODevice::WriteOnly)){
qDebug() << "Error:Cannot open Json file.";
return ;
}
turing_url = *base_url + str;
qDebug()<< turing_url << endl <<"Waiting ...." << endl;
timeOut->start(10000);
manager->get(QNetworkRequest(QUrl(turing_url)));
}
void qvoiceintercom::replyFinished(QNetworkReply *reply)
{
qDebug("[Enter replyFinished]");
json_buf.clear();
json_buf = reply->readAll();
qDebug() << tr(json_buf.data());
rst_analyse(json_buf,1);
ui->textEdit->append(tr("[小灵]"
) + QString(tr(vh_code.data())) + tr(""));
if(strcmp("关闭",th_vd->results) == 0){
printf("enter this->close\n");
count = 0;
printf("Dsp device was closed!\n");
this->close();
}
textToSpeech(vh_code.data(),wavfilename,param);
dspWriteEntrance();
free(th_vd->results);
fjson.write(json_buf);
fjson.close();
if (timeOut->isActive())
{
timeOut->stop();
}
}
void qvoiceintercom::handleTimeOut()
{
qDebug()<<"[Enter handleTimeOut function]\n";
timeOut->stop();
if (fjson.isOpen())
{
fjson.close();
ui->textEdit->append(tr("[提示]#获取云端对话已经超时!#
"));
qDebug()<< tr("[提示]#获取云端对话已经超时!#");
}
else qDebug()<<"readAll : within 6 seconds! Next,do something...\n";
}
void qvoiceintercom::rst_analyse(QByteArray qba,bool flag)
{
vh_code.clear();
if(0 == flag ){
if(turingRead(QByteArray("code"),&vh_code,qba,0) == -1)
{
qDebug("read error");
}
}
else if(1 == flag){
if(turingRead(QByteArray("text"),&vh_code,qba,0) == -1)
{
qDebug("read error");
}
}
printf("vh_code.data:%s\n",vh_code.data());
}
int qvoiceintercom::turingRead(QByteArray j_name,QByteArray *j_data,QByteArray rst_json,int index)
{
int i = index;
char *rst = rst_json.data();
bool flag = 0;
QByteArray temp;
if(i == 0)
i++;
while(1)
{
//printf("i:%d\n",i);
while((rst[i] != ':') || (flag == true))
{
if(rst[i] == '"')
{
printf("rst[%d]:%c flag:%d\n",i,rst[i],flag);
i++;
flag = !flag;
continue;
}
temp += rst[i++];
}
//printf("temp:%s\n",temp.data());
//printf("name:%s\n",j_name.data());
if(strstr(temp,j_name))
break;
while((rst[i] != ',') && (rst[i] != '}'))
{
i++;
}
if(rst[i] == '}')
{
qDebug("can not find data!");
return -1;
}
i++;
}
//printf("nameend%c\n",rst_json.at(i));
i++;
while(!((rst[i] == ',') || (rst[i] == '}')) || (flag == true))
{
if(rst[i] == '"')
{
i++;
flag = !flag; /* toggle the in-quotes flag */
}
if(rst[i] == '}')break;
*j_data += rst[i++];
}
//printf("%c\n",rst_json.at(i));
i++;
return i;
}
/********************************************audio2str-start*******************************/
void qvoiceintercom::audio2strInit()
{
th_vd = new vd_thread;
th_read = new rw_thread(0);
th_write = new rw_thread(1);
connect(th_read,SIGNAL(readend()),th_vd,SLOT(vd_stop()));
connect(th_vd,SIGNAL(getresult()),this,SLOT(rst_manage()));
}
void qvoiceintercom::dspReadEntrance()
{
qDebug()<<"[Enter dspReadEntrance()]\n";
th_read->start();
}
void qvoiceintercom::dspWriteEntrance()
{
th_write->start();
count = 0;
}
void qvoiceintercom::rst_manage()
{
qDebug()<<"[Enter rst_manage()]\n";
printf("\nresults =%s=\n",th_vd->results);
if( strcmp("",th_vd->results) == 0){
printf("手抖了,或者您的声音太小了!");
ui->textEdit->append(tr("[提示]#手抖了,或者您的声音太小了!#
"));
pcmPlay(PCM_SPEAKLOUNDLY);
count = 0;
}
else {
ui->textEdit->append(tr("[主人]"
)+ QObject::tr(th_vd->results) + tr(""));
turingInput(QString(tr(th_vd->results)));
}
//com1->msg_send(th_vd->results);
}
/********************************************rw_thread start*******************************/
rw_thread::rw_thread(int stat)
{
th_stat = stat;
}
void rw_thread::run()
{
int fDsprd = -1;
int status =0 ;
if(th_stat == 1)
{
qDebug()<<"write rw_thread is runing";
pcmPlay(PCM_TEXT_TO_SPEECH);
}
if(th_stat == 0)
{
if((fDsprd = rdOpen()) < 0){
return ;
}
qDebug()<<"read rw_thread is runing.." << fDsprd ;
freeSpace.acquire();
printf("[rw_thread] freeSpace= %d\n",freeSpace.available());
th_vd->start();
FILE * fp;
fp = fopen(PCM_SOUND,"w");
if (NULL == fp)
{
printf("open file error\n");
exit(1);
}
unsigned char dspBuf[BLOCK_LEN] = ""; /* one LENGTH-second capture block */
while(vdFlag)
{
status = read(fDsprd, dspBuf, sizeof(dspBuf));
if (status != sizeof(dspBuf))
printf("read wrong number of bytes");
fwrite(dspBuf, sizeof(dspBuf), 1, fp);
printf("finished read\n");
memset(dspBuf,0,sizeof(dspBuf));
}
fclose(fp);
::close(fDsprd);
emit readend();
usleep(200000);
usedSpace.release();
printf("[rw_thread] userdSpace= %d\n",usedSpace.available());
}
}
/********************************************vd_thread start*******************************/
int vd_thread::vd_end()
{
int ret;
ret = MSPLogout();
if( MSP_SUCCESS != ret )
{
printf( "MSPLogout failed, error code is: %d", ret );
return -1;
}
return 0;
}
int vd_thread::vd_send(const char * recog_param)
{
qDebug()<<"[Enter vd_send]\n";
int vd_ret;
int ep_status;
int rslt_status;
const char *session_id;
audio_status = 0x02;
/* begin one recognition session */
session_id = QISRSessionBegin( NULL, recog_param, &vd_ret );
if( 0 != vd_ret )
{
printf( "QISRSessionBegin failed, error code is %d\n", vd_ret );
return -1;
}
FILE *f_pcm = NULL;
long pcmSize = 0;
int lastAudio = 0 ;
int audStat = 2 ;
long pcmCount = 0;
char rec_result[1024*4] = {0};
char * wrBuf = NULL;
usedSpace.acquire();
printf("[vd_thread] usedSpace= %d\n",usedSpace.available());
f_pcm = fopen(PCM_SOUND, "rb");
if (NULL != f_pcm) {
fseek(f_pcm, 0, SEEK_END);
pcmSize = ftell(f_pcm);
printf("pcmSize = %ld\n",pcmSize);
fseek(f_pcm, 0, SEEK_SET);
wrBuf = (char *)malloc(pcmSize);
fread((void *)wrBuf, pcmSize, 1, f_pcm);
fclose(f_pcm);
f_pcm = NULL;
}
else return -1;
/* write the audio data and fetch the dictation results */
while( 1)
{
unsigned int len = 6400;
unsigned int audio_len = 6400;
if (pcmSize < 12800) {
len = pcmSize;
lastAudio = 1;
}
audStat = 2;
if (pcmCount == 0)
audStat = 1;
if (len<=0)
{
printf("len <= 0 : break\n");
break;
}
printf("\ncsid=%s,count=%ld,aus=%d,",session_id,pcmCount/audio_len,audStat);
vd_ret = QISRAudioWrite(session_id, (const void *)&wrBuf[pcmCount], len, audStat, &ep_status, &rslt_status);
printf("eps=%d,rss=%d,ret=%d",ep_status,rslt_status,vd_ret);
if (vd_ret != 0){
printf("vd_ret is not 0 : break\n");
break;
}
pcmCount += (long)len;
pcmSize -= (long)len;
if (rslt_status == 0) {
const char *rslt = QISRGetResult(session_id, &rslt_status, 0, &vd_ret);
if (vd_ret !=0)
{
printf("QISRGetResult Failed,vd_ret=%d\n",vd_ret);
break;
}
if (NULL != rslt)
strcat(rec_result,rslt);
}
if (ep_status == MSP_EP_AFTER_SPEECH)
break;
usleep(150000);
}
/* fetch the remaining recognition results */
printf("start get the rest of results\n");
vd_ret=QISRAudioWrite(session_id, (const void *)NULL, 0, 4, &ep_status, &rslt_status);
if (vd_ret !=0)
{
printf("QISRAudioWrite Failed,vd_ret=%d\n",vd_ret);
}
free(wrBuf);
wrBuf = NULL;
while (rslt_status != 5 && vd_ret == 0) {
printf("get the rest of results\n");
const char *rslt = QISRGetResult(session_id, &rslt_status, 0, &vd_ret);
if (NULL != rslt)
{
strcat(rec_result,rslt);
}
usleep(150000);
}
vd_ret=QISRSessionEnd(session_id, NULL);
if(vd_ret !=MSP_SUCCESS)
{
printf("QISRSessionEnd Failed, vd_ret=%d\n",vd_ret);
}
freeSpace.release();
qDebug()<< "[vd_thread] freeSpace:" << freeSpace.available() << endl;
results = (char *)malloc(sizeof(rec_result));
strncpy(results,rec_result,sizeof(rec_result));
/* a short sleep here is important: it avoids wasting CPU while MSC has no buffered recognition results */
usleep(100000);
if( NULL != rec_result )
{
emit getresult();
}
/* session ended, release resources */
session_id = NULL;
return 0;
}
vd_thread::vd_thread()
{
int ret = 0;
/* user login */
ret = MSPLogin("Birdman", "6726389!","appid = 54601962");
if( 0 != ret )
{
printf( "MSPLogin failed, error code is %d\n", ret );
return ;
}
printf("\nMSPLogin successful :%d\n",ret);
recog_params = "sub=iat, ptt=0, aue=speex-wb;7, auf=audio/L16;rate=16000, ent=sms16k, rst=plain, rse=utf8, vad_speech_tail=1500";
}
void vd_thread::run()
{
qDebug()<<"vd_thread is runing...\n";
vd_send(recog_params);
}
void vd_thread::vd_stop()
{
qDebug("[Enter vd_stop]");
audio_status = MSP_AUDIO_SAMPLE_LAST ;
}
/***************************************************************************************/
void qvoiceintercom::on_recordButton_pressed()
{
qDebug()<<"[Enter on_recordButton_pressed]";
vdFlag = true;
count++;
if(count ==1)
dspReadEntrance();
}
void qvoiceintercom::on_recordButton_released()
{
qDebug()<<"[Enter on_recordButton_released]";
vdFlag = false;
}
void qvoiceintercom::on_pushButton_clicked()
{
printf("[Enter on_pushButton_clicked]");
count = 0;
printf("Dsp device was closed!\n");
this->close();
}