golang 使用科大讯飞进行语音合成与识别
使用科大讯飞 API 进行语音合成和识别,可识别wav和pcm文件
语音识别
package main
import (
"context"
"crypto/hmac"
"crypto/sha256"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net/http"
"net/url"
"os"
"strings"
"time"
"github.com/gorilla/websocket"
)
/**
* 语音听写流式 WebAPI 接口调用示例 接口文档(必看):https://doc.xfyun.cn/rest_api/语音听写(流式版).html
* webapi 听写服务参考帖子(必看):http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=38947&extra=
* 语音听写流式WebAPI 服务,热词使用方式:登陆开放平台https://www.xfyun.cn/后,找到控制台--我的应用---语音听写---服务管理--上传热词
* 注意:热词只能在识别的时候会增加热词的识别权重,需要注意的是增加相应词条的识别率,但并不是绝对的,具体效果以您测试为准。
* 错误码链接:https://www.xfyun.cn/document/error-code (code返回错误码时必看)
* @author iflytek
*/
// Settings for the streaming dictation (IAT) demo. The credential strings are
// placeholders that must be replaced with your own values before running.
var (
// WebSocket endpoint that main dials (after signing via assembleAuthUrl).
hostUrl = "wss://iat-api.xfyun.cn/v2/iat"
// Used as the "hmac username" in the authorization parameter.
apiKey = "自己的key"
// HMAC key used to sign the handshake request.
apiSecret = "自己的secret"
file = "test.wav" // path of the audio file to stream
// Sent as common.app_id in the first frame.
appid = "自己的appid"
)
// Values for data.status in the frames sent by main.
const (
// First audio frame; main also attaches the common and business sections to it.
STATUS_FIRST_FRAME = 0
// Any frame between the first and the last.
STATUS_CONTINUE_FRAME = 1
// Final frame, sent once the audio file has been read to EOF.
STATUS_LAST_FRAME = 2
)
// main runs the streaming dictation demo: it dials the signed WebSocket URL,
// streams the audio file in fixed-size frames from a background goroutine, and
// prints every recognition result until the server reports the final one.
//
// Setup failures (dial, opening or reading the audio file) panic, as elsewhere
// in this demo. A non-zero response code is printed and ends the run.
func main() {
	fmt.Println(HmacWithShaTobase64("hmac-sha256", "hello\nhello", "hello"))
	st := time.Now()
	d := websocket.Dialer{
		HandshakeTimeout: 5 * time.Second,
	}
	// Handshake and establish the WebSocket connection.
	conn, resp, err := d.Dial(assembleAuthUrl(hostUrl, apiKey, apiSecret), nil)
	if err != nil {
		panic(readResp(resp) + err.Error())
	}
	defer conn.Close()
	if resp.StatusCode != http.StatusSwitchingProtocols {
		// err is nil on this path, so only the response can be reported.
		panic(readResp(resp))
	}

	const (
		// Bytes of audio carried by each frame.
		frameSize = 1280
		// Pause between frames, simulating the audio sampling interval.
		interval = 40 * time.Millisecond
	)

	// Cancelling ctx tells the sender goroutine to stop between frames.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Sender: read the file frame by frame and push it to the server.
	go func() {
		audioFile, err := os.Open(file)
		if err != nil {
			panic(err)
		}
		defer audioFile.Close()

		// status tracks whether the next frame is the first, a middle or the last one.
		status := STATUS_FIRST_FRAME
		buffer := make([]byte, frameSize)
		for {
			n, err := audioFile.Read(buffer)
			if err != nil {
				if err != io.EOF {
					panic(err)
				}
				// The file is exhausted: this frame closes the stream.
				status = STATUS_LAST_FRAME
			}
			select {
			case <-ctx.Done():
				fmt.Println("session end ---")
				return
			default:
			}

			frame := map[string]interface{}{
				"data": map[string]interface{}{
					"status":   status,
					"format":   "audio/L16;rate=16000",
					"audio":    base64.StdEncoding.EncodeToString(buffer[:n]),
					"encoding": "raw",
				},
			}
			if status == STATUS_FIRST_FRAME {
				// app_id and the business parameters are only sent with the first frame.
				frame["common"] = map[string]interface{}{
					"app_id": appid,
				}
				frame["business"] = map[string]interface{}{
					"language": "zh_cn",
					"domain":   "iat",
					"accent":   "mandarin",
				}
				fmt.Println("send first")
			}
			if err := conn.WriteJSON(frame); err != nil {
				fmt.Println("write message error:", err)
				return
			}

			switch status {
			case STATUS_FIRST_FRAME:
				status = STATUS_CONTINUE_FRAME
			case STATUS_LAST_FRAME:
				fmt.Println("send last")
				return
			}
			time.Sleep(interval)
		}
	}()

	// Receiver: print results until an error, a non-zero code or the final result.
	for {
		_, msg, err := conn.ReadMessage()
		if err != nil {
			fmt.Println("read message error:", err)
			break
		}
		var reply RespData
		if err := json.Unmarshal(msg, &reply); err != nil {
			fmt.Println("decode message error:", err)
			continue
		}
		fmt.Println(reply.Data.Result.String(), reply.Sid)
		if reply.Code != 0 {
			fmt.Println(reply.Code, reply.Message, time.Since(st))
			return
		}
		// A response status of 2 marks the last result of the session.
		if reply.Data.Status == 2 {
			fmt.Println(reply.Code, reply.Message, time.Since(st))
			break
		}
	}
	// Stop the sender if it is still running, then give it a moment to wind down.
	cancel()
	time.Sleep(1 * time.Second)
}
// RespData is the JSON envelope of one message received from the server.
type RespData struct {
// Printed by main next to each result; presumably the session id — confirm against the API docs.
Sid string `json:"sid"`
// main treats any non-zero value as a failure and stops.
Code int `json:"code"`
// Printed together with Code.
Message string `json:"message"`
// Payload carrying the recognition result and its status.
Data Data `json:"data"`
}
// Data is the payload section of a dictation response.
type Data struct {
// Recognized text for this message; main prints it via Result.String.
Result Result `json:"result"`
// main stops reading once this equals 2.
Status int `json:"status"`
}
// assembleAuthUrl builds the authenticated handshake URL for hosturl.
//
// It signs the string "host: <host>\ndate: <date>\nGET <path> HTTP/1.1" with
// HMAC-SHA256 keyed by apiSecret, wraps the signature in an "hmac username=..."
// authorization string (apiKey is the hmac username), base64-encodes that, and
// appends host, date and authorization as URL-encoded query parameters.
//
// If hosturl cannot be parsed the error is printed and "" is returned, so the
// subsequent dial fails with an error instead of this function dereferencing a
// nil URL.
func assembleAuthUrl(hosturl string, apiKey, apiSecret string) string {
	ul, err := url.Parse(hosturl)
	if err != nil {
		fmt.Println(err)
		return ""
	}
	// Signing time as an HTTP-date (RFC 1123 layout with a literal "GMT" zone).
	// time.RFC1123 on a UTC time would render the zone as "UTC" instead.
	date := time.Now().UTC().Format(http.TimeFormat)
	// Fields that take part in the signature: host, date, request-line.
	signString := []string{"host: " + ul.Host, "date: " + date, "GET " + ul.Path + " HTTP/1.1"}
	sgin := strings.Join(signString, "\n")
	fmt.Println(sgin)
	sha := HmacWithShaTobase64("hmac-sha256", sgin, apiSecret)
	fmt.Println(sha)
	// The authorization string is assembled raw; URL encoding happens once, below.
	authUrl := fmt.Sprintf("hmac username=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"", apiKey,
		"hmac-sha256", "host date request-line", sha)
	authorization := base64.StdEncoding.EncodeToString([]byte(authUrl))
	v := url.Values{}
	v.Add("host", ul.Host)
	v.Add("date", date)
	v.Add("authorization", authorization)
	return hosturl + "?" + v.Encode()
}
// HmacWithShaTobase64 returns the standard-base64 encoding of the HMAC-SHA256
// of data keyed with key. The algorithm argument is not consulted; SHA-256 is
// always used.
func HmacWithShaTobase64(algorithm, data, key string) string {
	h := hmac.New(sha256.New, []byte(key))
	h.Write([]byte(data))
	digest := h.Sum(make([]byte, 0, sha256.Size))
	return base64.StdEncoding.EncodeToString(digest)
}
// readResp renders an HTTP response as "code=<status>,body=<body>" so it can
// be included in an error message. It returns "" when resp is nil.
//
// The body is closed after being read. A failure while reading the body is
// appended to the returned string rather than raised as a panic, so it cannot
// mask the error the caller is in the middle of reporting.
func readResp(resp *http.Response) string {
	if resp == nil {
		return ""
	}
	if resp.Body == nil {
		return fmt.Sprintf("code=%d,body=%s", resp.StatusCode, "")
	}
	defer resp.Body.Close()
	b, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return fmt.Sprintf("code=%d,body=%s,read error=%v", resp.StatusCode, string(b), err)
	}
	return fmt.Sprintf("code=%d,body=%s", resp.StatusCode, string(b))
}
// Decoder assembles streamed dictation results into the full text, keeping at
// most one Result per sentence number. For demo reference only; real scenarios
// may differ.
type Decoder struct {
	// results is indexed by Result.Sn; nil entries are gaps or replaced results.
	results []*Result
}

// Decode stores result at index result.Sn, growing the slice when needed.
// When result.Pgs is "rpl", the entries in the inclusive index range
// result.Rg[0]..result.Rg[1] are cleared first, so the new result replaces
// them. A nil result or a negative Sn is ignored, an "rpl" result without a
// two-element Rg clears nothing, and the range is clamped to the stored slice.
func (d *Decoder) Decode(result *Result) {
	if result == nil || result.Sn < 0 {
		return
	}
	if len(d.results) <= result.Sn {
		d.results = append(d.results, make([]*Result, result.Sn-len(d.results)+1)...)
	}
	if result.Pgs == "rpl" && len(result.Rg) >= 2 {
		first, last := result.Rg[0], result.Rg[1]
		if first < 0 {
			first = 0
		}
		if last > len(d.results)-1 {
			last = len(d.results) - 1
		}
		for i := first; i <= last; i++ {
			d.results[i] = nil
		}
	}
	d.results[result.Sn] = result
}

// String concatenates the text of every stored result in index order,
// skipping empty slots.
func (d *Decoder) String() string {
	var b strings.Builder
	for _, v := range d.results {
		if v == nil {
			continue
		}
		b.WriteString(v.String())
	}
	return b.String()
}

// Result is the "result" object of one dictation response.
type Result struct {
	Ls bool `json:"ls"`
	// Rg holds the index range cleared by Decoder.Decode when Pgs is "rpl".
	Rg []int `json:"rg"`
	// Sn is the index at which Decoder stores this result.
	Sn int `json:"sn"`
	// Pgs equal to "rpl" marks this result as a replacement of earlier ones.
	Pgs string `json:"pgs"`
	// Ws lists the word slots whose text makes up this result.
	Ws []Ws `json:"ws"`
}

// String returns the text of all word slots joined in order.
func (t *Result) String() string {
	var b strings.Builder
	for i := range t.Ws {
		b.WriteString(t.Ws[i].String())
	}
	return b.String()
}

// Ws is one word slot of a Result.
type Ws struct {
	Bg int `json:"bg"`
	// Cw lists the entries whose W strings are concatenated by String.
	Cw []Cw `json:"cw"`
}

// String returns the W text of every entry in Cw joined in order.
func (w *Ws) String() string {
	var b strings.Builder
	for i := range w.Cw {
		b.WriteString(w.Cw[i].W)
	}
	return b.String()
}

// Cw is a single entry of a word slot.
type Cw struct {
	Sc int `json:"sc"`
	// W is the text contributed by this entry.
	W string `json:"w"`
}
语音合成
运行完成后在本地生成test.pcm文件,将srcText内容合成为语音
package main
import (
"crypto/hmac"
"crypto/sha256"
"encoding/base64"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"os"
"strings"
"time"
"github.com/gorilla/websocket"
)
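/**
* Streaming speech synthesis (TTS) WebAPI example. NOTE: this header is identical to the one on the dictation example above, so the links and the hot-word note below describe the dictation service, not synthesis.
* Streaming dictation WebAPI documentation (must read): https://doc.xfyun.cn/rest_api/语音听写(流式版).html
* Reference forum post for the WebAPI dictation service (must read): http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=38947&extra=
* Hot words for the streaming dictation WebAPI: log in to the open platform at https://www.xfyun.cn/, then go to Console -> My Apps -> Speech Dictation -> Service Management -> Upload Hot Words.
* Note: hot words only raise the recognition weight of the listed terms; the improvement is not guaranteed, so judge the effect by your own tests.
* Error codes: https://www.xfyun.cn/document/error-code (must read when a non-zero code is returned)
* @author iflytek
*/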
// Settings for the speech synthesis (TTS) demo that writes PCM audio. The
// credential strings are placeholders that must be replaced with your own.
var (
// WebSocket endpoint that main dials (after signing via assembleAuthUrl).
hostUrl = "wss://tts-api.xfyun.cn/v2/tts"
// Used as the "hmac username" in the authorization parameter.
apiKey = "自己key"
// HMAC key used to sign the handshake request.
apiSecret = "自己secret"
file = "test.pcm" // output path; main creates/truncates it and writes the decoded audio
// Sent as common.app_id in the request frame.
appid = "自己的appid"
)
// Frame status values. main in this program only sends STATUS_LAST_FRAME,
// because the whole text goes out in a single frame.
const (
STATUS_FIRST_FRAME = 0
STATUS_CONTINUE_FRAME = 1
STATUS_LAST_FRAME = 2
)
// main runs the speech synthesis demo: it dials the signed WebSocket URL,
// sends srcText in a single frame, and appends the base64-decoded audio of
// every response to the output file until the server reports the final chunk.
//
// Setup and write failures panic, as elsewhere in this demo. A non-zero
// response code is printed and ends the run.
func main() {
	fmt.Println(HmacWithShaTobase64("hmac-sha256", "hello\nhello", "hello"))
	st := time.Now()
	d := websocket.Dialer{
		HandshakeTimeout: 5 * time.Second,
	}
	srcText := "你好天下123ABC"
	// Handshake and establish the WebSocket connection.
	conn, resp, err := d.Dial(assembleAuthUrl(hostUrl, apiKey, apiSecret), nil)
	if err != nil {
		panic(readResp(resp) + err.Error())
	}
	defer conn.Close()
	if resp.StatusCode != http.StatusSwitchingProtocols {
		// err is nil on this path, so only the response can be reported.
		panic(readResp(resp))
	}

	// The whole text fits in one frame, so it is sent with the last-frame status.
	frameData := map[string]interface{}{
		"common": map[string]interface{}{
			"app_id": appid,
		},
		"business": map[string]interface{}{
			"vcn":   "xiaoyan",
			"aue":   "raw",
			"speed": 50,
			"tte":   "UTF8",
		},
		"data": map[string]interface{}{
			"status":   STATUS_LAST_FRAME,
			"encoding": "UTF8",
			"text":     base64.StdEncoding.EncodeToString([]byte(srcText)),
		},
	}
	fmt.Println("send first")
	if err := conn.WriteJSON(frameData); err != nil {
		panic("send request: " + err.Error())
	}

	// 0644 instead of os.ModePerm: the audio file has no reason to be executable or world-writable.
	audioFile, err := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		panic(err)
	}
	// Covers the early-return and panic paths; the explicit Close below reports errors.
	defer audioFile.Close()

	for {
		_, msg, err := conn.ReadMessage()
		if err != nil {
			fmt.Println("read message error:", err)
			break
		}
		var reply RespData
		if err := json.Unmarshal(msg, &reply); err != nil {
			fmt.Println("decode message error:", err)
			continue
		}
		if reply.Code != 0 {
			fmt.Println(reply.Code, reply.Message, time.Since(st))
			return
		}
		audiobytes, err := base64.StdEncoding.DecodeString(reply.Data.Audio)
		if err != nil {
			panic(err)
		}
		if _, err := audioFile.Write(audiobytes); err != nil {
			panic(err)
		}
		// A response status of 2 marks the last audio chunk.
		if reply.Data.Status == 2 {
			fmt.Println(reply.Code, reply.Message, time.Since(st))
			break
		}
	}
	if err := audioFile.Close(); err != nil {
		panic(err)
	}
	time.Sleep(1 * time.Second)
}
// RespData is the JSON envelope of one message received from the server.
type RespData struct {
// Not read by main in this program; presumably the session id — confirm against the API docs.
Sid string `json:"sid"`
// main treats any non-zero value as a failure and stops.
Code int `json:"code"`
// Printed together with Code.
Message string `json:"message"`
// Payload carrying the audio chunk and its status.
Data Data `json:"data"`
}
// Data is the payload section of a synthesis response.
type Data struct {
// Base64-encoded audio; main decodes it and appends it to the output file.
Audio string `json:"audio,omitempty"`
// Not read anywhere in this program; meaning not visible here — see the API docs.
Ced int `json:"ced,omitempty"`
// main stops reading once this equals 2.
Status int `json:"status,omitempty"`
}
// assembleAuthUrl builds the authenticated handshake URL for hosturl.
//
// It signs the string "host: <host>\ndate: <date>\nGET <path> HTTP/1.1" with
// HMAC-SHA256 keyed by apiSecret, wraps the signature in an "hmac username=..."
// authorization string (apiKey is the hmac username), base64-encodes that, and
// appends host, date and authorization as URL-encoded query parameters.
//
// If hosturl cannot be parsed the error is printed and "" is returned, so the
// subsequent dial fails with an error instead of this function dereferencing a
// nil URL.
func assembleAuthUrl(hosturl string, apiKey, apiSecret string) string {
	ul, err := url.Parse(hosturl)
	if err != nil {
		fmt.Println(err)
		return ""
	}
	// Signing time as an HTTP-date (RFC 1123 layout with a literal "GMT" zone).
	// time.RFC1123 on a UTC time would render the zone as "UTC" instead.
	date := time.Now().UTC().Format(http.TimeFormat)
	// Fields that take part in the signature: host, date, request-line.
	signString := []string{"host: " + ul.Host, "date: " + date, "GET " + ul.Path + " HTTP/1.1"}
	sgin := strings.Join(signString, "\n")
	fmt.Println(sgin)
	sha := HmacWithShaTobase64("hmac-sha256", sgin, apiSecret)
	fmt.Println(sha)
	// The authorization string is assembled raw; URL encoding happens once, below.
	authUrl := fmt.Sprintf("hmac username=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"", apiKey,
		"hmac-sha256", "host date request-line", sha)
	authorization := base64.StdEncoding.EncodeToString([]byte(authUrl))
	v := url.Values{}
	v.Add("host", ul.Host)
	v.Add("date", date)
	v.Add("authorization", authorization)
	return hosturl + "?" + v.Encode()
}
// HmacWithShaTobase64 returns the standard-base64 encoding of the HMAC-SHA256
// of data keyed with key. The algorithm argument is not consulted; SHA-256 is
// always used.
func HmacWithShaTobase64(algorithm, data, key string) string {
	h := hmac.New(sha256.New, []byte(key))
	h.Write([]byte(data))
	digest := h.Sum(make([]byte, 0, sha256.Size))
	return base64.StdEncoding.EncodeToString(digest)
}
// readResp renders an HTTP response as "code=<status>,body=<body>" so it can
// be included in an error message. It returns "" when resp is nil.
//
// The body is closed after being read. A failure while reading the body is
// appended to the returned string rather than raised as a panic, so it cannot
// mask the error the caller is in the middle of reporting.
func readResp(resp *http.Response) string {
	if resp == nil {
		return ""
	}
	if resp.Body == nil {
		return fmt.Sprintf("code=%d,body=%s", resp.StatusCode, "")
	}
	defer resp.Body.Close()
	b, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return fmt.Sprintf("code=%d,body=%s,read error=%v", resp.StatusCode, string(b), err)
	}
	return fmt.Sprintf("code=%d,body=%s", resp.StatusCode, string(b))
}
// Decoder assembles streamed dictation results into the full text, keeping at
// most one Result per sentence number. For demo reference only; real scenarios
// may differ.
type Decoder struct {
	// results is indexed by Result.Sn; nil entries are gaps or replaced results.
	results []*Result
}

// Decode stores result at index result.Sn, growing the slice when needed.
// When result.Pgs is "rpl", the entries in the inclusive index range
// result.Rg[0]..result.Rg[1] are cleared first, so the new result replaces
// them. A nil result or a negative Sn is ignored, an "rpl" result without a
// two-element Rg clears nothing, and the range is clamped to the stored slice.
func (d *Decoder) Decode(result *Result) {
	if result == nil || result.Sn < 0 {
		return
	}
	if len(d.results) <= result.Sn {
		d.results = append(d.results, make([]*Result, result.Sn-len(d.results)+1)...)
	}
	if result.Pgs == "rpl" && len(result.Rg) >= 2 {
		first, last := result.Rg[0], result.Rg[1]
		if first < 0 {
			first = 0
		}
		if last > len(d.results)-1 {
			last = len(d.results) - 1
		}
		for i := first; i <= last; i++ {
			d.results[i] = nil
		}
	}
	d.results[result.Sn] = result
}

// String concatenates the text of every stored result in index order,
// skipping empty slots.
func (d *Decoder) String() string {
	var b strings.Builder
	for _, v := range d.results {
		if v == nil {
			continue
		}
		b.WriteString(v.String())
	}
	return b.String()
}

// Result is the "result" object of one dictation response.
type Result struct {
	Ls bool `json:"ls"`
	// Rg holds the index range cleared by Decoder.Decode when Pgs is "rpl".
	Rg []int `json:"rg"`
	// Sn is the index at which Decoder stores this result.
	Sn int `json:"sn"`
	// Pgs equal to "rpl" marks this result as a replacement of earlier ones.
	Pgs string `json:"pgs"`
	// Ws lists the word slots whose text makes up this result.
	Ws []Ws `json:"ws"`
}

// String returns the text of all word slots joined in order.
func (t *Result) String() string {
	var b strings.Builder
	for i := range t.Ws {
		b.WriteString(t.Ws[i].String())
	}
	return b.String()
}

// Ws is one word slot of a Result.
type Ws struct {
	Bg int `json:"bg"`
	// Cw lists the entries whose W strings are concatenated by String.
	Cw []Cw `json:"cw"`
}

// String returns the W text of every entry in Cw joined in order.
func (w *Ws) String() string {
	var b strings.Builder
	for i := range w.Cw {
		b.WriteString(w.Cw[i].W)
	}
	return b.String()
}

// Cw is a single entry of a word slot.
type Cw struct {
	Sc int `json:"sc"`
	// W is the text contributed by this entry.
	W string `json:"w"`
}
语音合成 MP3
使用minimp3库进行播放,依赖MinGW64环境,MP3文件最后一段播放有问题,暂时没解决
package main
import (
"crypto/hmac"
"crypto/sha256"
"encoding/base64"
"encoding/json"
"fmt"
"github.com/hajimehoshi/oto"
"github.com/tosone/minimp3"
"io/ioutil"
"net/http"
"net/url"
"os"
"strings"
"sync"
"time"
"github.com/gorilla/websocket"
)
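/**
* Streaming speech synthesis (TTS) WebAPI example with MP3 output. NOTE: this header is identical to the one on the dictation example above, so the links and the hot-word note below describe the dictation service, not synthesis.
* Streaming dictation WebAPI documentation (must read): https://doc.xfyun.cn/rest_api/语音听写(流式版).html
* Reference forum post for the WebAPI dictation service (must read): http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=38947&extra=
* Hot words for the streaming dictation WebAPI: log in to the open platform at https://www.xfyun.cn/, then go to Console -> My Apps -> Speech Dictation -> Service Management -> Upload Hot Words.
* Note: hot words only raise the recognition weight of the listed terms; the improvement is not guaranteed, so judge the effect by your own tests.
* Error codes: https://www.xfyun.cn/document/error-code (must read when a non-zero code is returned)
* @author iflytek
*/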
// Settings for the speech synthesis (TTS) demo that writes and plays MP3
// audio. The credential strings are placeholders that must be replaced with
// your own.
var (
// WebSocket endpoint that main dials (after signing via assembleAuthUrl).
hostUrl = "wss://tts-api.xfyun.cn/v2/tts"
// Used as the "hmac username" in the authorization parameter.
apiKey = "你的key"
// HMAC key used to sign the handshake request.
apiSecret = "你的secret"
file = "test.mp3" // output path; main writes the decoded audio here and then passes it to Sound
// Sent as common.app_id in the request frame.
appid = "你的id"
)
// Frame status values. main in this program only sends STATUS_LAST_FRAME,
// because the whole text goes out in a single frame.
const (
STATUS_FIRST_FRAME = 0
STATUS_CONTINUE_FRAME = 1
STATUS_LAST_FRAME = 2
)
// wg lets main wait for playback: main adds one before starting Sound, and
// Sound calls Done when it returns.
var wg sync.WaitGroup
// main runs the MP3 speech synthesis demo: it dials the signed WebSocket URL,
// sends srcText in a single frame, appends the base64-decoded audio of every
// response to the output file until the server reports the final chunk, and
// then plays the file through Sound.
//
// Setup and write failures panic, as elsewhere in this demo. A non-zero
// response code is printed and ends the run without playback.
func main() {
	fmt.Println(HmacWithShaTobase64("hmac-sha256", "hello\nhello", "hello"))
	st := time.Now()
	d := websocket.Dialer{
		HandshakeTimeout: 5 * time.Second,
	}
	srcText := "请佩戴口罩。请佩戴口罩。请佩戴口罩。"
	// Handshake and establish the WebSocket connection.
	conn, resp, err := d.Dial(assembleAuthUrl(hostUrl, apiKey, apiSecret), nil)
	if err != nil {
		panic(readResp(resp) + err.Error())
	}
	defer conn.Close()
	if resp.StatusCode != http.StatusSwitchingProtocols {
		// err is nil on this path, so only the response can be reported.
		panic(readResp(resp))
	}

	// The whole text fits in one frame, so it is sent with the last-frame status.
	frameData := map[string]interface{}{
		"common": map[string]interface{}{
			"app_id": appid,
		},
		"business": map[string]interface{}{
			"vcn":   "xiaoyan",
			"aue":   "lame",
			"speed": 50,
			"tte":   "UTF8",
			"sfl":   1,
		},
		"data": map[string]interface{}{
			"status":   STATUS_LAST_FRAME,
			"encoding": "UTF8",
			"text":     base64.StdEncoding.EncodeToString([]byte(srcText)),
		},
	}
	fmt.Println("send first")
	if err := conn.WriteJSON(frameData); err != nil {
		panic("send request: " + err.Error())
	}

	// 0644 instead of os.ModePerm: the audio file has no reason to be executable or world-writable.
	audioFile, err := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		panic(err)
	}
	// Covers the early-return and panic paths; the explicit Close below reports errors.
	defer audioFile.Close()

	for {
		_, msg, err := conn.ReadMessage()
		if err != nil {
			fmt.Println("read message error:", err)
			break
		}
		var reply RespData
		if err := json.Unmarshal(msg, &reply); err != nil {
			fmt.Println("decode message error:", err)
			continue
		}
		if reply.Code != 0 {
			fmt.Println(reply.Code, reply.Message, time.Since(st))
			return
		}
		audiobytes, err := base64.StdEncoding.DecodeString(reply.Data.Audio)
		if err != nil {
			panic(err)
		}
		if _, err := audioFile.Write(audiobytes); err != nil {
			panic(err)
		}
		// A response status of 2 marks the last audio chunk.
		if reply.Data.Status == 2 {
			fmt.Println(reply.Code, reply.Message, time.Since(st))
			break
		}
	}
	// Close before playback so everything written is on disk when Sound reads the file.
	if err := audioFile.Close(); err != nil {
		panic(err)
	}
	// Sound signals completion through wg, so it must be registered first.
	wg.Add(1)
	go Sound(file)
	wg.Wait()
}
// RespData is the JSON envelope of one message received from the server.
type RespData struct {
// Not read by main in this program; presumably the session id — confirm against the API docs.
Sid string `json:"sid"`
// main treats any non-zero value as a failure and stops.
Code int `json:"code"`
// Printed together with Code.
Message string `json:"message"`
// Payload carrying the audio chunk and its status.
Data Data `json:"data"`
}
// Data is the payload section of a synthesis response.
type Data struct {
// Base64-encoded audio; main decodes it and appends it to the output file.
Audio string `json:"audio,omitempty"`
// Not read anywhere in this program; meaning not visible here — see the API docs.
Ced int `json:"ced,omitempty"`
// main stops reading once this equals 2.
Status int `json:"status,omitempty"`
}
// assembleAuthUrl builds the authenticated handshake URL for hosturl.
//
// It signs the string "host: <host>\ndate: <date>\nGET <path> HTTP/1.1" with
// HMAC-SHA256 keyed by apiSecret, wraps the signature in an "hmac username=..."
// authorization string (apiKey is the hmac username), base64-encodes that, and
// appends host, date and authorization as URL-encoded query parameters.
//
// If hosturl cannot be parsed the error is printed and "" is returned, so the
// subsequent dial fails with an error instead of this function dereferencing a
// nil URL.
func assembleAuthUrl(hosturl string, apiKey, apiSecret string) string {
	ul, err := url.Parse(hosturl)
	if err != nil {
		fmt.Println(err)
		return ""
	}
	// Signing time as an HTTP-date (RFC 1123 layout with a literal "GMT" zone).
	// time.RFC1123 on a UTC time would render the zone as "UTC" instead.
	date := time.Now().UTC().Format(http.TimeFormat)
	// Fields that take part in the signature: host, date, request-line.
	signString := []string{"host: " + ul.Host, "date: " + date, "GET " + ul.Path + " HTTP/1.1"}
	sgin := strings.Join(signString, "\n")
	fmt.Println(sgin)
	sha := HmacWithShaTobase64("hmac-sha256", sgin, apiSecret)
	fmt.Println(sha)
	// The authorization string is assembled raw; URL encoding happens once, below.
	authUrl := fmt.Sprintf("hmac username=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"", apiKey,
		"hmac-sha256", "host date request-line", sha)
	authorization := base64.StdEncoding.EncodeToString([]byte(authUrl))
	v := url.Values{}
	v.Add("host", ul.Host)
	v.Add("date", date)
	v.Add("authorization", authorization)
	return hosturl + "?" + v.Encode()
}
// HmacWithShaTobase64 returns the standard-base64 encoding of the HMAC-SHA256
// of data keyed with key. The algorithm argument is not consulted; SHA-256 is
// always used.
func HmacWithShaTobase64(algorithm, data, key string) string {
	h := hmac.New(sha256.New, []byte(key))
	h.Write([]byte(data))
	digest := h.Sum(make([]byte, 0, sha256.Size))
	return base64.StdEncoding.EncodeToString(digest)
}
// readResp renders an HTTP response as "code=<status>,body=<body>" so it can
// be included in an error message. It returns "" when resp is nil.
//
// The body is closed after being read. A failure while reading the body is
// appended to the returned string rather than raised as a panic, so it cannot
// mask the error the caller is in the middle of reporting.
func readResp(resp *http.Response) string {
	if resp == nil {
		return ""
	}
	if resp.Body == nil {
		return fmt.Sprintf("code=%d,body=%s", resp.StatusCode, "")
	}
	defer resp.Body.Close()
	b, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return fmt.Sprintf("code=%d,body=%s,read error=%v", resp.StatusCode, string(b), err)
	}
	return fmt.Sprintf("code=%d,body=%s", resp.StatusCode, string(b))
}
// Sound reads the MP3 file at filename, decodes it fully with minimp3 and
// writes the decoded samples to an oto player created with the decoder's
// sample rate and channel count, 2 bytes per sample and a 10240-byte buffer.
//
// It always calls wg.Done on return, so the caller must have called wg.Add(1).
// Every failure (empty filename, unreadable file, decode error, player error)
// is printed and ends playback; nothing is returned.
func Sound(filename string) {
	defer wg.Done()
	if len(filename) == 0 {
		fmt.Printf("%s 文件大小为0", filename)
		return
	}
	mp3Data, err := ioutil.ReadFile(filename)
	if err != nil {
		fmt.Println(err)
		return
	}
	dec, pcm, err := minimp3.DecodeFull(mp3Data)
	if err != nil {
		fmt.Println(err)
		return
	}
	player, err := oto.NewPlayer(
		dec.SampleRate,
		dec.Channels,
		2,
		10240)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer player.Close()
	if _, err := player.Write(pcm); err != nil {
		fmt.Println(err)
		return
	}
	// NOTE(review): closing the player right after Write appears to cut off
	// audio that is still buffered, which would explain the truncated tail
	// reported for this demo. Pause before the deferred Close so the buffer can
	// drain — confirm on the target platform and tune the duration.
	time.Sleep(time.Second)
}
// Decoder assembles streamed dictation results into the full text, keeping at
// most one Result per sentence number. For demo reference only; real scenarios
// may differ.
type Decoder struct {
	// results is indexed by Result.Sn; nil entries are gaps or replaced results.
	results []*Result
}

// Decode stores result at index result.Sn, growing the slice when needed.
// When result.Pgs is "rpl", the entries in the inclusive index range
// result.Rg[0]..result.Rg[1] are cleared first, so the new result replaces
// them. A nil result or a negative Sn is ignored, an "rpl" result without a
// two-element Rg clears nothing, and the range is clamped to the stored slice.
func (d *Decoder) Decode(result *Result) {
	if result == nil || result.Sn < 0 {
		return
	}
	if len(d.results) <= result.Sn {
		d.results = append(d.results, make([]*Result, result.Sn-len(d.results)+1)...)
	}
	if result.Pgs == "rpl" && len(result.Rg) >= 2 {
		first, last := result.Rg[0], result.Rg[1]
		if first < 0 {
			first = 0
		}
		if last > len(d.results)-1 {
			last = len(d.results) - 1
		}
		for i := first; i <= last; i++ {
			d.results[i] = nil
		}
	}
	d.results[result.Sn] = result
}

// String concatenates the text of every stored result in index order,
// skipping empty slots.
func (d *Decoder) String() string {
	var b strings.Builder
	for _, v := range d.results {
		if v == nil {
			continue
		}
		b.WriteString(v.String())
	}
	return b.String()
}

// Result is the "result" object of one dictation response.
type Result struct {
	Ls bool `json:"ls"`
	// Rg holds the index range cleared by Decoder.Decode when Pgs is "rpl".
	Rg []int `json:"rg"`
	// Sn is the index at which Decoder stores this result.
	Sn int `json:"sn"`
	// Pgs equal to "rpl" marks this result as a replacement of earlier ones.
	Pgs string `json:"pgs"`
	// Ws lists the word slots whose text makes up this result.
	Ws []Ws `json:"ws"`
}

// String returns the text of all word slots joined in order.
func (t *Result) String() string {
	var b strings.Builder
	for i := range t.Ws {
		b.WriteString(t.Ws[i].String())
	}
	return b.String()
}

// Ws is one word slot of a Result.
type Ws struct {
	Bg int `json:"bg"`
	// Cw lists the entries whose W strings are concatenated by String.
	Cw []Cw `json:"cw"`
}

// String returns the W text of every entry in Cw joined in order.
func (w *Ws) String() string {
	var b strings.Builder
	for i := range w.Cw {
		b.WriteString(w.Cw[i].W)
	}
	return b.String()
}

// Cw is a single entry of a word slot.
type Cw struct {
	Sc int `json:"sc"`
	// W is the text contributed by this entry.
	W string `json:"w"`
}