人生若只如初见,何事秋风悲画扇。
整体思路大致为:前端页面上传音频文件,后端接收文件并上传至腾讯云COS;上传成功后得到该音频的存储地址(即URL),随后将该URL传给调用腾讯云一句话识别(URL识别方式)的方法,最终得到语音识别结果。
1.注册腾讯云账号
2.获取SecretId和SecretKey
3.开通腾讯云语音识别及COS业务
4.新建一个Maven工程
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the voice-to-text demo: a Spring Boot web app plus the Tencent Cloud SDKs it calls. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>DemoVoiceToWords</artifactId>
<version>1.0-SNAPSHOT</version>
<!-- Inherits dependency management from the Spring Boot parent, which is why the web starter below declares no version. -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.0.4.RELEASE</version>
</parent>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Tencent Cloud COS (object storage) client. -->
<dependency>
<groupId>com.qcloud</groupId>
<artifactId>cos_api</artifactId>
<version>5.6.8</version>
</dependency>
<!-- Speech recognition (ASR) dependencies. -->
<dependency>
<groupId>com.tencentcloudapi</groupId>
<artifactId>tencentcloud-sdk-java</artifactId>
<version>3.1.62</version>
</dependency>
<!-- NOTE(review): none of the Java classes shown alongside this POM import from this artifact; confirm it is still needed. -->
<dependency>
<groupId>com.qcloud</groupId>
<artifactId>qcloud-java-sdk</artifactId>
<version>2.0.1</version>
</dependency>
<!-- JSONObject (fastjson) dependency, used to parse the recognition response. -->
<!-- NOTE(review): this is an old fastjson release; check the project's security advisories before reusing this version. -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.66</version>
</dependency>
</dependencies>
</project>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/html">
<head>
<meta charset="UTF-8">
<title>上传测试</title>
</head>
<body>
<!-- Test page: posts the chosen audio file as multipart/form-data to the demo upload endpoint.
     The field name "file" is the request parameter name the backend handler reads. -->
<form action="http://localhost:8080/demo/upload" method="post" enctype="multipart/form-data">
<input type="file" name="file"/>
<input type="submit" value="提交"/>
</form>
</body>
</html>
package demorecognition.controller;
import demorecognition.util.COSUpLoadUtil;
import demorecognition.recognition.URLRecognition;
import com.alibaba.fastjson.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
/**
 * Demo endpoint that accepts an uploaded audio file, stores it in Tencent Cloud COS and
 * runs one-sentence speech recognition on the resulting object URL.
 */
@Controller
@CrossOrigin
@RequestMapping("/demo")
public class DemoController {
    private static final Logger logger = LoggerFactory.getLogger(DemoController.class);

    /**
     * Uploads the audio file to COS, then recognises it by URL and prints the recognised text.
     *
     * <p>Every failure (empty upload, upload error, recognition error, missing result) is logged
     * and ends the request early instead of propagating a {@code NullPointerException}.
     *
     * @param multipartFile the audio file sent in the multipart field named {@code file}
     */
    @PostMapping("upload")
    // Marks the response as fully handled with an empty body; without it a void return on an
    // @Controller makes Spring MVC fall back to resolving a default view name.
    @ResponseBody
    public void upload(@RequestParam("file") MultipartFile multipartFile) {
        logger.info("开始处理文件。");
        if (multipartFile == null || multipartFile.isEmpty()) {
            logger.info("上传的文件不存在。");
            return;
        }

        logger.info("开始文件上传至腾讯云COS。");
        String url;
        try {
            // Arguments: byte length of the audio, original file name, the file itself, and
            // "voice" as the folder name inside the bucket.
            url = COSUpLoadUtil.upLoadFile2COS(
                    multipartFile.getSize(),
                    multipartFile.getOriginalFilename(),
                    multipartFile,
                    "voice");
        } catch (IOException e) {
            logger.error("文件上传至腾讯云COS失败。", e);
            return;
        }
        if (url == null) {
            // The uploader returns null when the upload did not succeed; there is nothing to recognise.
            logger.error("未能获取文件的URL,终止识别。");
            return;
        }

        logger.info("开始通过URL进行语音识别。");
        URLRecognition recognition = new URLRecognition();
        String identificationResults = recognition.getIdentificationResults(url);
        if (identificationResults == null) {
            // The recogniser returns null when the SDK call failed.
            logger.error("语音识别调用失败。");
            return;
        }

        // Extract the recognised text from the JSON response.
        JSONObject jsonObject = JSONObject.parseObject(identificationResults);
        String word = jsonObject == null ? null : jsonObject.getString("Result");
        if (word != null && !word.isEmpty()) {
            System.out.println("识别结果为:" + word);
        } else {
            System.out.println("很抱歉,未能识别出您的话。");
        }
    }
}
package demorecognition.util;
import com.qcloud.cos.COSClient;
import com.qcloud.cos.ClientConfig;
import com.qcloud.cos.auth.BasicCOSCredentials;
import com.qcloud.cos.auth.COSCredentials;
import com.qcloud.cos.model.ObjectMetadata;
import com.qcloud.cos.model.PutObjectRequest;
import com.qcloud.cos.model.PutObjectResult;
import com.qcloud.cos.region.Region;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.web.multipart.MultipartFile;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import java.util.UUID;
/**
 * Helper that uploads a multipart file to a Tencent Cloud COS bucket and returns a
 * pre-signed URL for the stored object.
 */
public class COSUpLoadUtil {
    private static final Logger logger = LoggerFactory.getLogger(COSUpLoadUtil.class);

    // Account credentials; fill these in before use.
    private static final String secretId = "";
    private static final String secretKey = "";
    // Bucket region.
    private static final String bucketRegion = "ap-shanghai";
    // Bucket name.
    private static final String bucketName = "";
    // Key prefix inside the bucket; set as needed (see the official COS documentation).
    private static final String basicPath = "";

    // Shared client settings, built once from the values above.
    private static final COSCredentials cred = new BasicCOSCredentials(secretId, secretKey);
    private static final Region region = new Region(bucketRegion);
    private static final ClientConfig clientConfig = new ClientConfig(region);

    /**
     * Streams {@code file} into the bucket and returns a pre-signed URL valid for one week.
     *
     * <p>The COS client and the input stream are released on every path, including when the
     * upload fails or an exception is thrown.
     *
     * @param fileSize byte length of the stream; when {@code null}, {@code file.getSize()} is used
     * @param filename original file name appended after a random UUID; {@code null} is treated as empty
     * @param file     the uploaded file whose content is stored
     * @param filepath folder name inside the bucket, placed under the configured base path
     * @return the pre-signed URL as a string, or {@code null} when the upload returned no ETag
     * @throws IOException if the uploaded file's content cannot be read
     */
    public static String upLoadFile2COS(Long fileSize, String filename, MultipartFile file, String filepath) throws IOException {
        COSClient cosClient = new COSClient(cred, clientConfig);
        try (InputStream inputStream = new BufferedInputStream(file.getInputStream())) {
            // Stream uploads need the content length declared up front in the object metadata.
            long contentLength = (fileSize != null) ? fileSize : file.getSize();
            ObjectMetadata objectMetadata = new ObjectMetadata();
            objectMetadata.setContentLength(contentLength);

            // A random UUID prefix keeps object names from colliding.
            String safeName = (filename != null) ? filename : "";
            String key = basicPath + "/" + filepath + "/" + UUID.randomUUID().toString() + safeName;

            PutObjectRequest putObjectRequest = new PutObjectRequest(bucketName, key, inputStream, objectMetadata);
            PutObjectResult putObjectResult = cosClient.putObject(putObjectRequest);

            // An ETag on the result is treated as the success signal.
            if (putObjectResult.getETag() == null) {
                logger.warn("文件上传失败。");
                return null;
            }
            logger.info("文件上传成功。");

            logger.info("开始生成URL。");
            // The URL expires one week from now.
            Date expiredTime = new Date(System.currentTimeMillis() + (3600L * 1000L * 24L * 7L));
            String url = cosClient.generatePresignedUrl(bucketName, key, expiredTime).toString();
            logger.info("url生成成功。");
            logger.info("生成的URL为:{}", url);
            return url;
        } finally {
            // Always release the client, not only after a successful upload.
            destory(cosClient);
        }
    }

    /**
     * Shuts down the given COS client.
     *
     * @param cosClient the client to shut down
     */
    public static void destory(COSClient cosClient) {
        cosClient.shutdown();
    }
}
package demorecognition.recognition;
import com.tencentcloudapi.asr.v20190614.AsrClient;
import com.tencentcloudapi.asr.v20190614.models.SentenceRecognitionRequest;
import com.tencentcloudapi.asr.v20190614.models.SentenceRecognitionResponse;
import com.tencentcloudapi.common.Credential;
import com.tencentcloudapi.common.exception.TencentCloudSDKException;
import com.tencentcloudapi.common.profile.ClientProfile;
import com.tencentcloudapi.common.profile.HttpProfile;
import java.io.UnsupportedEncodingException;
/**
 * Calls Tencent Cloud one-sentence speech recognition for an audio file addressed by URL.
 */
public class URLRecognition {

    /**
     * Sends a sentence-recognition request for the audio at {@code url}.
     *
     * <p>The request uses engine type {@code 16k_zh} and voice format {@code wav}.
     *
     * @param url publicly reachable URL of the audio file
     * @return the SDK response serialised as a JSON string, or {@code null} when {@code url}
     *         is null or empty or the SDK call fails
     */
    public String getIdentificationResults(String url) {
        if (url == null || url.isEmpty()) {
            // Without a URL the request cannot succeed; skip the remote call.
            System.out.println("语音URL为空,无法进行识别。");
            return null;
        }

        // Account credentials; fill these in before use.
        String secretId = "";
        String secretKey = "";
        String identificationResults = null;
        try {
            Credential cred = new Credential(secretId, secretKey);
            HttpProfile httpProfile = new HttpProfile();
            httpProfile.setEndpoint("asr.tencentcloudapi.com");
            ClientProfile clientProfile = new ClientProfile();
            clientProfile.setHttpProfile(httpProfile);
            AsrClient client = new AsrClient(cred, "ap-shanghai", clientProfile);

            // Only the fixed parameters go through the JSON template. The URL is set through the
            // request's setter so characters such as quotes or backslashes cannot break the JSON.
            String params = "{\"ProjectId\":0,\"SubServiceType\":2,\"EngSerViceType\":\"16k_zh\","
                    + "\"SourceType\":0,\"VoiceFormat\":\"wav\",\"UsrAudioKey\":\"session-123\"}";
            SentenceRecognitionRequest req =
                    SentenceRecognitionRequest.fromJsonString(params, SentenceRecognitionRequest.class);
            req.setUrl(url);

            SentenceRecognitionResponse resp = client.SentenceRecognition(req);
            identificationResults = SentenceRecognitionResponse.toJsonString(resp);
        } catch (TencentCloudSDKException | UnsupportedEncodingException e) {
            // Best-effort: report the failure and let the caller see a null result.
            System.out.println(e.toString());
        }
        return identificationResults;
    }
}
对于识别结果,也可以通过设置热词表,使识别结果更加符合日常使用场景(参见腾讯云语音识别文档“创建热词表”)。
得到热词表Id后,将其设置到请求参数中即可使用。(如:"HotwordId":"qwertyuiop123456")