文字转语音播报模块(二):JACOB 语音模块

一、业务场景

接上篇文章《文字转语音播报模块(一):阿里云nls服务使用示例》提到的需求,笔者也尝试使用JACOB技术方案来实现语音交互。

二、JACOB

JACOB 是一种用于 Java 和 COM 之间的交互的技术,它可以让 Java 程序调用 COM 组件提供的功能,包括 Windows 系统自带的 COM 组件。JACOB 也可以用于语音处理,例如调用微软语音 API(SAPI)来实现语音合成、语音识别和语音控制等功能。
注意:JACOB 依赖 Windows 的 COM 组件(如 SAPI),因此只能在 Windows 上运行,Linux 系统无法使用!

三、配置与使用

1、windows SDK工具包添加

SDK 及 jar 包下载:jacob-1.18-x64.dll(DLL 的位数必须与 JVM 一致:64 位 JVM 使用 x64 版本,32 位 JVM 使用 jacob-1.18-x86.dll)
将下载的 jacob-1.18-x64.dll 复制到 Java 安装目录的 bin 文件夹,我的目录位置是 C:\Program Files\Java\jdk-11\bin

2、pom文件引入

<!-- ysp 文本转音频 -->
 <dependency>
    <groupId>com.hynnet</groupId>
    <artifactId>jacob</artifactId>
    <version>1.18</version>
</dependency>

3、相关工具类

import com.jacob.activeX.ActiveXComponent;
import com.jacob.com.Dispatch;
import com.jacob.com.Variant;
import javax.sql.rowset.serial.SerialBlob;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.sql.Blob;
import java.sql.SQLException;
import java.util.Objects;

/**
 * Text-to-speech helpers that drive the Windows Speech API (SAPI) COM objects through JACOB.
 *
 * <p>Every method except {@link #textToStr(String)} creates COM objects via
 * {@code ActiveXComponent}, so those methods require the JACOB native library to be loadable by
 * the JVM.
 */
public class JacobVoiceUtil {

    // ProgID of the SAPI voice object used for speech synthesis
    private static final String SPEECH_OBJECT_TOKEN = "SAPI.SpVoice";

    // ProgID of the SAPI file stream object that receives the synthesized audio
    private static final String FILE_STREAM_PROG_ID = "Sapi.SpFileStream";

    // ProgID of the SAPI audio format object describing the output wave format
    private static final String AUDIO_FORMAT_PROG_ID = "Sapi.SpAudioFormat";

    // Value written to SpAudioFormat.Type (SAPI SpeechAudioFormatType: 22 = SAFT22kHz16BitMono)
    private static final int AUDIO_FORMAT_TYPE = 22;

    // Mode passed to SpFileStream.Open (SAPI SpeechStreamFileMode: 3 = SSFMCreateForWrite)
    private static final int FILE_MODE_CREATE_FOR_WRITE = 3;

    // Volume range is 0 ~ 100
    private static final int VOLUME = 100;

    // Speaking rate range is -10 ~ +10
    private static final int RATE = 0;

    // Fixed output file used by textToSpeechIO
    private static final String DEFAULT_WAV_PATH = "D:/test.wav";

    /**
     * Speaks the given text through the default audio output, then prints the description of
     * every installed voice to standard output.
     *
     * @param str the text to read aloud
     */
    public static void readStr(String str) {
        ActiveXComponent voice = new ActiveXComponent(SPEECH_OBJECT_TOKEN);
        try {
            voice.setProperty("Volume", new Variant(VOLUME));
            voice.setProperty("Rate", new Variant(RATE));
            // Intended to pick the male/female voice.
            // NOTE(review): the SAPI docs describe SpVoice.Voice as an SpObjectToken reference,
            // not an integer index, so this assignment looks suspect - confirm on a real machine
            // (the usual form is Dispatch.putRef(voice, "Voice", <token from GetVoices().Item(n)>)).
            voice.setProperty("Voice", new Variant(1));
            // Perform the actual reading
            Dispatch.call(voice.getObject(), "Speak", new Variant(str));

            printInstalledVoices(voice);
        } finally {
            // Release the COM object even when one of the calls above throws
            voice.safeRelease();
        }
    }

    /**
     * Prints the description of each voice reported by {@code GetVoices} on the given voice object.
     *
     * @param voice a live SAPI voice object
     */
    private static void printInstalledVoices(ActiveXComponent voice) {
        Dispatch voices = voice.invoke("GetVoices").getDispatch();
        try {
            int count = Dispatch.get(voices, "Count").getInt();
            for (int i = 0; i < count; i++) {
                // uArgErr gets one slot so the native side has somewhere to report a bad-argument index
                Dispatch token = Dispatch.invoke(voices, "Item", Dispatch.Method,
                        new Object[]{new Variant(i)}, new int[1]).toDispatch();
                try {
                    // NOTE(review): GetDescription is documented as a method of SpObjectToken;
                    // it is read here with a property-get exactly as before - confirm this works,
                    // otherwise switch to Dispatch.call.
                    String name = Dispatch.get(token, "GetDescription").toString();
                    System.out.println(name);
                } finally {
                    token.safeRelease();
                }
            }
        } finally {
            voices.safeRelease();
        }
    }

    /**
     * Synthesizes the given text into the wav file {@code D:/test.wav}.
     *
     * <p>Best effort: a failure is printed to standard error and not propagated.
     *
     * @param text the text to synthesize
     */
    public static void textToSpeechIO(String text) {
        try {
            synthesizeToWav(text, DEFAULT_WAV_PATH);
            System.out.println("输出语音文件成功!");
        } catch (RuntimeException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads a text file and returns its content as one string.
     *
     * <p>Lines are concatenated without any separator, so line breaks are dropped. The file is
     * decoded with the platform default charset.
     *
     * @param fileName location of the text file
     * @return the concatenated lines, or an empty string if the file cannot be read
     */
    public static String textToStr(String fileName) {
        // try-with-resources closes the reader and copes with a reader that was never opened
        try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line);
            }
            return sb.toString();
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Synthesizes the given text into a new wav file and returns the file content as a Blob.
     *
     * <p>The file is created at {@code path + <current time in millis> + ".wav"}; {@code path} is
     * used as a raw prefix, so it must already end with a separator if it names a directory.
     * The file is left on disk.
     *
     * @param text the text to synthesize
     * @param path prefix of the output file location
     * @return the bytes of the generated wav file
     * @throws IOException if synthesis fails or the generated file cannot be read
     * @throws SQLException if the Blob cannot be created
     */
    public static Blob textToBlob(String text, String path) throws IOException, SQLException {
        String wavPath = path + System.currentTimeMillis() + ".wav";
        try {
            synthesizeToWav(text, wavPath);
        } catch (RuntimeException e) {
            // Surface the failure instead of returning a missing or half-written file
            throw new IOException("Failed to synthesize speech to " + wavPath, e);
        }
        byte[] wavBytes = Files.readAllBytes(Paths.get(wavPath));
        return new SerialBlob(wavBytes);
    }

    /**
     * Speaks the text into a wav file through a SAPI file stream and releases all COM objects.
     *
     * @param text the text to synthesize
     * @param wavPath location of the wav file to create
     * @throws RuntimeException the first failure raised by a COM call; later cleanup failures
     *         are attached to it as suppressed exceptions
     */
    private static void synthesizeToWav(String text, String wavPath) {
        Dispatch fileStream = null;
        Dispatch audioFormat = null;
        Dispatch voice = null;
        boolean streamOpened = false;
        RuntimeException failure = null;
        try {
            try {
                fileStream = new ActiveXComponent(FILE_STREAM_PROG_ID).getObject();
                audioFormat = new ActiveXComponent(AUDIO_FORMAT_PROG_ID).getObject();
                voice = new ActiveXComponent(SPEECH_OBJECT_TOKEN).getObject();

                // Configure the audio format before attaching it to the stream
                // (presumably the stream copies the format when it is attached - confirm)
                Dispatch.put(audioFormat, "Type", new Variant(AUDIO_FORMAT_TYPE));
                Dispatch.putRef(fileStream, "Format", audioFormat);
                // Open the output stream, which creates the wav file
                Dispatch.call(fileStream, "Open", new Variant(wavPath),
                        new Variant(FILE_MODE_CREATE_FOR_WRITE), new Variant(true));
                streamOpened = true;
                // Route the voice output into the file stream
                Dispatch.putRef(voice, "AudioOutputStream", fileStream);
                Dispatch.put(voice, "Volume", new Variant(VOLUME));
                Dispatch.put(voice, "Rate", new Variant(RATE));

                Dispatch.call(voice, "Speak", new Variant(text));
            } catch (RuntimeException e) {
                failure = e;
            }

            // Close the output file only if it was actually opened
            if (streamOpened) {
                try {
                    Dispatch.call(fileStream, "Close");
                } catch (RuntimeException e) {
                    failure = mergeFailure(failure, e);
                }
            }
            // Detach the file stream from the voice
            if (voice != null) {
                try {
                    Dispatch.putRef(voice, "AudioOutputStream", null);
                } catch (RuntimeException e) {
                    failure = mergeFailure(failure, e);
                }
            }
        } finally {
            releaseIfPresent(audioFormat);
            releaseIfPresent(fileStream);
            releaseIfPresent(voice);
        }
        if (failure != null) {
            throw failure;
        }
    }

    /**
     * Keeps the first failure as the primary one and records any later failure as suppressed.
     *
     * @param primary the failure recorded so far, or null
     * @param next the failure that just occurred
     * @return the failure to report
     */
    private static RuntimeException mergeFailure(RuntimeException primary, RuntimeException next) {
        if (primary == null) {
            return next;
        }
        primary.addSuppressed(next);
        return primary;
    }

    /**
     * Releases a COM object if it was created.
     *
     * @param dispatch the COM object, or null
     */
    private static void releaseIfPresent(Dispatch dispatch) {
        if (dispatch != null) {
            dispatch.safeRelease();
        }
    }
}

四、尾言

JACOB 使用起来比较简单,主要是需要添加相应的 SDK 工具包(DLL);不过它无法在 Linux 系统上运行,在生产环境中局限性很大,这也是这次摸索的痛点。后续笔者会继续尝试新的开源方案来实现这个功能。
以上是全部的内容,感谢阅读和指正。

你可能感兴趣的:(语音交互模块,后端的路,java,windows)