讯飞语音实时读取合成的音频流数据并转换为AudioClip

在讯飞语音合成音频的过程中,我们可能需要实时读取合成的音频流数据。可以按照下方的官方教程,配合后面给出的两个帮助类来解决。

合成中除了音频文件如何获取合成的音频流数据:http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=11426

获得的音频流数据是PCM格式,可以通过下面的工具类转换为Wav,进而转换为AudioClip.

使用讯飞语音时,将PCM文件转WAV文件。

using System;
using System.IO;
using System.Text;
using System.Runtime.InteropServices;
using UnityEngine;

/// <summary>
/// Unity component that prepends a canonical 44-byte RIFF/WAVE header to raw PCM
/// audio bytes so that the result can be handled as a .wav file.
/// A reference to the most recently awoken component is published through <see cref="instance"/>.
/// </summary>
public class PCM2WAV:MonoBehaviour
{
	// Error messages (currently not referenced by any method in this class).
	const string ERRFILENOTEXITS = "File is Not Exits.";
	const string ERRFILEISNOTWAVE = "File is not Wava.";

	// Size in bytes of the RIFF + fmt + data header written in front of the PCM payload.
	const int HeaderSize = 44;

	// Defaults used by the single-argument Pcm2WAV overload: 8000 Hz, mono, 16-bit.
	const uint DefaultSampleRate = 0x1F40;
	const ushort DefaultChannels = 0x01;
	const ushort DefaultBitsPerSample = 0x10;

	/// <summary>
	/// In-memory copy of the WAVE header fields, in the order they appear in the file.
	/// </summary>
	struct HeaderType
	{
		public byte[] riff;                 /* "RIFF" chunk id, 4 bytes */
		public uint file_len;                /* RIFF chunk size (file length minus 8), 4 bytes */
		public byte[] wave;                  /* "WAVE" form type, 4 bytes */
		public byte[] fmt;                   /* "fmt " chunk id, 4 bytes */
		public uint NI1;                     /* fmt chunk size (16 for plain PCM), 4 bytes */
		public ushort format_type;           /* format tag (1 = PCM), 2 bytes */
		public ushort Channels;              /* channel count: 1 = mono, 2 = stereo, 2 bytes */
		public uint frequency;               /* sample rate in Hz, 4 bytes */
		public uint trans_speed;             /* byte rate (bytes per second), 4 bytes */
		public ushort dataBlock;             /* block align (bytes per sample frame), 2 bytes */
		public ushort sample_bits;           /* bits per sample (8/16), 2 bytes */
		public byte[] data;                  /* "data" chunk id, 4 bytes */
		public uint wav_len;                 /* length of the audio payload in bytes, 4 bytes */
	}
	private HeaderType wavHander;       // header of the buffer currently being built or parsed
	private byte[] buff = new byte[44]; // header byte
	private byte[] databuff;            // header + PCM payload of the most recent conversion

	public static PCM2WAV instance;

	/// <summary>
	/// Unity lifecycle hook: publishes this component as <see cref="instance"/> and
	/// allocates the header's tag arrays.
	/// </summary>
	private void Awake()
	{
		instance = this;
		InitialStruct();
	}

	/// <summary>
	/// Allocates the four 4-byte tag arrays of the header structure.
	/// </summary>
	private void InitialStruct()
	{
		wavHander.riff = new byte[4];//RIFF
		wavHander.wave = new byte[4];//WAVE
		wavHander.fmt = new byte[4];//fmt 
		wavHander.data = new byte[4];//data
	}

	/// <summary>
	/// Wraps raw PCM bytes in a WAVE header describing 8000 Hz, mono, 16-bit audio.
	/// </summary>
	/// <param name="bytes">Raw PCM payload; may be empty but not null.</param>
	/// <returns>A new array holding the 44-byte header followed by <paramref name="bytes"/>.</returns>
	/// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
	public byte[] Pcm2WAV(byte[] bytes)
	{
		return Pcm2WAV(bytes, DefaultSampleRate, DefaultChannels, DefaultBitsPerSample);
	}

	/// <summary>
	/// Wraps raw PCM bytes in a WAVE header describing the given audio format.
	/// </summary>
	/// <param name="bytes">Raw PCM payload; may be empty but not null.</param>
	/// <param name="sampleRate">Sample rate in Hz; must be greater than zero.</param>
	/// <param name="channels">Number of interleaved channels; must be greater than zero.</param>
	/// <param name="bitsPerSample">Bits per sample; must be a non-zero multiple of 8.</param>
	/// <returns>A new array holding the 44-byte header followed by <paramref name="bytes"/>.</returns>
	/// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
	/// <exception cref="ArgumentOutOfRangeException">
	/// A format argument is zero, not byte-aligned, or yields a block align / byte rate
	/// that does not fit its header field.
	/// </exception>
	public byte[] Pcm2WAV(byte[] bytes, uint sampleRate, ushort channels, ushort bitsPerSample)
	{
		if (bytes == null)
		{
			throw new ArgumentNullException("bytes");
		}
		if (sampleRate == 0)
		{
			throw new ArgumentOutOfRangeException("sampleRate");
		}
		if (channels == 0)
		{
			throw new ArgumentOutOfRangeException("channels");
		}
		if (bitsPerSample == 0 || bitsPerSample % 8 != 0)
		{
			throw new ArgumentOutOfRangeException("bitsPerSample");
		}

		databuff = new byte[bytes.Length + HeaderSize];
		Array.Copy(bytes, 0, databuff, HeaderSize, bytes.Length);

		InitHeader(sampleRate, channels, bitsPerSample);

		return databuff;
	}

	/// <summary>
	/// Fills the header structure for the current <c>databuff</c> and serialises it into
	/// the first 44 bytes of that buffer.
	/// </summary>
	/// <returns>Always true.</returns>
	private bool InitHeader(uint sampleRate, ushort channels, ushort bitsPerSample)
	{
		uint blockAlign = (uint)channels * (uint)(bitsPerSample / 8);
		if (blockAlign > ushort.MaxValue)
		{
			throw new ArgumentOutOfRangeException("channels");
		}
		ulong byteRate = (ulong)sampleRate * blockAlign;
		if (byteRate > uint.MaxValue)
		{
			throw new ArgumentOutOfRangeException("sampleRate");
		}

		wavHander.riff = Encoding.ASCII.GetBytes("RIFF");
		// The RIFF chunk size excludes the 8 bytes of the "RIFF" id and the size field itself.
		wavHander.file_len = (uint)(databuff.Length - 8);
		wavHander.wave = Encoding.ASCII.GetBytes("WAVE");
		wavHander.fmt = Encoding.ASCII.GetBytes("fmt ");
		wavHander.NI1 = 0x10;
		wavHander.format_type = 0x01;
		wavHander.Channels = channels;
		wavHander.frequency = sampleRate;
		wavHander.trans_speed = (uint)byteRate;
		wavHander.dataBlock = (ushort)blockAlign;
		wavHander.sample_bits = bitsPerSample;
		wavHander.data = Encoding.ASCII.GetBytes("data");
		wavHander.wav_len = (uint)(databuff.Length - HeaderSize);

		// NOTE: BitConverter emits bytes in the host's byte order; the WAVE layout is
		// little-endian, so this serialisation assumes a little-endian host.
		Array.Copy(wavHander.riff, 0, databuff, 0, 4);
		Array.Copy(BitConverter.GetBytes(wavHander.file_len), 0, databuff, 4, 4);
		Array.Copy(wavHander.wave, 0, databuff, 8, 4);
		Array.Copy(wavHander.fmt, 0, databuff, 12, 4);
		Array.Copy(BitConverter.GetBytes(wavHander.NI1), 0, databuff, 16, 4);
		Array.Copy(BitConverter.GetBytes(wavHander.format_type), 0, databuff, 20, 2);
		Array.Copy(BitConverter.GetBytes(wavHander.Channels), 0, databuff, 22, 2);
		Array.Copy(BitConverter.GetBytes(wavHander.frequency), 0, databuff, 24, 4);
		Array.Copy(BitConverter.GetBytes(wavHander.trans_speed), 0, databuff, 28, 4);
		Array.Copy(BitConverter.GetBytes(wavHander.dataBlock), 0, databuff, 32, 2);
		Array.Copy(BitConverter.GetBytes(wavHander.sample_bits), 0, databuff, 34, 2);
		Array.Copy(wavHander.data, 0, databuff, 36, 4);
		Array.Copy(BitConverter.GetBytes(wavHander.wav_len), 0, databuff, 40, 4);
		return true;
	}

	/// <summary>
	/// Parses a 44-byte WAVE header from a byte array into the header structure.
	/// </summary>
	/// <param name="pbuff">Buffer whose first 44 bytes hold the header.</param>
	/// <returns>
	/// True when the header was stored; false when the buffer is null, shorter than
	/// 44 bytes, or does not start with "RIFF".
	/// </returns>
	bool fixedData(byte[] pbuff)
	{
		if (pbuff == null || pbuff.Length < HeaderSize)
		{
			return false;
		}
		if (wavHander.riff == null || wavHander.wave == null || wavHander.fmt == null || wavHander.data == null)
		{
			// Awake may not have run yet; make sure the tag arrays exist before copying into them.
			InitialStruct();
		}

		Array.Copy(pbuff, 0, wavHander.riff, 0, 4);
		if (Encoding.ASCII.GetString(wavHander.riff) != "RIFF")
		{
			return false;
		}
		wavHander.file_len = BitConverter.ToUInt32(pbuff, 4);
		Array.Copy(pbuff, 8, wavHander.wave, 0, 4);
		Array.Copy(pbuff, 12, wavHander.fmt, 0, 4);
		wavHander.NI1 = BitConverter.ToUInt32(pbuff, 16);
		wavHander.format_type = BitConverter.ToUInt16(pbuff, 20);
		wavHander.Channels = BitConverter.ToUInt16(pbuff, 22);
		wavHander.frequency = BitConverter.ToUInt32(pbuff, 24);
		wavHander.trans_speed = BitConverter.ToUInt32(pbuff, 28);
		wavHander.dataBlock = BitConverter.ToUInt16(pbuff, 32);
		wavHander.sample_bits = BitConverter.ToUInt16(pbuff, 34);
		Array.Copy(pbuff, 36, wavHander.data, 0, 4);
		wavHander.wav_len = BitConverter.ToUInt32(pbuff, 40);
		return true;
	}
}

在Unity中直接将WAV文件(或WAV字节数据)转换为AudioClip。

using UnityEngine;
using System.Text;
using System.IO;
using System;

/// <summary>
/// WAV utility for recording and audio playback functions in Unity.
/// Version: 1.0 alpha 1
///
/// - Use "ToAudioClip" to load a wav file / byte array.
///   Decodes uncompressed PCM at 8, 16, 24 and 32 bits into a Unity AudioClip.
///
/// - Use "FromAudioClip" to save a wav file / byte array.
///   Converts an AudioClip's float data into a 16-bit wav byte array.
///
/// For documentation and usage examples: https://github.com/deadlyfingers/UnityWav
/// </summary>
public class WavUtility
{
	// Force save as 16-bit .wav
	const int BlockSize_16Bit = 2;

	// Smallest buffer that can hold a RIFF descriptor, a 16-byte fmt chunk and a data chunk header.
	const int MinHeaderSize = 44;

	/// <summary>
	/// Loads a PCM *.wav file stored under one of Unity's application data paths and
	/// converts it to an AudioClip.
	/// </summary>
	/// <param name="filePath">Local file path to the .wav file.</param>
	/// <returns>
	/// The decoded AudioClip, or null (after logging a warning) when the path is not under
	/// Application.persistentDataPath or Application.dataPath.
	/// </returns>
	/// <exception cref="ArgumentNullException"><paramref name="filePath"/> is null.</exception>
	public static AudioClip ToAudioClip (string filePath)
	{
		if (filePath == null) {
			throw new ArgumentNullException ("filePath");
		}

		// Paths are identifiers, not linguistic text, so compare them ordinally.
		bool inPersistentPath = filePath.StartsWith (Application.persistentDataPath, StringComparison.Ordinal);
		bool inDataPath = filePath.StartsWith (Application.dataPath, StringComparison.Ordinal);
		if (!inPersistentPath && !inDataPath) {
			Debug.LogWarning ("This only supports files that are stored using Unity's Application data path. \nTo load bundled resources use 'Resources.Load(\"filename\") typeof(AudioClip)' method. \nhttps://docs.unity3d.com/ScriptReference/Resources.Load.html");
			return null;
		}

		byte[] fileBytes = File.ReadAllBytes (filePath);
		return ToAudioClip (fileBytes, 0);
	}

	/// <summary>
	/// Decodes the bytes of an uncompressed PCM .wav file into an AudioClip.
	/// The data chunk is expected to follow the fmt chunk directly.
	/// </summary>
	/// <param name="fileBytes">Complete wav file contents, header included.</param>
	/// <param name="offsetSamples">Currently unused; kept for interface compatibility.</param>
	/// <param name="name">Name given to the created AudioClip.</param>
	/// <returns>The created AudioClip.</returns>
	/// <exception cref="ArgumentNullException"><paramref name="fileBytes"/> is null.</exception>
	/// <exception cref="ArgumentException">The buffer is too small or the fmt chunk size is invalid.</exception>
	/// <exception cref="NotSupportedException">The bit depth is not 8, 16, 24 or 32.</exception>
	public static AudioClip ToAudioClip (byte[] fileBytes, int offsetSamples = 0, string name = "wav")
	{
		if (fileBytes == null) {
			throw new ArgumentNullException ("fileBytes");
		}
		if (fileBytes.Length < MinHeaderSize) {
			throw new ArgumentException ("Buffer is too small to contain a wav header.", "fileBytes");
		}

		int subchunk1 = BitConverter.ToInt32 (fileBytes, 16);
		UInt16 audioFormat = BitConverter.ToUInt16 (fileBytes, 20);

		// NB: Only uncompressed PCM wav files are supported.
		string formatCode = FormatCode (audioFormat);
		Debug.AssertFormat (audioFormat == 1 || audioFormat == 65534, "Detected format code '{0}' {1}, but only PCM and WaveFormatExtensable uncompressed formats are currently supported.", audioFormat, formatCode);

		UInt16 channels = BitConverter.ToUInt16 (fileBytes, 22);
		int sampleRate = BitConverter.ToInt32 (fileBytes, 24);
		UInt16 bitDepth = BitConverter.ToUInt16 (fileBytes, 34);

		// The 4-byte data size lives at 24 + subchunk1; it must lie inside the buffer.
		if (subchunk1 < 16 || subchunk1 > fileBytes.Length - 28) {
			throw new ArgumentException ("Invalid fmt chunk size: " + subchunk1, "fileBytes");
		}

		// 16 (offset of fmt size) + 4 (size field) + fmt payload + 4 ("data" id) = offset of the data size field.
		int headerOffset = 16 + 4 + subchunk1 + 4;
		int subchunk2 = BitConverter.ToInt32 (fileBytes, headerOffset);

		float[] data;
		switch (bitDepth) {
		case 8:
			data = Convert8BitByteArrayToAudioClipData (fileBytes, headerOffset, subchunk2);
			break;
		case 16:
			data = Convert16BitByteArrayToAudioClipData (fileBytes, headerOffset, subchunk2);
			break;
		case 24:
			data = Convert24BitByteArrayToAudioClipData (fileBytes, headerOffset, subchunk2);
			break;
		case 32:
			data = Convert32BitByteArrayToAudioClipData (fileBytes, headerOffset, subchunk2);
			break;
		default:
			throw new NotSupportedException (bitDepth + " bit depth is not supported.");
		}

		// AudioClip.Create expects the number of sample frames (samples per channel),
		// while data holds interleaved samples for all channels.
		int frames = data.Length;
		if (channels > 0) {
			frames = data.Length / channels;
			if (frames * channels != data.Length) {
				// Drop a trailing partial frame so SetData never writes past the clip length.
				Array.Resize (ref data, frames * channels);
			}
		}

		AudioClip audioClip = AudioClip.Create (name, frames, (int)channels, sampleRate, false);
		audioClip.SetData (data, 0);
		return audioClip;
	}

	#region wav file bytes to Unity AudioClip conversion methods

	/// <summary>
	/// Reads the data chunk size at <paramref name="headerOffset"/>, advances the offset to
	/// the first audio byte and returns the number of audio bytes that can safely be decoded.
	/// </summary>
	private static int ReadWavSize (byte[] source, ref int headerOffset, int dataSize, int bitDepth)
	{
		int wavSize = BitConverter.ToInt32 (source, headerOffset);
		headerOffset += sizeof(int);
		Debug.AssertFormat (wavSize > 0 && wavSize == dataSize, "Failed to get valid {3}-bit wav size: {0} from data bytes: {1} at offset: {2}", wavSize, dataSize, headerOffset, bitDepth);

		// A negative or oversized declared length would read past the buffer;
		// fall back to the bytes that are actually present.
		int available = source.Length - headerOffset;
		if (wavSize < 0 || wavSize > available) {
			wavSize = available;
		}
		return wavSize;
	}

	/// <summary>
	/// Decodes 8-bit PCM. 8-bit wav samples are unsigned with 128 as the zero level,
	/// so each byte is re-centred before scaling to [-1, 1).
	/// </summary>
	private static float[] Convert8BitByteArrayToAudioClipData (byte[] source, int headerOffset, int dataSize)
	{
		int wavSize = ReadWavSize (source, ref headerOffset, dataSize, 8);

		float[] data = new float[wavSize];
		for (int i = 0; i < wavSize; i++) {
			// Read from the audio payload, not from the start of the file.
			data [i] = (source [headerOffset + i] - 128) / 128f;
		}
		return data;
	}

	/// <summary>
	/// Decodes 16-bit signed PCM, scaling each sample by Int16.MaxValue.
	/// </summary>
	private static float[] Convert16BitByteArrayToAudioClipData (byte[] source, int headerOffset, int dataSize)
	{
		int wavSize = ReadWavSize (source, ref headerOffset, dataSize, 16);

		const int blockSize = sizeof(Int16);
		int sampleCount = wavSize / blockSize;
		float[] data = new float[sampleCount];

		for (int i = 0; i < sampleCount; i++) {
			int offset = headerOffset + i * blockSize;
			data [i] = (float)BitConverter.ToInt16 (source, offset) / Int16.MaxValue;
		}
		return data;
	}

	/// <summary>
	/// Decodes 24-bit signed PCM, scaling each sample by Int32.MaxValue after widening.
	/// </summary>
	private static float[] Convert24BitByteArrayToAudioClipData (byte[] source, int headerOffset, int dataSize)
	{
		int wavSize = ReadWavSize (source, ref headerOffset, dataSize, 24);

		const int blockSize = 3;
		int sampleCount = wavSize / blockSize;
		float[] data = new float[sampleCount];

		// The 3 sample bytes are copied into bytes 1..3 of a 4-byte block whose byte 0
		// stays zero, i.e. the sample is shifted into the upper 24 bits of an Int32
		// so the sign bit lands where BitConverter.ToInt32 expects it.
		byte[] block = new byte[sizeof(int)];

		for (int i = 0; i < sampleCount; i++) {
			int offset = headerOffset + i * blockSize;
			Buffer.BlockCopy (source, offset, block, 1, blockSize);
			data [i] = (float)BitConverter.ToInt32 (block, 0) / Int32.MaxValue;
		}
		return data;
	}

	/// <summary>
	/// Decodes 32-bit signed integer PCM, scaling each sample by Int32.MaxValue.
	/// </summary>
	private static float[] Convert32BitByteArrayToAudioClipData (byte[] source, int headerOffset, int dataSize)
	{
		int wavSize = ReadWavSize (source, ref headerOffset, dataSize, 32);

		const int blockSize = sizeof(Int32);
		int sampleCount = wavSize / blockSize;
		float[] data = new float[sampleCount];

		for (int i = 0; i < sampleCount; i++) {
			int offset = headerOffset + i * blockSize;
			data [i] = (float)BitConverter.ToInt32 (source, offset) / Int32.MaxValue;
		}
		return data;
	}

	#endregion

	/// <summary>
	/// Converts an AudioClip to 16-bit wav bytes without writing a file.
	/// </summary>
	/// <param name="audioClip">Clip to convert.</param>
	/// <returns>The complete wav file contents.</returns>
	public static byte[] FromAudioClip (AudioClip audioClip)
	{
		string file;
		return FromAudioClip (audioClip, out file, false);
	}

	/// <summary>
	/// Converts an AudioClip to 16-bit wav bytes and optionally saves them to
	/// "{Application.persistentDataPath}/{dirname}/{UTC timestamp}.wav".
	/// </summary>
	/// <param name="audioClip">Clip to convert.</param>
	/// <param name="filepath">Receives the saved file path, or null when nothing was saved.</param>
	/// <param name="saveAsFile">Whether to write the bytes to disk.</param>
	/// <param name="dirname">Sub-directory of the persistent data path used when saving.</param>
	/// <returns>The complete wav file contents.</returns>
	/// <exception cref="ArgumentNullException"><paramref name="audioClip"/> is null.</exception>
	public static byte[] FromAudioClip (AudioClip audioClip, out string filepath, bool saveAsFile = true, string dirname = "recordings")
	{
		if (audioClip == null) {
			throw new ArgumentNullException ("audioClip");
		}

		const int headerSize = 44;

		// NB: Only 16 bit output is supported.
		UInt16 bitDepth = 16;

		// audioClip.samples counts frames per channel, so the payload holds
		// samples * channels values of BlockSize_16Bit bytes each.
		int fileSize = audioClip.samples * audioClip.channels * BlockSize_16Bit + headerSize;

		byte[] bytes;
		using (MemoryStream stream = new MemoryStream (fileSize)) {
			// chunk descriptor (riff)
			WriteFileHeader (stream, fileSize);
			// file header (fmt)
			WriteFileFormat (stream, audioClip.channels, audioClip.frequency, bitDepth);
			// data chunks (data)
			WriteFileData (stream, audioClip, bitDepth);

			bytes = stream.ToArray ();
		}

		// Validate total bytes
		Debug.AssertFormat (bytes.Length == fileSize, "Unexpected AudioClip to wav format byte count: {0} == {1}", bytes.Length, fileSize);

		// Save file to persistent storage location
		if (saveAsFile) {
			filepath = string.Format ("{0}/{1}/{2}.{3}", Application.persistentDataPath, dirname, DateTime.UtcNow.ToString ("yyMMdd-HHmmss-fff"), "wav");
			Directory.CreateDirectory (Path.GetDirectoryName (filepath));
			File.WriteAllBytes (filepath, bytes);
		} else {
			filepath = null;
		}

		return bytes;
	}

	#region write .wav file functions

	/// <summary>
	/// Writes the 12-byte RIFF descriptor ("RIFF", chunk size, "WAVE").
	/// </summary>
	/// <returns>Number of bytes written.</returns>
	private static int WriteFileHeader (MemoryStream stream, int fileSize)
	{
		const int total = 12;
		int count = 0;

		// riff chunk id
		count += WriteBytesToMemoryStream (stream, Encoding.ASCII.GetBytes ("RIFF"), "ID");

		// riff chunk size: total size - 8 for the id and size fields themselves
		int chunkSize = fileSize - 8;
		count += WriteBytesToMemoryStream (stream, BitConverter.GetBytes (chunkSize), "CHUNK_SIZE");

		count += WriteBytesToMemoryStream (stream, Encoding.ASCII.GetBytes ("WAVE"), "FORMAT");

		// Validate header
		Debug.AssertFormat (count == total, "Unexpected wav descriptor byte count: {0} == {1}", count, total);

		return count;
	}

	/// <summary>
	/// Writes the 24-byte fmt chunk describing uncompressed PCM audio.
	/// </summary>
	/// <returns>Number of bytes written.</returns>
	private static int WriteFileFormat (MemoryStream stream, int channels, int sampleRate, UInt16 bitDepth)
	{
		const int total = 24;
		int count = 0;

		count += WriteBytesToMemoryStream (stream, Encoding.ASCII.GetBytes ("fmt "), "FMT_ID");

		int subchunk1Size = 16; // 24 - 8
		count += WriteBytesToMemoryStream (stream, BitConverter.GetBytes (subchunk1Size), "SUBCHUNK_SIZE");

		UInt16 audioFormat = 1; // PCM
		count += WriteBytesToMemoryStream (stream, BitConverter.GetBytes (audioFormat), "AUDIO_FORMAT");

		UInt16 numChannels = Convert.ToUInt16 (channels);
		count += WriteBytesToMemoryStream (stream, BitConverter.GetBytes (numChannels), "CHANNELS");

		count += WriteBytesToMemoryStream (stream, BitConverter.GetBytes (sampleRate), "SAMPLE_RATE");

		int bytesPerSample = BytesPerSample (bitDepth);

		int byteRate = sampleRate * channels * bytesPerSample;
		count += WriteBytesToMemoryStream (stream, BitConverter.GetBytes (byteRate), "BYTE_RATE");

		UInt16 blockAlign = Convert.ToUInt16 (channels * bytesPerSample);
		count += WriteBytesToMemoryStream (stream, BitConverter.GetBytes (blockAlign), "BLOCK_ALIGN");

		count += WriteBytesToMemoryStream (stream, BitConverter.GetBytes (bitDepth), "BITS_PER_SAMPLE");

		// Validate format
		Debug.AssertFormat (count == total, "Unexpected wav fmt byte count: {0} == {1}", count, total);

		return count;
	}

	/// <summary>
	/// Writes the data chunk: "data" id, payload size and the clip's samples as 16-bit PCM.
	/// </summary>
	/// <returns>Number of bytes written.</returns>
	private static int WriteFileData (MemoryStream stream, AudioClip audioClip, UInt16 bitDepth)
	{
		const int total = 8;
		int count = 0;

		// Copy the interleaved float samples of every channel out of the AudioClip.
		float[] data = new float[audioClip.samples * audioClip.channels];
		audioClip.GetData (data, 0);

		byte[] bytes = ConvertAudioClipDataToInt16ByteArray (data);

		count += WriteBytesToMemoryStream (stream, Encoding.ASCII.GetBytes ("data"), "DATA_ID");

		// Payload size covers all channels, matching the converted byte array.
		int subchunk2Size = audioClip.samples * audioClip.channels * BlockSize_16Bit;
		count += WriteBytesToMemoryStream (stream, BitConverter.GetBytes (subchunk2Size), "SAMPLES");

		// Validate header
		Debug.AssertFormat (count == total, "Unexpected wav data id byte count: {0} == {1}", count, total);

		// Write bytes to stream
		count += WriteBytesToMemoryStream (stream, bytes, "DATA");

		// Validate audio data
		Debug.AssertFormat (bytes.Length == subchunk2Size, "Unexpected AudioClip to wav subchunk2 size: {0} == {1}", bytes.Length, subchunk2Size);

		return count;
	}

	/// <summary>
	/// Converts float samples to little-endian 16-bit PCM bytes.
	/// Samples are clamped to [-1, 1] (NaN becomes 0) so the conversion cannot overflow.
	/// </summary>
	private static byte[] ConvertAudioClipDataToInt16ByteArray (float[] data)
	{
		const int blockSize = sizeof(Int16);
		byte[] bytes = new byte[data.Length * blockSize];

		for (int i = 0; i < data.Length; i++) {
			float sample = data [i];
			if (float.IsNaN (sample)) {
				sample = 0f;
			} else if (sample > 1f) {
				sample = 1f;
			} else if (sample < -1f) {
				sample = -1f;
			}

			Int16 value = Convert.ToInt16 (sample * Int16.MaxValue);
			int offset = i * blockSize;
			bytes [offset] = (byte)(value & 0xFF);
			bytes [offset + 1] = (byte)((value >> 8) & 0xFF);
		}

		return bytes;
	}

	/// <summary>
	/// Appends <paramref name="bytes"/> to the stream. The tag is only a label for debugging.
	/// </summary>
	/// <returns>Number of bytes written.</returns>
	private static int WriteBytesToMemoryStream (MemoryStream stream, byte[] bytes, string tag = "")
	{
		int count = bytes.Length;
		stream.Write (bytes, 0, count);
		return count;
	}

	#endregion

	/// <summary>
	/// Derives a bit depth value from an AudioClip's sample count, channel count,
	/// length and frequency.
	/// </summary>
	/// <param name="audioClip">Audio clip.</param>
	/// <returns>The computed value; asserted to be 8, 16 or 32.</returns>
	public static UInt16 BitDepth (AudioClip audioClip)
	{
		// NOTE(review): it is unclear that this expression yields a bit depth for arbitrary
		// clips; nothing in this class calls the method (FromAudioClip hard-codes 16). Confirm before relying on it.
		UInt16 bitDepth = Convert.ToUInt16 (audioClip.samples * audioClip.channels * audioClip.length / audioClip.frequency);
		Debug.AssertFormat (bitDepth == 8 || bitDepth == 16 || bitDepth == 32, "Unexpected AudioClip bit depth: {0}. Expected 8 or 16 or 32 bit.", bitDepth);
		return bitDepth;
	}

	/// <summary>
	/// Returns the number of whole bytes needed for one sample of the given bit depth.
	/// </summary>
	private static int BytesPerSample (UInt16 bitDepth)
	{
		return bitDepth / 8;
	}

	/// <summary>
	/// Returns the size in bytes of the integer type used for the given bit depth.
	/// </summary>
	/// <exception cref="NotSupportedException">The bit depth is not 8, 16 or 32.</exception>
	private static int BlockSize (UInt16 bitDepth)
	{
		switch (bitDepth) {
		case 32:
			return sizeof(Int32); // 32-bit -> 4 bytes (Int32)
		case 16:
			return sizeof(Int16); // 16-bit -> 2 bytes (Int16)
		case 8:
			return sizeof(sbyte); // 8-bit -> 1 byte (sbyte)
		default:
			throw new NotSupportedException (bitDepth + " bit depth is not supported.");
		}
	}

	/// <summary>
	/// Maps a wav format tag to a readable name; logs a warning and returns an empty
	/// string for tags it does not know.
	/// </summary>
	private static string FormatCode (UInt16 code)
	{
		switch (code) {
		case 1:
			return "PCM";
		case 2:
			return "ADPCM";
		case 3:
			return "IEEE";
		case 7:
			return "μ-law";
		case 65534:
			return "WaveFormatExtensable";
		default:
			Debug.LogWarning ("Unknown wav code format:" + code);
			return "";
		}
	}

}

你可能感兴趣的:(android,unity3d)