Unity 科大讯飞离线语音合成
好久没有更新文章了,今天我们继续更新科大讯飞的语音的文章。
之前在语音合成部分由于在线语音合成的处理时间太长,所以使用了C#自带的语音合成,处理是快了,但是合成的声音特别难听。
所以今天更新一篇离线语音合成的文章。
废话不多说,直接进入正题。
1.下载语音合成SDK,需要选择离线版本,否则无法使用离线合成。
2.解压SDK,这里我们需要用到common.jet、xiaofeng.jet、xiaoyan.jet这三个.jet文件,缺一不可,否则会报错误代码:文件缺失。另外还需要msc_x64.dll库。
3.代码部分
using UnityEngine;
using msc;
using System;
using System.Text;
using System.IO;
using System.Runtime.InteropServices;
using System.Threading;
using UnityEngine.Networking;
using System.Collections;
/// <summary>
/// Demo component that synthesizes a fixed sentence with the iFlytek MSC SDK
/// (online or offline engine), writes the result as a WAV file under
/// StreamingAssets and plays it through the AudioSource on this GameObject.
/// A left mouse click triggers one synthesis run.
/// </summary>
public class Main : MonoBehaviour
{
    private const string speekText = "北京市今天全天晴,气温7℃ ~ 19℃,空气质量优,有北风4-5级,挺凉快的。";
    private const string session_begin_params = "voice_name = xiaoyan, text_encoding = utf8, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 0";

    // Size in bytes of the WAV header reserved at the start of the audio stream.
    private const int WaveHeaderSize = 44;
    // Delay between two QTTSAudioGet polls, in milliseconds, for each engine.
    private const int OnlinePollIntervalMs = 100;
    private const int OfflinePollIntervalMs = 1;

    private string offline_session_begin_params;
    private IntPtr session_id;
    private int err_code;
    private byte[] bytes;
    // True once MSPLogin succeeded; gates synthesis and logout.
    private bool loggedIn;

    /// <summary>
    /// Builds the offline session parameters, which embed the absolute paths
    /// of the voice resource files located in Assets/TTS.
    /// </summary>
    private void Awake()
    {
        // The resource paths are passed with backslashes instead of forward slashes.
        string xiaoyan_path = (Application.dataPath + "/TTS/xiaoyan.jet").Replace("/", "\\");
        string common_path = (Application.dataPath + "/TTS/common.jet").Replace("/", "\\");
        offline_session_begin_params = "engine_type = local, voice_name = xiaoyan, text_encoding = utf8, tts_res_path = fo|" + xiaoyan_path + ";fo|" + common_path + ", sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 0";
    }

    /// <summary>
    /// Logs in to the MSC SDK. Success is only reported when the login
    /// actually succeeded.
    /// </summary>
    private void Start()
    {
        int message = MSCDLL.MSPLogin("", "", "appid=5f80198b,word_dir= . ");
        if (message != (int)Errors.MSP_SUCCESS)
        {
            Debug.LogError("登录失败!错误信息:" + message);
            return;
        }
        loggedIn = true;
        Debug.Log("登录成功");
    }

    /// <summary>
    /// Starts an offline synthesis of the demo sentence on left mouse click.
    /// </summary>
    private void Update()
    {
        if (Input.GetMouseButtonDown(0))
        {
            // Call Online_TTS(speekText) here instead to use the online engine.
            Offline_TTS(speekText);
        }
    }

    /// <summary>
    /// Synthesizes <paramref name="speekText"/> with the online engine and plays it.
    /// </summary>
    /// <param name="speekText">Text to synthesize.</param>
    private void Online_TTS(string speekText)
    {
        Synthesize(speekText, session_begin_params, OnlinePollIntervalMs);
    }

    /// <summary>
    /// Synthesizes <paramref name="speekText"/> with the offline (local) engine and plays it.
    /// </summary>
    /// <param name="speekText">Text to synthesize.</param>
    private void Offline_TTS(string speekText)
    {
        Synthesize(speekText, offline_session_begin_params, OfflinePollIntervalMs);
    }

    /// <summary>
    /// Runs one complete synthesis session: opens the session, submits the text,
    /// collects the audio, closes the session, then saves and plays the WAV file.
    /// </summary>
    /// <param name="text">Text to synthesize.</param>
    /// <param name="sessionParams">Parameter string passed to QTTSSessionBegin.</param>
    /// <param name="pollIntervalMs">Delay between two QTTSAudioGet calls, in milliseconds.</param>
    private void Synthesize(string text, string sessionParams, int pollIntervalMs)
    {
        if (!loggedIn)
        {
            Debug.LogError("尚未登录成功,无法进行语音合成");
            return;
        }

        // Open the synthesis session.
        session_id = MSCDLL.QTTSSessionBegin(sessionParams, ref err_code);
        if (err_code != (int)Errors.MSP_SUCCESS)
        {
            Debug.LogError("初始化语音合成失败,错误信息:" + err_code);
            return;
        }

        // Submit the text. The session declares text_encoding = utf8, so the
        // length is the UTF-8 byte count.
        // NOTE(review): assumes MSCDLL marshals the string as UTF-8 - confirm against its DllImport.
        err_code = MSCDLL.QTTSTextPut(session_id, text, (uint)Encoding.UTF8.GetByteCount(text), string.Empty);
        if (err_code != (int)Errors.MSP_SUCCESS)
        {
            Debug.LogError("向服务器发送数据失败,错误信息:" + err_code);
            // Close the session that was opened above so it is not leaked.
            MSCDLL.QTTSSessionEnd(session_id, "");
            return;
        }

        using (MemoryStream memoryStream = new MemoryStream())
        {
            // Reserve room for the WAV header; it is filled in once the data size is known.
            memoryStream.Write(new byte[WaveHeaderSize], 0, WaveHeaderSize);

            uint audio_len = 0;
            SynthStatus synth_status = SynthStatus.MSP_TTS_FLAG_STILL_HAVE_DATA;
            while (true)
            {
                IntPtr source = MSCDLL.QTTSAudioGet(session_id, ref audio_len, ref synth_status, ref err_code);
                if (err_code != (int)Errors.MSP_SUCCESS)
                {
                    break;
                }
                if (audio_len > 0 && source != IntPtr.Zero)
                {
                    byte[] array = new byte[audio_len];
                    Marshal.Copy(source, array, 0, (int)audio_len);
                    memoryStream.Write(array, 0, array.Length);
                }
                if (synth_status == SynthStatus.MSP_TTS_FLAG_DATA_END)
                {
                    break;
                }
                // NOTE(review): this blocks the calling thread (Update) between polls;
                // consider moving the loop to a coroutine or worker thread.
                Thread.Sleep(pollIntervalMs);
            }

            // Keep the QTTSAudioGet result: QTTSSessionEnd overwrites err_code.
            int audioError = err_code;
            err_code = MSCDLL.QTTSSessionEnd(session_id, "");
            if (audioError != (int)Errors.MSP_SUCCESS)
            {
                Debug.LogError("获取合成音频失败,错误信息:" + audioError);
                return;
            }
            if (err_code != (int)Errors.MSP_SUCCESS)
            {
                Debug.LogError("会话结束失败!错误信息: " + err_code);
                return;
            }

            // Build the WAV header and write it over the reserved bytes.
            WAVE_Header header = getWave_Header((int)memoryStream.Length - WaveHeaderSize);
            byte[] headerByte = StructToBytes(header);
            memoryStream.Position = 0;
            memoryStream.Write(headerByte, 0, headerByte.Length);
            bytes = memoryStream.ToArray();
        }

        // The file is named after this GameObject. WriteAllBytes overwrites an existing file.
        string wavPath = Application.streamingAssetsPath + "/" + name + ".wav";
        Directory.CreateDirectory(Application.streamingAssetsPath);
        File.WriteAllBytes(wavPath, bytes);
        StartCoroutine(OnAudioLoadAndPaly(wavPath, AudioType.WAV, gameObject.GetComponent<AudioSource>()));
        Debug.Log("合成结束成功");
    }

    /// <summary>
    /// Copies the marshaled representation of a structure into a new byte array.
    /// </summary>
    /// <param name="structure">Structure instance to convert.</param>
    /// <returns>The bytes of the marshaled structure.</returns>
    private byte[] StructToBytes(object structure)
    {
        int num = Marshal.SizeOf(structure);
        IntPtr intPtr = Marshal.AllocHGlobal(num);
        try
        {
            Marshal.StructureToPtr(structure, intPtr, false);
            byte[] array = new byte[num];
            Marshal.Copy(intPtr, array, 0, num);
            return array;
        }
        finally
        {
            // Always release the unmanaged buffer, even if marshaling throws.
            Marshal.FreeHGlobal(intPtr);
        }
    }

    /// <summary>
    /// Creates a WAV header for 16 kHz, mono, 16-bit audio.
    /// </summary>
    /// <param name="data_len">Size of the audio data in bytes, excluding the header.</param>
    /// <returns>The populated header.</returns>
    private WAVE_Header getWave_Header(int data_len)
    {
        return new WAVE_Header
        {
            RIFF_ID = 1179011410,       // "RIFF" (little-endian)
            File_Size = data_len + 36,  // total file size minus the 8 bytes of RIFF_ID and File_Size
            RIFF_Type = 1163280727,     // "WAVE"
            FMT_ID = 544501094,         // "fmt "
            FMT_Size = 16,
            FMT_Tag = 1,
            FMT_Channel = 1,
            FMT_SamplesPerSec = 16000,  // matches sample_rate in the session parameters
            AvgBytesPerSec = 32000,     // 16000 samples/s * 2 bytes per sample
            BlockAlign = 2,
            BitsPerSample = 16,
            DATA_ID = 1635017060,       // "data"
            DATA_Size = data_len
        };
    }

    /// <summary>
    /// WAV file header. The explicit packing pins the marshaled size to 44 bytes.
    /// </summary>
    [StructLayout(LayoutKind.Sequential, Pack = 1)]
    private struct WAVE_Header
    {
        public int RIFF_ID;
        public int File_Size;
        public int RIFF_Type;
        public int FMT_ID;
        public int FMT_Size;
        public short FMT_Tag;
        public ushort FMT_Channel;
        public int FMT_SamplesPerSec;
        public int AvgBytesPerSec;
        public ushort BlockAlign;
        public ushort BitsPerSample;
        public int DATA_ID;
        public int DATA_Size;
    }

    /// <summary>
    /// Loads an audio clip through UnityWebRequest and plays it.
    /// </summary>
    /// <param name="url">Location of the audio file.</param>
    /// <param name="type">Audio format of the file.</param>
    /// <param name="audio">AudioSource that plays the loaded clip.</param>
    /// <returns>Coroutine enumerator.</returns>
    public IEnumerator OnAudioLoadAndPaly(string url, AudioType type, AudioSource audio)
    {
        if (audio == null)
        {
            Debug.LogError("未找到AudioSource组件,无法播放音频");
            yield break;
        }

        // Dispose the request once the clip has been extracted.
        using (UnityWebRequest www = UnityWebRequestMultimedia.GetAudioClip(url, type))
        {
            yield return www.SendWebRequest();
            if (www.isHttpError || www.isNetworkError)
            {
                Debug.LogError(www.error);
            }
            else
            {
                AudioClip clip = DownloadHandlerAudioClip.GetContent(www);
                audio.clip = clip;
                audio.Play();
            }
        }
    }

    /// <summary>
    /// Logs out of the MSC SDK if the login succeeded earlier.
    /// </summary>
    private void OnDestroy()
    {
        if (!loggedIn)
        {
            return;
        }
        MSCDLL.MSPLogout();
        loggedIn = false;
        Debug.Log("注销成功");
    }
}
4.在线参数和离线参数区别
/// Online session parameters
session_begin_params = "voice_name = xiaoyan, text_encoding = utf8, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 0";
/// Offline session parameters
string xiaoyan_path = (Application.dataPath + "/TTS/xiaoyan.jet").Replace("/", "\\");// the slash replacement was explained in an earlier article, so it is not repeated here
string common_path = (Application.dataPath + "/TTS/common.jet").Replace("/", "\\");// without the replacement a parameter error is reported
session_begin_params = "engine_type = local, voice_name = xiaoyan, text_encoding = utf8, tts_res_path = fo|" + xiaoyan_path + ";fo|" + common_path + ", sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 0";
离线参数比在线参数多了"engine_type"和"tts_res_path"两个参数。
5.在线合成方法和离线合成方法的区别
/// <summary>
/// Synthesizes <paramref name="speekText"/> with the online engine, saves the
/// audio as a WAV file under StreamingAssets and starts playing it.
/// </summary>
/// <param name="speekText">Text to synthesize.</param>
private void Online_TTS(string speekText)
{
    // Open the synthesis session.
    session_id = MSCDLL.QTTSSessionBegin(session_begin_params, ref err_code);
    if (err_code != (int)Errors.MSP_SUCCESS)
    {
        Debug.LogError("初始化语音合成失败,错误信息:" + err_code);
        return;
    }
    // Submit the text. The session declares text_encoding = utf8, so the
    // length is the UTF-8 byte count.
    // NOTE(review): assumes MSCDLL marshals the string as UTF-8 - confirm against its DllImport.
    err_code = MSCDLL.QTTSTextPut(session_id, speekText, (uint)Encoding.UTF8.GetByteCount(speekText), string.Empty);
    if (err_code != (int)Errors.MSP_SUCCESS)
    {
        Debug.LogError("向服务器发送数据失败,错误信息:" + err_code);
        // Close the session that was opened above so it is not leaked.
        MSCDLL.QTTSSessionEnd(session_id, "");
        return;
    }
    using (MemoryStream memoryStream = new MemoryStream())
    {
        // Reserve 44 bytes for the WAV header; it is written once the data size is known.
        memoryStream.Write(new byte[44], 0, 44);
        uint audio_len = 0;
        SynthStatus synth_status = SynthStatus.MSP_TTS_FLAG_STILL_HAVE_DATA;
        while (true)
        {
            IntPtr source = MSCDLL.QTTSAudioGet(session_id, ref audio_len, ref synth_status, ref err_code);
            if (err_code != (int)Errors.MSP_SUCCESS)
                break;
            if (audio_len > 0 && source != IntPtr.Zero)
            {
                byte[] array = new byte[audio_len];
                Marshal.Copy(source, array, 0, (int)audio_len);
                memoryStream.Write(array, 0, array.Length);
            }
            if (synth_status == SynthStatus.MSP_TTS_FLAG_DATA_END)
                break;
            // Online synthesis polls every 100 ms.
            Thread.Sleep(100);
        }
        // Keep the QTTSAudioGet result: QTTSSessionEnd overwrites err_code.
        int audio_err = err_code;
        err_code = MSCDLL.QTTSSessionEnd(session_id, "");
        if (audio_err != (int)Errors.MSP_SUCCESS)
        {
            Debug.LogError("获取合成音频失败,错误信息:" + audio_err);
            return;
        }
        if (err_code != (int)Errors.MSP_SUCCESS)
        {
            Debug.LogError("会话结束失败!错误信息: " + err_code);
            return;
        }
        WAVE_Header header = getWave_Header((int)memoryStream.Length - 44);// build the WAV header
        byte[] headerByte = StructToBytes(header);// convert the header struct to bytes
        memoryStream.Position = 0;// seek back to the reserved header bytes
        memoryStream.Write(headerByte, 0, headerByte.Length);// write the header
        bytes = memoryStream.ToArray();
    }
    // The file is named after this GameObject. WriteAllBytes overwrites an existing file.
    string wavPath = Application.streamingAssetsPath + "/" + name + ".wav";
    Directory.CreateDirectory(Application.streamingAssetsPath);
    File.WriteAllBytes(wavPath, bytes);
    StartCoroutine(OnAudioLoadAndPaly(wavPath, AudioType.WAV, gameObject.GetComponent<AudioSource>()));
    Debug.Log("合成结束成功");
}
/// <summary>
/// Synthesizes <paramref name="speekText"/> with the offline (local) engine,
/// saves the audio as a WAV file under StreamingAssets and starts playing it.
/// </summary>
/// <param name="speekText">Text to synthesize.</param>
private void Offline_TTS(string speekText)
{
    // Open the synthesis session with the offline parameters.
    session_id = MSCDLL.QTTSSessionBegin(offline_session_begin_params, ref err_code);
    if (err_code != (int)Errors.MSP_SUCCESS)
    {
        Debug.LogError("初始化语音合成失败,错误信息:" + err_code);
        return;
    }
    // Submit the text. The session declares text_encoding = utf8, so the
    // length is the UTF-8 byte count.
    // NOTE(review): assumes MSCDLL marshals the string as UTF-8 - confirm against its DllImport.
    err_code = MSCDLL.QTTSTextPut(session_id, speekText, (uint)Encoding.UTF8.GetByteCount(speekText), string.Empty);
    if (err_code != (int)Errors.MSP_SUCCESS)
    {
        Debug.LogError("向服务器发送数据失败,错误信息:" + err_code);
        // Close the session that was opened above so it is not leaked.
        MSCDLL.QTTSSessionEnd(session_id, "");
        return;
    }
    using (MemoryStream memoryStream = new MemoryStream())
    {
        // Reserve 44 bytes for the WAV header; it is written once the data size is known.
        memoryStream.Write(new byte[44], 0, 44);
        uint audio_len = 0;
        SynthStatus synth_status = SynthStatus.MSP_TTS_FLAG_STILL_HAVE_DATA;
        while (true)
        {
            IntPtr source = MSCDLL.QTTSAudioGet(session_id, ref audio_len, ref synth_status, ref err_code);
            if (err_code != (int)Errors.MSP_SUCCESS)
                break;
            if (audio_len > 0 && source != IntPtr.Zero)
            {
                byte[] array = new byte[audio_len];
                Marshal.Copy(source, array, 0, (int)audio_len);
                memoryStream.Write(array, 0, array.Length);
            }
            if (synth_status == SynthStatus.MSP_TTS_FLAG_DATA_END)
                break;
            // Offline synthesis polls every 1 ms.
            Thread.Sleep(1);
        }
        // Keep the QTTSAudioGet result: QTTSSessionEnd overwrites err_code.
        int audio_err = err_code;
        err_code = MSCDLL.QTTSSessionEnd(session_id, "");
        if (audio_err != (int)Errors.MSP_SUCCESS)
        {
            Debug.LogError("获取合成音频失败,错误信息:" + audio_err);
            return;
        }
        if (err_code != (int)Errors.MSP_SUCCESS)
        {
            Debug.LogError("会话结束失败!错误信息: " + err_code);
            return;
        }
        WAVE_Header header = getWave_Header((int)memoryStream.Length - 44);// build the WAV header
        byte[] headerByte = StructToBytes(header);// convert the header struct to bytes
        memoryStream.Position = 0;// seek back to the reserved header bytes
        memoryStream.Write(headerByte, 0, headerByte.Length);// write the header
        bytes = memoryStream.ToArray();
    }
    // The file is named after this GameObject. WriteAllBytes overwrites an existing file.
    string wavPath = Application.streamingAssetsPath + "/" + name + ".wav";
    Directory.CreateDirectory(Application.streamingAssetsPath);
    File.WriteAllBytes(wavPath, bytes);
    StartCoroutine(OnAudioLoadAndPaly(wavPath, AudioType.WAV, gameObject.GetComponent<AudioSource>()));
    Debug.Log("合成结束成功");
}
两者的区别:会话参数由session_begin_params换成offline_session_begin_params,线程休眠时间从100毫秒变成1毫秒。
至于为什么在线合成需要更长的休眠时间(缩短休眠时长就无法正常合成),以及为什么不能去掉休眠,还望知道原因的大佬指点一下。
最后:我是一菜鸡,不停努力的菜鸡。
guiqian1987725: 搞定了么?我的回调不执行
guiqian1987725: 搞定了么?我的回调不执行
2401_84181564: 文章内容通俗易懂,适合不同层次的读者。【我也写了一些相关领域的文章,希望能够得到博主的指导,共同进步!】
2401_84538135: 优质好文,细节很到位!【我也写了一些相关领域的文章,希望能够得到博主的指导,共同进步!】
2401_84538337: 这篇文章是优质之作,内容充实,结构明晰,语言流畅且通俗易懂,适合广大读者阅读。【我也写了一些相关领域的文章,希望能够得到博主的指导,共同进步!】