315 lines
12 KiB
Python
315 lines
12 KiB
Python
import os
|
|
import soundfile as sf
|
|
import redis
|
|
import hashlib
|
|
import json
|
|
import traceback
|
|
from kafka import KafkaConsumer
|
|
from tools.i18n.i18n import I18nAuto
|
|
from GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights, get_tts_wav
|
|
from dotenv import load_dotenv
|
|
import torch
|
|
|
|
# Load settings from the .env file into the process environment.
load_dotenv()

# Prefer the first CUDA GPU and fall back to the CPU when CUDA is unavailable.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"使用设备: {device}")


def _env_int(name):
    """Return environment variable ``name`` parsed as an integer.

    A bare ``int(os.getenv(name))`` fails with a message that does not say
    which variable was unset or malformed.  This helper raises the same
    exception type (``TypeError`` when the variable is unset, ``ValueError``
    when it is not an integer) but names the variable in the message.
    """
    raw = os.getenv(name)
    try:
        return int(raw)
    except (TypeError, ValueError) as exc:
        raise type(exc)(
            f"Environment variable {name} must be set to an integer, got {raw!r}"
        ) from exc


# Redis connection settings
REDIS_HOST = os.getenv('REDIS_HOST')
REDIS_PORT = _env_int('REDIS_PORT')
REDIS_TASK_DB = _env_int('REDIS_TASK_DB')  # DB 3 (per the original note; the value comes from the environment)
REDIS_PASSWORD = os.getenv('REDIS_PASSWORD')

# Kafka settings
KAFKA_BROKER = os.getenv('KAFKA_BROKER')
KAFKA_TTS_TOPIC = os.getenv('KAFKA_TTS_TOPIC')

# TTS settings
GPT_MODEL_PATH = os.getenv('GPT_MODEL_PATH')
SOVITS_MODEL_PATH = os.getenv('SOVITS_MODEL_PATH')
REF_LANGUAGE = os.getenv('REF_LANGUAGE')
TARGET_LANGUAGE = os.getenv('TARGET_LANGUAGE')
OUTPUT_PATH = os.getenv('OUTPUT_PATH')

# Redis database index used for each voice
REDIS_GIRL_DB = _env_int('REDIS_GIRL_DB')
REDIS_WOMAN_DB = _env_int('REDIS_WOMAN_DB')
REDIS_MAN_DB = _env_int('REDIS_MAN_DB')
REDIS_LEIJUN_DB = _env_int('REDIS_LEIJUN_DB')
REDIS_DUFU_DB = _env_int('REDIS_DUFU_DB')
REDIS_HEJIONG_DB = _env_int('REDIS_HEJIONG_DB')
REDIS_MAHUATENG_DB = _env_int('REDIS_MAHUATENG_DB')
REDIS_LIDAN_DB = _env_int('REDIS_LIDAN_DB')
REDIS_YUHUA_DB = _env_int('REDIS_YUHUA_DB')
REDIS_LIUZHENYUN_DB = _env_int('REDIS_LIUZHENYUN_DB')
REDIS_DABING_DB = _env_int('REDIS_DABING_DB')
REDIS_LUOXIANG_DB = _env_int('REDIS_LUOXIANG_DB')
REDIS_XUZHIYUAN_DB = _env_int('REDIS_XUZHIYUAN_DB')


def _make_redis_client(db):
    """Create a Redis client for database ``db`` with the shared host, port and password."""
    return redis.Redis(host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD, db=db)


# One Redis client per voice
redis_tts_girl = _make_redis_client(REDIS_GIRL_DB)
redis_tts_woman = _make_redis_client(REDIS_WOMAN_DB)
redis_tts_man = _make_redis_client(REDIS_MAN_DB)
redis_tts_leijun = _make_redis_client(REDIS_LEIJUN_DB)
redis_tts_dufu = _make_redis_client(REDIS_DUFU_DB)
redis_tts_hejiong = _make_redis_client(REDIS_HEJIONG_DB)
redis_tts_mahuateng = _make_redis_client(REDIS_MAHUATENG_DB)
redis_tts_lidan = _make_redis_client(REDIS_LIDAN_DB)
redis_tts_yuhua = _make_redis_client(REDIS_YUHUA_DB)
redis_tts_liuzhenyun = _make_redis_client(REDIS_LIUZHENYUN_DB)
redis_tts_dabing = _make_redis_client(REDIS_DABING_DB)
redis_tts_luoxiang = _make_redis_client(REDIS_LUOXIANG_DB)
redis_tts_xuzhiyuan = _make_redis_client(REDIS_XUZHIYUAN_DB)

# Client for the task status / task info keys
redis_task_client = _make_redis_client(REDIS_TASK_DB)
|
|
|
|
# Map each voice name to the Redis client that stores its results.
# "default" shares the client of the "girl" voice.
voice_to_redis = {
    "default": redis_tts_girl,
    "girl": redis_tts_girl,
    "woman": redis_tts_woman,
    "man": redis_tts_man,
    "leijun": redis_tts_leijun,
    "dufu": redis_tts_dufu,
    "hejiong": redis_tts_hejiong,
    "mahuateng": redis_tts_mahuateng,
    "lidan": redis_tts_lidan,
    "yuhua": redis_tts_yuhua,
    "liuzhenyun": redis_tts_liuzhenyun,
    "dabing": redis_tts_dabing,
    "luoxiang": redis_tts_luoxiang,
    "xuzhiyuan": redis_tts_xuzhiyuan,
}
|
|
|
|
# Translator applied to the language names before they are passed to get_tts_wav.
i18n = I18nAuto()

# Per-voice reference material, read from the environment.
# *_REF_AUDIO is handed to get_tts_wav as the reference wav path;
# *_REF_TEXT is opened as a UTF-8 text file by the functions below.
GIRL_REF_AUDIO = os.getenv('GIRL_REF_AUDIO')
GIRL_REF_TEXT = os.getenv('GIRL_REF_TEXT')

WOMAN_REF_AUDIO = os.getenv('WOMAN_REF_AUDIO')
WOMAN_REF_TEXT = os.getenv('WOMAN_REF_TEXT')

MAN_REF_AUDIO = os.getenv('MAN_REF_AUDIO')
MAN_REF_TEXT = os.getenv('MAN_REF_TEXT')

LEIJUN_REF_AUDIO = os.getenv('LEIJUN_REF_AUDIO')
LEIJUN_REF_TEXT = os.getenv('LEIJUN_REF_TEXT')

DUFU_REF_AUDIO = os.getenv('DUFU_REF_AUDIO')
DUFU_REF_TEXT = os.getenv('DUFU_REF_TEXT')

HEJIONG_REF_AUDIO = os.getenv('HEJIONG_REF_AUDIO')
HEJIONG_REF_TEXT = os.getenv('HEJIONG_REF_TEXT')

MAHUATENG_REF_AUDIO = os.getenv('MAHUATENG_REF_AUDIO')
MAHUATENG_REF_TEXT = os.getenv('MAHUATENG_REF_TEXT')

LIDAN_REF_AUDIO = os.getenv('LIDAN_REF_AUDIO')
LIDAN_REF_TEXT = os.getenv('LIDAN_REF_TEXT')

YUHUA_REF_AUDIO = os.getenv('YUHUA_REF_AUDIO')
YUHUA_REF_TEXT = os.getenv('YUHUA_REF_TEXT')

LIUZHENYUN_REF_AUDIO = os.getenv('LIUZHENYUN_REF_AUDIO')
LIUZHENYUN_REF_TEXT = os.getenv('LIUZHENYUN_REF_TEXT')

DABING_REF_AUDIO = os.getenv('DABING_REF_AUDIO')
DABING_REF_TEXT = os.getenv('DABING_REF_TEXT')

LUOXIANG_REF_AUDIO = os.getenv('LUOXIANG_REF_AUDIO')
LUOXIANG_REF_TEXT = os.getenv('LUOXIANG_REF_TEXT')

XUZHIYUAN_REF_AUDIO = os.getenv('XUZHIYUAN_REF_AUDIO')
XUZHIYUAN_REF_TEXT = os.getenv('XUZHIYUAN_REF_TEXT')
|
|
|
|
def _voice_profile(ref_audio, ref_text):
    """Build the settings dict for one voice.

    Every voice uses the same ``REF_LANGUAGE``; only the reference audio and
    the reference text file differ.
    """
    return {
        "ref_audio": ref_audio,
        "ref_text": ref_text,
        "ref_language": REF_LANGUAGE,
    }


# Settings per voice name. "default" uses the same reference material as "girl".
VOICE_CONFIGS = {
    "girl": _voice_profile(GIRL_REF_AUDIO, GIRL_REF_TEXT),
    "woman": _voice_profile(WOMAN_REF_AUDIO, WOMAN_REF_TEXT),
    "man": _voice_profile(MAN_REF_AUDIO, MAN_REF_TEXT),
    "leijun": _voice_profile(LEIJUN_REF_AUDIO, LEIJUN_REF_TEXT),
    "dufu": _voice_profile(DUFU_REF_AUDIO, DUFU_REF_TEXT),
    "hejiong": _voice_profile(HEJIONG_REF_AUDIO, HEJIONG_REF_TEXT),
    "mahuateng": _voice_profile(MAHUATENG_REF_AUDIO, MAHUATENG_REF_TEXT),
    "lidan": _voice_profile(LIDAN_REF_AUDIO, LIDAN_REF_TEXT),
    "default": _voice_profile(GIRL_REF_AUDIO, GIRL_REF_TEXT),
    "yuhua": _voice_profile(YUHUA_REF_AUDIO, YUHUA_REF_TEXT),
    "liuzhenyun": _voice_profile(LIUZHENYUN_REF_AUDIO, LIUZHENYUN_REF_TEXT),
    "dabing": _voice_profile(DABING_REF_AUDIO, DABING_REF_TEXT),
    "luoxiang": _voice_profile(LUOXIANG_REF_AUDIO, LUOXIANG_REF_TEXT),
    "xuzhiyuan": _voice_profile(XUZHIYUAN_REF_AUDIO, XUZHIYUAN_REF_TEXT),
}
|
|
|
|
def get_audio_hash(text):
    """Return the hexadecimal MD5 digest of ``text``.

    The string is encoded with ``str.encode()``'s default encoding (UTF-8)
    before hashing.
    """
    hasher = hashlib.md5()
    hasher.update(text.encode())
    return hasher.hexdigest()
|
|
|
|
def _run_weight_loader(result):
    """Drain ``result`` if a weight loader returned a lazy iterator.

    NOTE(review): in some GPT-SoVITS releases the weight loaders are generator
    functions, in which case a bare call executes nothing until the generator
    is iterated - confirm against the installed version.  When the loader is
    a plain function (it returns a non-iterator such as ``None``) this helper
    does nothing.
    """
    if hasattr(result, "__next__"):
        for _ in result:
            pass


# Initialise the models once at startup: GPT weights first, then SoVITS weights.
print("正在初始化模型...")
_run_weight_loader(change_gpt_weights(gpt_path=GPT_MODEL_PATH))
_run_weight_loader(change_sovits_weights(sovits_path=SOVITS_MODEL_PATH))
print("模型初始化成功。")
|
|
|
|
def read_ref_text(voice_type):
    """Return the reference text for ``voice_type``, or ``""`` if it cannot be read.

    The path is taken from ``VOICE_CONFIGS[voice_type]["ref_text"]`` and the
    file is read as UTF-8.  A message is printed and an empty string returned
    when the path is not configured, the file does not exist, or the file
    cannot be read or decoded.

    Raises:
        KeyError: if ``voice_type`` is not a key of ``VOICE_CONFIGS``.
    """
    ref_text_path = VOICE_CONFIGS[voice_type]["ref_text"]

    # The path comes from an environment variable and is None when that
    # variable is unset; os.path.exists(None) / open(None) would raise TypeError.
    if not ref_text_path:
        print(f"警告:{voice_type} 的参考文本文件路径未配置。")
        return ""

    # Open directly instead of checking existence first: one filesystem
    # access, and no window in which the file can vanish between the two.
    try:
        with open(ref_text_path, 'r', encoding='utf-8') as file:
            return file.read()
    except FileNotFoundError:
        print(f"警告:{voice_type} 的参考文本文件 '{ref_text_path}' 不存在。")
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is not an OSError, so it is listed explicitly.
        print(f"错误:无法读取 {voice_type} 的参考文本文件 '{ref_text_path}'。{str(e)}")
    return ""
|
|
|
|
def synthesize(target_text, output_wav_path, voice):
    """Synthesize ``target_text`` with ``voice`` and write it to ``output_wav_path``.

    The reference audio path, reference text file and reference language are
    taken from ``VOICE_CONFIGS[voice]``.  ``get_tts_wav`` is iterated to the
    end and only the last ``(sampling_rate, audio_data)`` item is written.

    Args:
        target_text: Text to synthesize.
        output_wav_path: Destination path for the audio file.
        voice: Key into ``VOICE_CONFIGS``.

    Returns:
        ``output_wav_path`` on success, or ``None`` if ``get_tts_wav``
        produced no items.

    Raises:
        KeyError: if ``voice`` is not in ``VOICE_CONFIGS``.
        OSError: if the reference text file cannot be opened.
    """
    voice_config = VOICE_CONFIGS[voice]
    ref_audio_path = voice_config["ref_audio"]

    with open(voice_config["ref_text"], 'r', encoding='utf-8') as file:
        ref_text = file.read()

    def _last_synthesis_item():
        # Keep only the most recent item instead of building a list of all of
        # them; the iterable is still consumed to the end.
        last_item = None
        for last_item in get_tts_wav(
            ref_wav_path=ref_audio_path,
            prompt_text=ref_text,
            prompt_language=i18n(voice_config["ref_language"]),
            text=target_text,
            text_language=i18n(TARGET_LANGUAGE),
            top_p=1,
            temperature=1,
        ):
            pass
        return last_item

    # Iterate inside the device context: if get_tts_wav is lazy, the work
    # happens during iteration, not when it is called.
    # torch.cuda.device() rejects non-CUDA devices, so it is skipped when the
    # module fell back to the CPU.
    if device.type == "cuda":
        with torch.cuda.device(device):
            last_item = _last_synthesis_item()
    else:
        last_item = _last_synthesis_item()

    if last_item is None:
        return None

    last_sampling_rate, last_audio_data = last_item
    sf.write(output_wav_path, last_audio_data, last_sampling_rate)
    return output_wav_path
|
|
|
|
def _decode_kafka_json(raw_value):
    """Decode a Kafka payload as UTF-8 JSON, returning ``None`` if it is malformed.

    This is the consumer's ``value_deserializer``.  It is invoked by the
    consumer itself, outside the per-message ``try`` block of
    ``kafka_consumer``, so an exception raised here would end the whole
    consume loop instead of failing a single message.
    """
    try:
        return json.loads(raw_value.decode('utf-8'))
    except (AttributeError, ValueError) as e:
        # AttributeError: the payload is None.
        # ValueError: covers UnicodeDecodeError and json.JSONDecodeError.
        print(f"错误:无法解析 Kafka 消息: {e}")
        return None


def _mark_task_failed(task_id):
    """Record the "failed" status for ``task_id`` without ever raising."""
    if task_id is None:
        return
    try:
        redis_task_client.set(f"task_status:tts:{task_id}", "failed")
    except Exception as e:
        # Redis may be the very thing that failed; keep the consumer alive.
        print(f"错误:无法将任务 {task_id} 标记为失败: {e}")


def _process_tts_task(task_id, payload):
    """Handle one TTS request and return ``True`` if audio is available for it.

    Reuses the audio file recorded in the voice's Redis database under
    ``tts:{text_hash}`` when that file still exists on disk; otherwise calls
    ``synthesize``.  On success the path is stored in the voice's Redis
    database and the task info and "completed" status are written to the
    task database.

    Raises:
        KeyError: if ``payload`` lacks ``text`` or ``text_hash``.
    """
    target_text = payload['text']
    text_hash = payload['text_hash']
    voice = payload.get('voice', 'default')

    if voice == 'default':
        voice = 'girl'
    if voice not in VOICE_CONFIGS:
        print(f"警告:无效的音色类型 '{voice}'。使用默认音色。")
        voice = "girl"

    # Mark the task as "processing".
    redis_task_client.set(f"task_status:tts:{task_id}", "processing")

    # Use the Redis client that belongs to this voice.
    redis_tts = voice_to_redis[voice]

    # Look for an existing audio file for the same text.
    output_path = None
    existing_audio_info = redis_tts.get(f"tts:{text_hash}")
    if existing_audio_info:
        try:
            existing_audio_path = json.loads(existing_audio_info)['path']
        except (ValueError, KeyError, TypeError):
            # A damaged cache record is treated as a cache miss rather than
            # being reported as a missing key in the incoming message.
            print(f"警告:缓存记录无效,将重新合成: {text_hash}")
            existing_audio_path = None
        if isinstance(existing_audio_path, str) and os.path.exists(existing_audio_path):
            output_path = existing_audio_path

    if output_path is None:
        # No usable cached file: synthesize a new one.
        output_wav_path = os.path.join(OUTPUT_PATH, f"{text_hash}_{voice}.wav")
        output_path = synthesize(target_text, output_wav_path, voice)

    if not output_path:
        print("音频合成失败")
        return False

    # Store the result in the Redis database of this voice.
    redis_tts.set(f"tts:{text_hash}", json.dumps({"path": output_path}))
    print(f"音频合成成功: {output_path}")

    # Write the task info before publishing "completed", so that anything
    # reacting to the status can already read the info.
    redis_task_client.set(f"task_info:tts:{task_id}", json.dumps({
        "text_hash": text_hash,
        "voice": voice
    }))
    redis_task_client.set(f"task_status:tts:{task_id}", "completed")
    return True


def kafka_consumer():
    """Consume TTS requests from ``KAFKA_TTS_TOPIC`` until the consumer stops.

    Each message value is expected to be a JSON object with ``task_id``,
    ``text`` and ``text_hash`` and an optional ``voice``.  A message that
    cannot be decoded, is not a JSON object, or fails during processing is
    reported and skipped; when its ``task_id`` is known the task status is
    set to "failed".  No ``group_id`` is configured and ``auto_offset_reset``
    is ``'latest'``.
    """
    consumer = KafkaConsumer(
        KAFKA_TTS_TOPIC,
        bootstrap_servers=KAFKA_BROKER,
        auto_offset_reset='latest',
        value_deserializer=_decode_kafka_json
    )
    print("TTS消费者已启动")

    try:
        for message in consumer:
            payload = message.value
            if not isinstance(payload, dict):
                # None means the deserializer already reported the problem
                # (or the payload was empty / JSON null).
                if payload is not None:
                    print(f"错误:消息格式无效(应为 JSON 对象),已跳过: {payload!r}")
                continue

            task_id = None
            succeeded = False
            try:
                task_id = payload['task_id']
                succeeded = _process_tts_task(task_id, payload)
            except KeyError as e:
                print(f"错误:消息中缺少必要的键: {e}")
            except Exception as e:
                # Top-level boundary: one bad message must not stop the worker.
                print(f"处理消息时出错: {str(e)}")
                print(traceback.format_exc())

            if succeeded:
                print("消息处理完成")
            else:
                print("处理消息时发生错误")
                _mark_task_failed(task_id)
    finally:
        # Release the consumer's network resources however the loop ends.
        consumer.close()
|
|
if __name__ == "__main__":
    # torch.cuda.set_device() only accepts CUDA devices; skip it when the
    # module fell back to the CPU so the worker can still start.
    if device.type == "cuda":
        torch.cuda.set_device(device)
    kafka_consumer()