Files
2025-04-10 09:45:41 +00:00

315 lines
12 KiB
Python

import os
import soundfile as sf
import redis
import hashlib
import json
import traceback
from kafka import KafkaConsumer
from TTS.tools.i18n.i18n import I18nAuto
from TTS.GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights, get_tts_wav
from dotenv import load_dotenv
import torch
# Load settings from the .env file into the process environment.
load_dotenv()

# Prefer the first CUDA GPU; fall back to the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"使用设备: {device}")
def _env_int(name):
    """Return the environment variable *name* converted with ``int()``.

    Raises TypeError when the variable is unset (``int(None)``) and
    ValueError when its value is not an integer literal.
    """
    return int(os.getenv(name))


# Redis connection settings
REDIS_HOST = os.environ.get('REDIS_HOST')
REDIS_PORT = _env_int('REDIS_PORT')
REDIS_TASK_DB = _env_int('REDIS_TASK_DB')  # DB 3
REDIS_PASSWORD = os.environ.get('REDIS_PASSWORD')

# Kafka settings
KAFKA_BROKER = os.environ.get('KAFKA_BROKER')
KAFKA_TTS_TOPIC = os.environ.get('KAFKA_TTS_TOPIC')

# TTS settings
GPT_MODEL_PATH = os.environ.get('GPT_MODEL_PATH')
SOVITS_MODEL_PATH = os.environ.get('SOVITS_MODEL_PATH')
REF_LANGUAGE = os.environ.get('REF_LANGUAGE')
TARGET_LANGUAGE = os.environ.get('TARGET_LANGUAGE')
OUTPUT_PATH = os.environ.get('OUTPUT_PATH')

# Redis database index used by each voice
REDIS_GIRL_DB = _env_int('REDIS_GIRL_DB')
REDIS_WOMAN_DB = _env_int('REDIS_WOMAN_DB')
REDIS_MAN_DB = _env_int('REDIS_MAN_DB')
REDIS_LEIJUN_DB = _env_int('REDIS_LEIJUN_DB')
REDIS_DUFU_DB = _env_int('REDIS_DUFU_DB')
REDIS_HEJIONG_DB = _env_int('REDIS_HEJIONG_DB')
REDIS_MAHUATENG_DB = _env_int('REDIS_MAHUATENG_DB')
REDIS_LIDAN_DB = _env_int('REDIS_LIDAN_DB')
REDIS_YUHUA_DB = _env_int('REDIS_YUHUA_DB')
REDIS_LIUZHENYUN_DB = _env_int('REDIS_LIUZHENYUN_DB')
REDIS_DABING_DB = _env_int('REDIS_DABING_DB')
REDIS_LUOXIANG_DB = _env_int('REDIS_LUOXIANG_DB')
REDIS_XUZHIYUAN_DB = _env_int('REDIS_XUZHIYUAN_DB')
def _redis_client(db):
    """Create a Redis client for database *db* with the shared host, port and password."""
    return redis.Redis(host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD, db=db)


# Initialise the Redis clients: one per voice, each on its own database index.
redis_tts_girl = _redis_client(REDIS_GIRL_DB)
redis_tts_woman = _redis_client(REDIS_WOMAN_DB)
redis_tts_man = _redis_client(REDIS_MAN_DB)
redis_tts_leijun = _redis_client(REDIS_LEIJUN_DB)
redis_tts_dufu = _redis_client(REDIS_DUFU_DB)
redis_tts_hejiong = _redis_client(REDIS_HEJIONG_DB)
redis_tts_mahuateng = _redis_client(REDIS_MAHUATENG_DB)
redis_tts_lidan = _redis_client(REDIS_LIDAN_DB)
redis_tts_yuhua = _redis_client(REDIS_YUHUA_DB)
redis_tts_liuzhenyun = _redis_client(REDIS_LIUZHENYUN_DB)
redis_tts_dabing = _redis_client(REDIS_DABING_DB)
redis_tts_luoxiang = _redis_client(REDIS_LUOXIANG_DB)
redis_tts_xuzhiyuan = _redis_client(REDIS_XUZHIYUAN_DB)
# Client for the task database (task status and task info records).
redis_task_client = _redis_client(REDIS_TASK_DB)

# Map each voice name to its Redis client; "default" shares the "girl" client.
voice_to_redis = {
    "default": redis_tts_girl,
    "girl": redis_tts_girl,
    "woman": redis_tts_woman,
    "man": redis_tts_man,
    "leijun": redis_tts_leijun,
    "dufu": redis_tts_dufu,
    "hejiong": redis_tts_hejiong,
    "mahuateng": redis_tts_mahuateng,
    "lidan": redis_tts_lidan,
    "yuhua": redis_tts_yuhua,
    "liuzhenyun": redis_tts_liuzhenyun,
    "dabing": redis_tts_dabing,
    "luoxiang": redis_tts_luoxiang,
    "xuzhiyuan": redis_tts_xuzhiyuan,
}
i18n = I18nAuto()

# Voice configurations: for every voice, the reference audio path and the path
# of the text file holding the reference transcript, both read from the environment.
GIRL_REF_AUDIO = os.environ.get('GIRL_REF_AUDIO')
GIRL_REF_TEXT = os.environ.get('GIRL_REF_TEXT')
WOMAN_REF_AUDIO = os.environ.get('WOMAN_REF_AUDIO')
WOMAN_REF_TEXT = os.environ.get('WOMAN_REF_TEXT')
MAN_REF_AUDIO = os.environ.get('MAN_REF_AUDIO')
MAN_REF_TEXT = os.environ.get('MAN_REF_TEXT')
LEIJUN_REF_AUDIO = os.environ.get('LEIJUN_REF_AUDIO')
LEIJUN_REF_TEXT = os.environ.get('LEIJUN_REF_TEXT')
DUFU_REF_AUDIO = os.environ.get('DUFU_REF_AUDIO')
DUFU_REF_TEXT = os.environ.get('DUFU_REF_TEXT')
HEJIONG_REF_AUDIO = os.environ.get('HEJIONG_REF_AUDIO')
HEJIONG_REF_TEXT = os.environ.get('HEJIONG_REF_TEXT')
MAHUATENG_REF_AUDIO = os.environ.get('MAHUATENG_REF_AUDIO')
MAHUATENG_REF_TEXT = os.environ.get('MAHUATENG_REF_TEXT')
LIDAN_REF_AUDIO = os.environ.get('LIDAN_REF_AUDIO')
LIDAN_REF_TEXT = os.environ.get('LIDAN_REF_TEXT')
YUHUA_REF_AUDIO = os.environ.get('YUHUA_REF_AUDIO')
YUHUA_REF_TEXT = os.environ.get('YUHUA_REF_TEXT')
LIUZHENYUN_REF_AUDIO = os.environ.get('LIUZHENYUN_REF_AUDIO')
LIUZHENYUN_REF_TEXT = os.environ.get('LIUZHENYUN_REF_TEXT')
DABING_REF_AUDIO = os.environ.get('DABING_REF_AUDIO')
DABING_REF_TEXT = os.environ.get('DABING_REF_TEXT')
LUOXIANG_REF_AUDIO = os.environ.get('LUOXIANG_REF_AUDIO')
LUOXIANG_REF_TEXT = os.environ.get('LUOXIANG_REF_TEXT')
XUZHIYUAN_REF_AUDIO = os.environ.get('XUZHIYUAN_REF_AUDIO')
XUZHIYUAN_REF_TEXT = os.environ.get('XUZHIYUAN_REF_TEXT')


def _voice_config(ref_audio, ref_text):
    """Build one VOICE_CONFIGS entry; every voice shares REF_LANGUAGE."""
    return {
        "ref_audio": ref_audio,
        "ref_text": ref_text,
        "ref_language": REF_LANGUAGE,
    }


# Voice name -> reference settings. "default" mirrors the "girl" voice.
VOICE_CONFIGS = {
    "girl": _voice_config(GIRL_REF_AUDIO, GIRL_REF_TEXT),
    "woman": _voice_config(WOMAN_REF_AUDIO, WOMAN_REF_TEXT),
    "man": _voice_config(MAN_REF_AUDIO, MAN_REF_TEXT),
    "leijun": _voice_config(LEIJUN_REF_AUDIO, LEIJUN_REF_TEXT),
    "dufu": _voice_config(DUFU_REF_AUDIO, DUFU_REF_TEXT),
    "hejiong": _voice_config(HEJIONG_REF_AUDIO, HEJIONG_REF_TEXT),
    "mahuateng": _voice_config(MAHUATENG_REF_AUDIO, MAHUATENG_REF_TEXT),
    "lidan": _voice_config(LIDAN_REF_AUDIO, LIDAN_REF_TEXT),
    "default": _voice_config(GIRL_REF_AUDIO, GIRL_REF_TEXT),
    "yuhua": _voice_config(YUHUA_REF_AUDIO, YUHUA_REF_TEXT),
    "liuzhenyun": _voice_config(LIUZHENYUN_REF_AUDIO, LIUZHENYUN_REF_TEXT),
    "dabing": _voice_config(DABING_REF_AUDIO, DABING_REF_TEXT),
    "luoxiang": _voice_config(LUOXIANG_REF_AUDIO, LUOXIANG_REF_TEXT),
    "xuzhiyuan": _voice_config(XUZHIYUAN_REF_AUDIO, XUZHIYUAN_REF_TEXT),
}
def get_audio_hash(text):
    """Return the hexadecimal MD5 digest of *text* encoded as UTF-8."""
    digest = hashlib.md5()
    digest.update(text.encode("utf-8"))
    return digest.hexdigest()
# Initialise the models at startup. These statements sit at module top level,
# so they run once when the module is loaded, before any message is consumed.
print("正在初始化模型...")
# Point the TTS library at the GPT and SoVITS weight files configured in the
# environment (presumably this loads them into memory — confirm in inference_webui).
change_gpt_weights(gpt_path=GPT_MODEL_PATH)
change_sovits_weights(sovits_path=SOVITS_MODEL_PATH)
print("模型初始化成功。")
def read_ref_text(voice_type):
    """Return the reference transcript for *voice_type*, or "" if it cannot be read.

    The transcript is stored in the UTF-8 text file whose path is
    ``VOICE_CONFIGS[voice_type]["ref_text"]``. Read problems are reported on
    stdout and turned into an empty string rather than raised.

    Args:
        voice_type: Key into VOICE_CONFIGS.

    Returns:
        The file content, or "" when the path is not configured, the file is
        missing, or it cannot be read or decoded.

    Raises:
        KeyError: *voice_type* is not a key of VOICE_CONFIGS.
    """
    ref_text_path = VOICE_CONFIGS[voice_type]["ref_text"]
    # The path comes from os.getenv() and is None when the variable is unset;
    # passing None to the filesystem calls would raise TypeError.
    if not ref_text_path:
        print(f"警告:{voice_type} 的参考文本文件路径未配置。")
        return ""
    # Open directly instead of checking os.path.exists() first, so the file
    # cannot disappear between the check and the read.
    try:
        with open(ref_text_path, 'r', encoding='utf-8') as file:
            return file.read()
    except FileNotFoundError:
        print(f"警告:{voice_type} 的参考文本文件 '{ref_text_path}' 不存在。")
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is not an OSError, so it is listed explicitly.
        print(f"错误:无法读取 {voice_type} 的参考文本文件 '{ref_text_path}': {e}")
    return ""
def synthesize(target_text, output_wav_path, voice):
    """Synthesize *target_text* with the given voice and write it to *output_wav_path*.

    Args:
        target_text: Text to convert to speech.
        output_wav_path: Destination path of the audio file.
        voice: Key into VOICE_CONFIGS selecting the reference audio and text.

    Returns:
        *output_wav_path* when audio was written, or None when get_tts_wav
        produced no result.

    Raises:
        KeyError: *voice* is not a key of VOICE_CONFIGS.
        OSError: The reference text file cannot be opened.
    """
    from contextlib import nullcontext

    voice_config = VOICE_CONFIGS[voice]
    ref_audio_path = voice_config["ref_audio"]
    with open(voice_config["ref_text"], 'r', encoding='utf-8') as file:
        ref_text = file.read()

    # torch.cuda.device() only accepts CUDA devices and raises ValueError for
    # the CPU fallback, so the CUDA context is entered only on GPU.
    if device.type == "cuda":
        device_context = torch.cuda.device(device)
    else:
        device_context = nullcontext()

    last_result = None
    with device_context:
        synthesis_result = get_tts_wav(
            ref_wav_path=ref_audio_path,
            prompt_text=ref_text,
            prompt_language=i18n(voice_config["ref_language"]),
            text=target_text,
            text_language=i18n(TARGET_LANGUAGE),
            top_p=1,
            temperature=1
        )
        # Consume the results inside the device context: if get_tts_wav is a
        # lazy generator, the inference itself runs during iteration.
        # Only the final (sampling_rate, audio_data) pair is kept.
        for last_result in synthesis_result:
            pass

    if last_result is None:
        return None
    last_sampling_rate, last_audio_data = last_result
    sf.write(output_wav_path, last_audio_data, last_sampling_rate)
    return output_wav_path
def _decode_message(raw):
    """Decode a raw Kafka payload as UTF-8 JSON; return None when it is unusable."""
    if raw is None:
        return None
    try:
        return json.loads(raw.decode('utf-8'))
    except ValueError as e:
        # UnicodeDecodeError and json.JSONDecodeError are both ValueError
        # subclasses. Swallowing them here keeps one bad payload from raising
        # inside the consumer iterator and ending the loop.
        print(f"错误:无法解析消息内容: {e}")
        return None


def _cached_audio_path(redis_tts, text_hash):
    """Return the cached audio path for *text_hash* if its file still exists, else None."""
    cached = redis_tts.get(f"tts:{text_hash}")
    if not cached:
        return None
    try:
        path = json.loads(cached)['path']
    except (ValueError, KeyError, TypeError) as e:
        # A corrupt cache record is treated as a cache miss, not a task failure.
        print(f"警告:缓存记录 'tts:{text_hash}' 无效,将重新生成: {e}")
        return None
    if isinstance(path, str) and os.path.exists(path):
        return path
    return None


def _mark_task_failed(task_id):
    """Record the "failed" status for *task_id* without letting a Redis error escape."""
    try:
        redis_task_client.set(f"task_status:tts:{task_id}", "failed")
    except redis.RedisError as e:
        print(f"错误:无法将任务 {task_id} 标记为失败: {e}")


def _process_task(payload):
    """Handle one decoded TTS task; return True on success and False on failure."""
    if not isinstance(payload, dict):
        print("错误:消息不是有效的 JSON 对象")
        return False

    # Only the lookup of the required message fields is guarded by KeyError,
    # so KeyErrors raised further down are not misreported as missing keys.
    try:
        task_id = payload['task_id']
        target_text = payload['text']
        text_hash = payload['text_hash']
    except KeyError as e:
        print(f"错误:消息中缺少必要的键: {e}")
        return False

    voice = payload.get('voice', 'default')
    if voice == 'default':
        voice = 'girl'
    if voice not in VOICE_CONFIGS:
        print(f"警告:无效的音色类型 '{voice}'。使用默认音色。")
        voice = "girl"

    # Update the task status to "processing"
    redis_task_client.set(f"task_status:tts:{task_id}", "processing")

    # Use the Redis client that belongs to this voice
    redis_tts = voice_to_redis[voice]

    # Reuse an existing audio file for the same content; otherwise synthesize it.
    output_path = _cached_audio_path(redis_tts, text_hash)
    if output_path is None:
        # NOTE(review): text_hash comes from the message and becomes part of
        # the file name; confirm producers only ever send plain hash strings.
        output_wav_path = os.path.join(OUTPUT_PATH, f"{text_hash}_{voice}.wav")
        output_path = synthesize(target_text, output_wav_path, voice)

    if not output_path:
        print("音频合成失败")
        return False

    # Save the result in the Redis database of this voice
    redis_tts.set(f"tts:{text_hash}", json.dumps({"path": output_path}))
    print(f"音频合成成功: {output_path}")
    # Store the task info before publishing "completed", so that a reader who
    # sees the completed status can always find the matching task info.
    redis_task_client.set(f"task_info:tts:{task_id}", json.dumps({
        "text_hash": text_hash,
        "voice": voice
    }))
    redis_task_client.set(f"task_status:tts:{task_id}", "completed")
    return True


def kafka_consumer():
    """Consume TTS tasks from Kafka and synthesize audio for each one.

    Each message value is expected to be a JSON object with the keys
    'task_id', 'text' and 'text_hash', plus an optional 'voice'. For every
    message the task status in Redis goes "processing" -> "completed", or
    "failed" when anything goes wrong. A failure in one message is logged
    and does not stop the loop.
    """
    consumer = KafkaConsumer(
        KAFKA_TTS_TOPIC,
        bootstrap_servers=KAFKA_BROKER,
        auto_offset_reset='latest',
        value_deserializer=_decode_message
    )
    print("TTS消费者已启动")
    for message in consumer:
        payload = message.value
        # Fetch the id up front so a failure can be recorded even when the
        # rest of the message is incomplete.
        task_id = payload.get('task_id') if isinstance(payload, dict) else None
        succeeded = False
        try:
            succeeded = _process_task(payload)
        except Exception as e:
            # Per-message boundary: log everything and keep consuming.
            print(f"处理消息时出错: {str(e)}")
            print(traceback.format_exc())

        if succeeded:
            print("消息处理完成")
        else:
            print("处理消息时发生错误")
            # "is not None" so that falsy ids such as 0 are still marked failed.
            if task_id is not None:
                _mark_task_failed(task_id)
if __name__ == "__main__":
    # torch.cuda.set_device() only accepts CUDA devices and raises ValueError
    # for the CPU fallback, so it is skipped when no GPU was selected.
    if device.type == "cuda":
        torch.cuda.set_device(device)
    kafka_consumer()