"""Kafka-driven TTS worker.

Consumes synthesis tasks from a Kafka topic, renders the requested text with
``get_tts_wav`` using a per-voice reference audio/text pair, stores the path of
the resulting WAV file in the Redis database assigned to that voice, and
records the task status in the task Redis database.

All settings are read from environment variables (optionally loaded from a
``.env`` file).
"""

import contextlib
import hashlib
import json
import os
import traceback

import redis
import soundfile as sf
import torch
from dotenv import load_dotenv
from kafka import KafkaConsumer

from TTS.tools.i18n.i18n import I18nAuto
from TTS.GPT_SoVITS.inference_webui import change_gpt_weights, change_sovits_weights, get_tts_wav

# Load the .env file.
load_dotenv()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"使用设备: {device}")

# Redis connection settings.
REDIS_HOST = os.getenv('REDIS_HOST')
REDIS_PORT = int(os.getenv('REDIS_PORT'))
REDIS_TASK_DB = int(os.getenv('REDIS_TASK_DB'))  # DB 3
REDIS_PASSWORD = os.getenv('REDIS_PASSWORD')

# Kafka settings.
KAFKA_BROKER = os.getenv('KAFKA_BROKER')
KAFKA_TTS_TOPIC = os.getenv('KAFKA_TTS_TOPIC')

# TTS settings.
GPT_MODEL_PATH = os.getenv('GPT_MODEL_PATH')
SOVITS_MODEL_PATH = os.getenv('SOVITS_MODEL_PATH')
REF_LANGUAGE = os.getenv('REF_LANGUAGE')
TARGET_LANGUAGE = os.getenv('TARGET_LANGUAGE')
OUTPUT_PATH = os.getenv('OUTPUT_PATH')

# Redis database index used for each voice.
REDIS_GIRL_DB = int(os.getenv('REDIS_GIRL_DB'))
REDIS_WOMAN_DB = int(os.getenv('REDIS_WOMAN_DB'))
REDIS_MAN_DB = int(os.getenv('REDIS_MAN_DB'))
REDIS_LEIJUN_DB = int(os.getenv('REDIS_LEIJUN_DB'))
REDIS_DUFU_DB = int(os.getenv('REDIS_DUFU_DB'))
REDIS_HEJIONG_DB = int(os.getenv('REDIS_HEJIONG_DB'))
REDIS_MAHUATENG_DB = int(os.getenv('REDIS_MAHUATENG_DB'))
REDIS_LIDAN_DB = int(os.getenv('REDIS_LIDAN_DB'))
REDIS_YUHUA_DB = int(os.getenv('REDIS_YUHUA_DB'))
REDIS_LIUZHENYUN_DB = int(os.getenv('REDIS_LIUZHENYUN_DB'))
REDIS_DABING_DB = int(os.getenv('REDIS_DABING_DB'))
REDIS_LUOXIANG_DB = int(os.getenv('REDIS_LUOXIANG_DB'))
REDIS_XUZHIYUAN_DB = int(os.getenv('REDIS_XUZHIYUAN_DB'))


def _redis_client(db):
    """Return a Redis client for database index *db* on the configured server."""
    return redis.Redis(host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD, db=db)


# Initialise the Redis clients (one per voice, plus the task-status client).
redis_tts_girl = _redis_client(REDIS_GIRL_DB)
redis_tts_woman = _redis_client(REDIS_WOMAN_DB)
redis_tts_man = _redis_client(REDIS_MAN_DB)
redis_tts_leijun = _redis_client(REDIS_LEIJUN_DB)
redis_tts_dufu = _redis_client(REDIS_DUFU_DB)
redis_tts_hejiong = _redis_client(REDIS_HEJIONG_DB)
redis_tts_mahuateng = _redis_client(REDIS_MAHUATENG_DB)
redis_tts_lidan = _redis_client(REDIS_LIDAN_DB)
redis_tts_yuhua = _redis_client(REDIS_YUHUA_DB)
redis_tts_liuzhenyun = _redis_client(REDIS_LIUZHENYUN_DB)
redis_tts_dabing = _redis_client(REDIS_DABING_DB)
redis_tts_luoxiang = _redis_client(REDIS_LUOXIANG_DB)
redis_tts_xuzhiyuan = _redis_client(REDIS_XUZHIYUAN_DB)
redis_task_client = _redis_client(REDIS_TASK_DB)

# Map each voice name to the Redis client that stores its results.
voice_to_redis = {
    "default": redis_tts_girl,
    "girl": redis_tts_girl,
    "woman": redis_tts_woman,
    "man": redis_tts_man,
    "leijun": redis_tts_leijun,
    "dufu": redis_tts_dufu,
    "hejiong": redis_tts_hejiong,
    "mahuateng": redis_tts_mahuateng,
    "lidan": redis_tts_lidan,
    "yuhua": redis_tts_yuhua,
    "liuzhenyun": redis_tts_liuzhenyun,
    "dabing": redis_tts_dabing,
    "luoxiang": redis_tts_luoxiang,
    "xuzhiyuan": redis_tts_xuzhiyuan,
}

i18n = I18nAuto()

# Voice configurations: reference audio file and reference text file per voice.
GIRL_REF_AUDIO = os.getenv('GIRL_REF_AUDIO')
GIRL_REF_TEXT = os.getenv('GIRL_REF_TEXT')
WOMAN_REF_AUDIO = os.getenv('WOMAN_REF_AUDIO')
WOMAN_REF_TEXT = os.getenv('WOMAN_REF_TEXT')
MAN_REF_AUDIO = os.getenv('MAN_REF_AUDIO')
MAN_REF_TEXT = os.getenv('MAN_REF_TEXT')
LEIJUN_REF_AUDIO = os.getenv('LEIJUN_REF_AUDIO')
LEIJUN_REF_TEXT = os.getenv('LEIJUN_REF_TEXT')
DUFU_REF_AUDIO = os.getenv('DUFU_REF_AUDIO')
DUFU_REF_TEXT = os.getenv('DUFU_REF_TEXT')
HEJIONG_REF_AUDIO = os.getenv('HEJIONG_REF_AUDIO')
HEJIONG_REF_TEXT = os.getenv('HEJIONG_REF_TEXT')
MAHUATENG_REF_AUDIO = os.getenv('MAHUATENG_REF_AUDIO')
MAHUATENG_REF_TEXT = os.getenv('MAHUATENG_REF_TEXT')
LIDAN_REF_AUDIO = os.getenv('LIDAN_REF_AUDIO')
LIDAN_REF_TEXT = os.getenv('LIDAN_REF_TEXT')
YUHUA_REF_AUDIO = os.getenv('YUHUA_REF_AUDIO')
YUHUA_REF_TEXT = os.getenv('YUHUA_REF_TEXT')
LIUZHENYUN_REF_AUDIO = os.getenv('LIUZHENYUN_REF_AUDIO')
LIUZHENYUN_REF_TEXT = os.getenv('LIUZHENYUN_REF_TEXT')
DABING_REF_AUDIO = os.getenv('DABING_REF_AUDIO')
DABING_REF_TEXT = os.getenv('DABING_REF_TEXT')
LUOXIANG_REF_AUDIO = os.getenv('LUOXIANG_REF_AUDIO')
LUOXIANG_REF_TEXT = os.getenv('LUOXIANG_REF_TEXT')
XUZHIYUAN_REF_AUDIO = os.getenv('XUZHIYUAN_REF_AUDIO')
XUZHIYUAN_REF_TEXT = os.getenv('XUZHIYUAN_REF_TEXT')


def _voice_config(ref_audio, ref_text):
    """Build one VOICE_CONFIGS entry; every voice shares REF_LANGUAGE."""
    return {
        "ref_audio": ref_audio,
        "ref_text": ref_text,
        "ref_language": REF_LANGUAGE,
    }


VOICE_CONFIGS = {
    "girl": _voice_config(GIRL_REF_AUDIO, GIRL_REF_TEXT),
    "woman": _voice_config(WOMAN_REF_AUDIO, WOMAN_REF_TEXT),
    "man": _voice_config(MAN_REF_AUDIO, MAN_REF_TEXT),
    "leijun": _voice_config(LEIJUN_REF_AUDIO, LEIJUN_REF_TEXT),
    "dufu": _voice_config(DUFU_REF_AUDIO, DUFU_REF_TEXT),
    "hejiong": _voice_config(HEJIONG_REF_AUDIO, HEJIONG_REF_TEXT),
    "mahuateng": _voice_config(MAHUATENG_REF_AUDIO, MAHUATENG_REF_TEXT),
    "lidan": _voice_config(LIDAN_REF_AUDIO, LIDAN_REF_TEXT),
    # "default" is an alias for the "girl" voice.
    "default": _voice_config(GIRL_REF_AUDIO, GIRL_REF_TEXT),
    "yuhua": _voice_config(YUHUA_REF_AUDIO, YUHUA_REF_TEXT),
    "liuzhenyun": _voice_config(LIUZHENYUN_REF_AUDIO, LIUZHENYUN_REF_TEXT),
    "dabing": _voice_config(DABING_REF_AUDIO, DABING_REF_TEXT),
    "luoxiang": _voice_config(LUOXIANG_REF_AUDIO, LUOXIANG_REF_TEXT),
    "xuzhiyuan": _voice_config(XUZHIYUAN_REF_AUDIO, XUZHIYUAN_REF_TEXT),
}


def get_audio_hash(text):
    """Return the hexadecimal MD5 digest of *text* (encoded as UTF-8)."""
    return hashlib.md5(text.encode()).hexdigest()


# Load the model weights once, at start-up.
print("正在初始化模型...")
change_gpt_weights(gpt_path=GPT_MODEL_PATH)
change_sovits_weights(sovits_path=SOVITS_MODEL_PATH)
print("模型初始化成功。")


def read_ref_text(voice_type):
    """Return the contents of the reference-text file for *voice_type*.

    Args:
        voice_type: A key of ``VOICE_CONFIGS``.

    Returns:
        The file contents, or ``""`` when the path is unset, the file does not
        exist, or it cannot be read (a message is printed in those cases).

    Raises:
        KeyError: If *voice_type* is not a key of ``VOICE_CONFIGS``.
    """
    ref_text_path = VOICE_CONFIGS[voice_type]["ref_text"]
    ref_text = ""
    try:
        # The path comes from os.getenv and is None when the variable is
        # unset; os.path.exists(None) would raise TypeError, so test it first.
        if ref_text_path and os.path.exists(ref_text_path):
            with open(ref_text_path, 'r', encoding='utf-8') as file:
                ref_text = file.read()
        else:
            print(f"警告:{voice_type} 的参考文本文件 '{ref_text_path}' 不存在。")
    except IOError as e:
        print(f"错误:无法读取 {voice_type} 的参考文本文件 '{ref_text_path}'。{str(e)}")
    return ref_text


def synthesize(target_text, output_wav_path, voice):
    """Synthesize *target_text* with the given voice and write it as a WAV file.

    Args:
        target_text: Text to be spoken.
        output_wav_path: Destination path of the WAV file.
        voice: A key of ``VOICE_CONFIGS`` selecting the reference audio/text.

    Returns:
        ``output_wav_path`` on success, or ``None`` when ``get_tts_wav``
        yielded no result.

    Raises:
        KeyError: If *voice* is not a key of ``VOICE_CONFIGS``.
        OSError: If the reference text file cannot be opened.
    """
    voice_config = VOICE_CONFIGS[voice]
    ref_audio_path = voice_config["ref_audio"]
    with open(voice_config["ref_text"], 'r', encoding='utf-8') as file:
        ref_text = file.read()

    # torch.cuda.device() only accepts CUDA devices, so the CPU fallback must
    # not enter it.
    if device.type == "cuda":
        device_ctx = torch.cuda.device(device)
    else:
        device_ctx = contextlib.nullcontext()

    with device_ctx:
        synthesis_result = get_tts_wav(
            ref_wav_path=ref_audio_path,
            prompt_text=ref_text,
            prompt_language=i18n(voice_config["ref_language"]),
            text=target_text,
            text_language=i18n(TARGET_LANGUAGE),
            top_p=1,
            temperature=1
        )
        # Drain the result inside the device context so that any lazily
        # executed work runs under the selected device.
        result_list = list(synthesis_result)

    if not result_list:
        return None

    # Only the last yielded (sampling_rate, audio_data) pair is written out.
    last_sampling_rate, last_audio_data = result_list[-1]
    sf.write(output_wav_path, last_audio_data, last_sampling_rate)
    return output_wav_path


def _find_cached_audio(redis_tts, text_hash):
    """Return the cached audio path for *text_hash* if its file still exists, else None."""
    cached_info = redis_tts.get(f"tts:{text_hash}")
    if not cached_info:
        return None
    cached_path = json.loads(cached_info).get('path')
    if cached_path and os.path.exists(cached_path):
        return cached_path
    return None


def kafka_consumer():
    """Consume TTS tasks from Kafka and process them one at a time, forever.

    Each message value is a JSON object with the keys ``task_id``, ``text``,
    ``text_hash`` and optionally ``voice``. For every task the status key
    ``task_status:tts:<task_id>`` is set to ``processing`` and then to
    ``completed`` or ``failed``. Errors in a single message are printed and
    do not stop the loop.
    """
    consumer = KafkaConsumer(
        KAFKA_TTS_TOPIC,
        bootstrap_servers=KAFKA_BROKER,
        auto_offset_reset='latest',
        value_deserializer=lambda m: json.loads(m.decode('utf-8'))
    )

    print("TTS消费者已启动")

    for message in consumer:
        # Reset per-message state before anything can raise, so that the
        # ``finally`` clause never sees values left over from an earlier task.
        task_id = None
        error_occurred = False
        try:
            task_id = message.value['task_id']
            target_text = message.value['text']
            text_hash = message.value['text_hash']
            voice = message.value.get('voice', 'default')

            if voice == 'default':
                voice = 'girl'

            if voice not in VOICE_CONFIGS:
                print(f"警告:无效的音色类型 '{voice}'。使用默认音色。")
                voice = "girl"

            # Mark the task as "processing".
            redis_task_client.set(f"task_status:tts:{task_id}", "processing")

            # Use the Redis client that belongs to this voice.
            redis_tts = voice_to_redis[voice]

            # Reuse an existing audio file for the same text when possible;
            # otherwise (no record, or the file is gone) synthesize it.
            output_path = _find_cached_audio(redis_tts, text_hash)
            if output_path is None:
                output_wav_path = os.path.join(OUTPUT_PATH, f"{text_hash}_{voice}.wav")
                output_path = synthesize(target_text, output_wav_path, voice)

            if output_path:
                # Store the result in the Redis database of this voice.
                redis_tts.set(f"tts:{text_hash}", json.dumps({"path": output_path}))
                print(f"音频合成成功: {output_path}")

                # Mark the task as "completed".
                redis_task_client.set(f"task_status:tts:{task_id}", "completed")

                # Store the task information.
                redis_task_client.set(f"task_info:tts:{task_id}", json.dumps({
                    "text_hash": text_hash,
                    "voice": voice
                }))
            else:
                print("音频合成失败")
                error_occurred = True

        except KeyError as e:
            print(f"错误:消息中缺少必要的键: {e}")
            error_occurred = True
        except Exception as e:
            # Top-level boundary of the worker loop: report and keep consuming.
            print(f"处理消息时出错: {str(e)}")
            print(traceback.format_exc())
            error_occurred = True
        finally:
            if error_occurred:
                print("处理消息时发生错误")
                if task_id:
                    redis_task_client.set(f"task_status:tts:{task_id}", "failed")
            else:
                print("消息处理完成")


if __name__ == "__main__":
    # torch.cuda.set_device() rejects non-CUDA devices; skip it on the CPU
    # fallback.
    if device.type == "cuda":
        torch.cuda.set_device(device)
    kafka_consumer()