Files
api/api_chat/before/wav_to_text.py
T
2025-01-12 06:58:52 +00:00

69 lines
2.4 KiB
Python

import os
import whisper
import argparse
def transcribe_audio(model, audio_path):
    """Transcribe a single audio file with a loaded Whisper model.

    :param model: Loaded Whisper model; its ``transcribe`` method is called
        with ``audio_path``.
    :param audio_path: Path of the audio file to transcribe.
    :return: The ``"text"`` entry of the transcription result, or ``None``
        when any exception is raised (the failure is printed, not re-raised).
    """
    try:
        text = model.transcribe(audio_path)["text"]
    except Exception as exc:
        # Best-effort: report the problem and let the caller decide how to
        # proceed instead of aborting the whole run.
        print(f"转录失败 {audio_path}: {exc!s}")
        return None
    return text
def process_directory(directory, model):
    """Transcribe every WAV file located directly inside ``directory``.

    Each regular file whose name ends in ``.wav`` (case-insensitive) is
    transcribed, and the text is written as UTF-8 to a ``.txt`` file with the
    same base name next to it. Sub-directories are not descended into.

    :param directory: Path of the directory containing the WAV files.
    :param model: Loaded Whisper model, passed through to ``transcribe_audio``.
    """
    # os.listdir returns entries in arbitrary order; sort so that progress
    # output and processing order are reproducible between runs.
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith('.wav'):
            continue

        input_file = os.path.join(directory, filename)
        if not os.path.isfile(input_file):
            # A directory that merely happens to be named "*.wav" is not audio.
            continue

        output_file = os.path.splitext(input_file)[0] + ".txt"
        print(f"正在处理: {input_file}")
        transcription = transcribe_audio(model, input_file)

        # Only None signals a failure. An empty string is a valid result
        # (e.g. silent audio) and must still produce an output file.
        if transcription is None:
            print(f"转录失败: {input_file}")
            continue

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(transcription)
        print(f"转录完成: {output_file}")
def main():
    """Command-line entry point: transcribe one WAV file or a directory of them.

    Parses the positional ``input_path`` and the optional ``--model`` size,
    validates the path, loads the requested Whisper model and writes a
    ``.txt`` transcription next to each processed WAV file.

    Validation problems are reported on standard output and the function
    returns without loading the model.
    """
    parser = argparse.ArgumentParser(description="使用Whisper将WAV文件转换为文本")
    parser.add_argument("input_path", help="输入的WAV文件或包含WAV文件的目录路径")
    parser.add_argument("--model", default="small", choices=["tiny", "base", "small", "medium", "large", "large-v3"], help="Whisper模型大小")
    args = parser.parse_args()

    # Validate the input before loading the model: loading is slow (and may
    # download weights), so a bad path should fail fast.
    is_single_file = os.path.isfile(args.input_path)
    if is_single_file:
        if not args.input_path.lower().endswith('.wav'):
            print("错误: 输入文件不是WAV格式")
            return
    elif not os.path.isdir(args.input_path):
        print("错误: 输入路径既不是文件也不是目录")
        return

    print(f"正在加载Whisper模型 ({args.model})...")
    model = whisper.load_model(args.model)
    print("模型加载完成")

    if not is_single_file:
        process_directory(args.input_path, model)
        return

    output_file = os.path.splitext(args.input_path)[0] + ".txt"
    transcription = transcribe_audio(model, args.input_path)
    # None means the transcription failed (already reported by
    # transcribe_audio); an empty string is a valid result and is written.
    if transcription is not None:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(transcription)
        print(f"转录完成: {output_file}")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()