69 lines
2.4 KiB
Python
69 lines
2.4 KiB
Python
import os
|
|
import whisper
|
|
import argparse
|
|
|
|
def transcribe_audio(model, audio_path):
    """
    Transcribe a single audio file with a loaded Whisper model.

    :param model: loaded Whisper model; only its ``transcribe`` method is used
    :param audio_path: path of the audio file to transcribe
    :return: the ``"text"`` entry of the transcription result, or ``None``
        when transcription raised an exception (the error is printed)
    """
    try:
        text = model.transcribe(audio_path)["text"]
    except Exception as exc:
        # Best-effort boundary: report the failure and hand back None so the
        # caller can decide how to continue.
        print(f"转录失败 {audio_path}: {exc!s}")
        text = None
    return text
|
|
|
|
def process_directory(directory, model):
    """
    Transcribe every WAV file found directly inside a directory.

    For each entry whose name ends in ``.wav`` (case-insensitive) the
    transcription is written next to it as a UTF-8 ``.txt`` file with the
    same base name. Only the directory's own entries are listed.

    :param directory: path of the directory containing the WAV files
    :param model: loaded Whisper model, passed through to ``transcribe_audio``
    """
    # Sorted so files are handled in a predictable order; os.listdir returns
    # entries in arbitrary order.
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith('.wav'):
            continue

        input_file = os.path.join(directory, filename)
        output_file = os.path.splitext(input_file)[0] + ".txt"

        print(f"正在处理: {input_file}")
        transcription = transcribe_audio(model, input_file)

        # transcribe_audio signals failure with None. An empty string is a
        # successful (if empty) transcription and must still be written, so
        # compare against None instead of relying on truthiness.
        if transcription is None:
            print(f"转录失败: {input_file}")
            continue

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(transcription)
        print(f"转录完成: {output_file}")
|
|
|
|
def main():
    """
    Command-line entry point: transcribe one WAV file or a directory of them.

    Reads ``input_path`` and ``--model`` from the command line. The input
    path is validated *before* the Whisper model is loaded, so an invalid
    path or a non-WAV file is reported immediately without paying for the
    model load. A single file is transcribed to a ``.txt`` file with the same
    base name; a directory is handed to ``process_directory``.

    :return: ``None`` in all cases; problems are reported by printing.
    """
    parser = argparse.ArgumentParser(description="使用Whisper将WAV文件转换为文本")
    parser.add_argument("input_path", help="输入的WAV文件或包含WAV文件的目录路径")
    parser.add_argument(
        "--model",
        default="small",
        choices=["tiny", "base", "small", "medium", "large", "large-v3"],
        help="Whisper模型大小",
    )
    args = parser.parse_args()

    # Validate the input first: loading the model is the expensive step and
    # is pointless if there is nothing valid to transcribe.
    is_file = os.path.isfile(args.input_path)
    if is_file:
        if not args.input_path.lower().endswith('.wav'):
            print("错误: 输入文件不是WAV格式")
            return
    elif not os.path.isdir(args.input_path):
        print("错误: 输入路径既不是文件也不是目录")
        return

    print(f"正在加载Whisper模型 ({args.model})...")
    model = whisper.load_model(args.model)
    print("模型加载完成")

    if not is_file:
        process_directory(args.input_path, model)
        return

    output_file = os.path.splitext(args.input_path)[0] + ".txt"
    transcription = transcribe_audio(model, args.input_path)
    # None means transcribe_audio failed (and already printed why). An empty
    # string is still a valid transcription and gets written.
    if transcription is not None:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(transcription)
        print(f"转录完成: {output_file}")
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|