from moviepy.editor import VideoFileClip
from pydub import AudioSegment

video_path = '/opt/audio/audios/video1.mp4'
audio_path = '/opt/audio/audios/video1.wav'  # path where the extracted audio is saved

# Load the video file
video = VideoFileClip(video_path)
# Extract the audio track
audio = video.audio
# Save the audio as a wav file
audio.write_audiofile(audio_path)

# Reload the audio with pydub
sound = AudioSegment.from_wav(audio_path)
# Convert to mono and resample to 16 kHz
sound = sound.set_channels(1)
sound = sound.set_frame_rate(16000)
# Overwrite the file with the mono, 16 kHz version
sound.export(audio_path, format="wav")
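Continuing from the snippet above, an optional sanity check can confirm the exported file really is mono at 16 kHz, which is the input format speech-recognition pipelines such as Whisper typically expect:

from pydub import AudioSegment

check = AudioSegment.from_wav(audio_path)
print(check.channels)          # expected: 1
print(check.frame_rate)        # expected: 16000
print(check.duration_seconds)  # length of the extracted audio in seconds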
from pydub import AudioSegment

# Load the audio file
audio = AudioSegment.from_file("xxx/1.wav")
# Define the start and end times (in milliseconds)
start_time = 3000
end_time = 28550
# Slice out the segment
extracted = audio[start_time:end_time]
# Export the extracted segment
extracted.export("xxx/3.wav", format="wav")
import os
from pydub import AudioSegment

base_path = f'{os.getcwd()}/audios/reduce_noise/video2/'

# Build the list of short audio files to concatenate
short_audio_files = []
for i in range(100, 105):
    path = base_path + str(i) + ".wav"
    # Optional: load the clip just to inspect its length
    wav = AudioSegment.from_file(path)
    duration_seconds = wav.duration_seconds
    short_audio_files.append(path)

# Start from an empty segment
merged_audio = AudioSegment.empty()
# Append each short clip to the merged audio
for audio_file in short_audio_files:
    # Load the short clip
    short_audio = AudioSegment.from_file(audio_file)
    # crossfade=0: plain concatenation, no blending between clips
    merged_audio = merged_audio.append(short_audio, crossfade=0)

# Save the merged result as one long audio file
merged_audio.export(f"{base_path}merged_audio.wav", format="wav")
This step mainly uses whisper-diarization (GitHub - MahmoudAshraf97/whisper-diarization: Automatic Speech Recognition with Speaker Diarization based on OpenAI Whisper). Use case: take one whole wav and separate out the wav belonging to each speaker, as sketched below.
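A minimal sketch of that workflow, assuming the repository's diarize.py entry point (verify the exact flags against its README); the segments list below is a hypothetical example of the speaker-labelled timestamps the diarization step yields, which are then used with pydub to cut the original wav into one file per speaker:

# 1) Run speaker diarization on the prepared wav (entry point assumed from the repo):
#      python diarize.py -a /opt/audio/audios/video1.wav
#
# 2) The diarization output gives speaker-labelled time ranges. The list below
#    is a hypothetical example of such segments (start_ms, end_ms, speaker).
from collections import defaultdict
from pydub import AudioSegment

audio = AudioSegment.from_wav('/opt/audio/audios/video1.wav')

segments = [
    (0, 5200, "SPEAKER_00"),
    (5200, 9800, "SPEAKER_01"),
    (9800, 15000, "SPEAKER_00"),
]

# Gather every segment of the same speaker into a single AudioSegment
per_speaker = defaultdict(AudioSegment.empty)
for start_ms, end_ms, speaker in segments:
    per_speaker[speaker] += audio[start_ms:end_ms]

# Export one wav per speaker
for speaker, clip in per_speaker.items():
    clip.export(f"/opt/audio/audios/{speaker}.wav", format="wav")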
The actual result looks like this: