brew install tesseract
安装pytesseract和pillow
pip install pytesseract
pip install pillow
pip install SpeechRecognition
pip3 install soundfile
pip3 install torch
pip3 install openai-whisper
脚本参考:
import os
import sys
import cv2
import pytesseract
import speech_recognition as sr
from moviepy.video.io.VideoFileClip import VideoFileClip
from pydub import AudioSegment
def extract_subtitles(video_path, output_dir):
    """Transcribe the audio of an MP4 file with Whisper and print the text.

    The input is decoded with pydub and exported to ``audio.wav`` in the
    current working directory; that WAV file is then read back through
    SpeechRecognition and passed to ``recognize_whisper``.

    Args:
        video_path: Path to the input file; it is decoded as MP4.
        output_dir: Accepted so all extractors share one signature; this
            function does not use it.
    """
    # Decode the container and dump its audio as WAV in one step.
    AudioSegment.from_file(video_path, format='mp4').export("audio.wav", format="wav")

    recognizer = sr.Recognizer()
    with sr.AudioFile('audio.wav') as wav_source:
        recorded = recognizer.record(wav_source)

    print(recognizer.recognize_whisper(recorded))
def extract_subtitles_v2(video_path, output_dir):
    """Transcribe a video's audio track with Whisper and print the text.

    MoviePy writes the audio track to ``audio.wav`` in the current working
    directory; that file is then read back through SpeechRecognition and
    passed to ``recognize_whisper``.

    Args:
        video_path: Path to the input video.
        output_dir: Accepted so all extractors share one signature; this
            function does not use it.
    """
    wav_path = 'audio.wav'
    recognizer = sr.Recognizer()

    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            # No audio track: nothing to transcribe, so emit an empty
            # transcript instead of failing on ``None.write_audiofile``.
            print('')
            return
        clip.audio.write_audiofile(wav_path)
    finally:
        # Always release the resources MoviePy holds for the clip.
        clip.close()

    def transcribe_audio(path):
        """Return the Whisper transcript of the WAV file at *path*.

        Returns an empty string when the recognizer raises
        ``sr.UnknownValueError``; the error is printed.
        """
        transcript = ''
        # sr.AudioFile needs a filename (or file-like object); handing it a
        # pydub AudioSegment, as the previous version did, is rejected.
        with sr.AudioFile(path) as source:
            audio_data = recognizer.record(source)
        try:
            transcript = recognizer.recognize_whisper(audio_data)
        except sr.UnknownValueError as e:
            print(e)
        return transcript

    print(transcribe_audio(wav_path))
def _iter_video_frames(video_path):
    """Yield the frames of *video_path* one at a time, then release the capture.

    Raises:
        OSError: If OpenCV cannot open the video.
    """
    vidcap = cv2.VideoCapture(video_path)
    try:
        if not vidcap.isOpened():
            raise OSError("cannot open video: %r" % (video_path,))
        while True:
            success, image = vidcap.read()
            if not success:
                # read() reports failure once the stream is exhausted; the
                # accompanying image is not a valid frame and must be skipped.
                break
            yield image
    finally:
        vidcap.release()


def extract_subtitles_v1(video_path, output_dir):
    """Run Tesseract OCR on every frame of a video and print each result.

    Frames are processed as they are decoded rather than being collected in
    a list first, so memory use does not grow with the length of the video.

    Args:
        video_path: Path to the input video.
        output_dir: Accepted so all extractors share one signature; this
            function does not use it.

    Raises:
        OSError: If OpenCV cannot open the video.
    """
    for frame in _iter_video_frames(video_path):
        # OCR uses Tesseract's 'chi_sim' language data; use lang='eng' for
        # English subtitles.
        text = pytesseract.image_to_string(frame, lang='chi_sim')
        print(text)
def extract_subtitles_v3(video_path, output_dir):
    """Run Tesseract OCR on each frame of a video as it is read and print it.

    Args:
        video_path: Path to the input video.
        output_dir: Accepted so all extractors share one signature; this
            function does not use it.

    Raises:
        OSError: If OpenCV cannot open the video.
    """
    vidcap = cv2.VideoCapture(video_path)
    try:
        if not vidcap.isOpened():
            raise OSError("cannot open video: %r" % (video_path,))

        # Read-then-test ordering: the first frame is processed too, and the
        # invalid image returned by the final failed read never reaches OCR.
        success, image = vidcap.read()
        while success:
            text = pytesseract.image_to_string(image, lang='chi_sim')
            print(text)
            success, image = vidcap.read()
    finally:
        vidcap.release()
if __name__ == '__main__':
    # Usage: python <script> <video_path> <output_dir>
    current_dir = os.getcwd()
    print("current_dir:", current_dir)

    args = sys.argv
    print('args:', args)
    if len(args) < 3:
        # Fail with a usage hint instead of a bare IndexError.
        sys.exit("usage: {} <video_path> <output_dir>".format(args[0]))

    # Despite its name, input_dir is the path of the video file handed to
    # the extractor.
    input_dir = args[1]
    output_dir = args[2]

    # Frame-by-frame OCR is the active strategy; extract_subtitles and
    # extract_subtitles_v1 take the same arguments and can be swapped in.
    extract_subtitles_v3(input_dir, output_dir)