使用格式工厂将 mp4 视频与 srt 字幕合并,再用 Python 按照字幕时间轴剪辑视频,将其分割为若干小段
一、视频合并
1.选择转换为mp4,将视频导入格式工厂
2.调整字幕样式
二、python
1.可能用到的命令:
pip install moviepy
2.main.py
import cut_srt
import cut_video
if __name__ == '__main__':
    import os

    my_video_path = r"D:\Videos\Star Wars\Star Wars 9 The.Rise.of.Skywalker.2019.mp4"
    # Folder holding the subtitle files; every .srt inside it is exported
    # to a formatted .csv file by srt_to_format_txt.
    my_srt_path = r"D:\Documents"
    cut_srt.srt_to_format_txt(my_srt_path)

    # cut_video_by_srt opens its second argument as ONE subtitle file, so it
    # must receive a file path, not the folder itself.
    # NOTE(review): the first .srt (sorted by name) is assumed to belong to
    # my_video_path - adjust if the folder holds several subtitle files.
    srt_files = sorted(name for name in os.listdir(my_srt_path) if name.endswith(".srt"))
    if not srt_files:
        raise FileNotFoundError("no .srt file found in " + my_srt_path)
    cut_video.cut_video_by_srt(my_video_path, os.path.join(my_srt_path, srt_files[0]))
3.cut_srt.py
import re
import os
def check_contain_chinese(check_str):
    """Return True when *check_str* has at least one character in U+4E00..U+9FFF.

    An empty string yields False.
    """
    return any('\u4e00' <= symbol <= '\u9fff' for symbol in check_str)
def validate_title(str_):
    """Return *str_* stripped of punctuation, lower-cased and whitespace-normalised.

    Every character matched by the punctuation class below is deleted, the
    result is lower-cased, leading/trailing whitespace is trimmed, and each
    run of spaces is collapsed to a single space.
    """
    punctuation = re.compile(r"[\/\\\:\*\?\"\<\>\|\.\,\!\'\-\♪\?\!\…\“\”\,]")
    without_punctuation = punctuation.sub("", str_)
    # str.strip() only trims the two ends; inner runs of spaces are handled
    # by the substitution below.
    trimmed = without_punctuation.lower().strip()
    return re.sub(' +', ' ', trimmed)
def get_format_sequences(srt_path_):
    """Parse a subtitle file into a list of four-line cues.

    The file is read as UTF-8 (a leading BOM is tolerated) and split into
    cues at blank lines. Only cues with exactly four non-blank lines are
    kept, i.e. ``[index, time_range, subtitle_1, subtitle_2]``; cues with
    any other number of lines are skipped.

    Args:
        srt_path_: path of the ``.srt`` file to read.

    Returns:
        A list of ``[index, time_range, subtitle_1, subtitle_2]`` string lists.
    """
    with open(srt_path_, 'r', encoding='utf-8-sig') as f:
        content_ = f.read()
    # A separator line that holds only spaces/tabs must still end a cue;
    # splitting on a literal '\n\n' would merge the neighbouring cues, and the
    # merged block would then be dropped by the four-line filter below.
    blocks = re.split(r'\n\s*\n', content_)
    new_sequences = []
    for block in blocks:
        # Drop empty and whitespace-only lines inside the cue.
        lines = [line for line in block.split('\n') if line.strip()]
        if len(lines) == 4:
            new_sequences.append(lines)
    return new_sequences
def str2sec(x):
    """Convert an ``H:M:S`` string to seconds, e.g. ``"1:20:12"`` -> 1*3600 + 20*60 + 12."""
    hours, minutes, seconds = x.split(':')
    total = int(seconds)
    total += 60 * int(minutes)
    total += 3600 * int(hours)
    return total
def get_start_end_time(str_):
    """Turn a time-range line into ``(start_seconds, end_seconds)``.

    Input looks like ``02:09:53,440 --> 02:09:55,740``. Everything from the
    first comma of each timestamp onward (the milliseconds) is discarded
    before the timestamp is converted with ``str2sec``.
    """
    begin_stamp, finish_stamp = str_.strip().split("-->")
    begin_seconds = str2sec(begin_stamp.partition(",")[0])
    finish_seconds = str2sec(finish_stamp.partition(",")[0])
    return begin_seconds, finish_seconds
def srt_to_format_txt(srt_path):
    """Export every ``.srt`` file in the folder *srt_path* to a ``.csv`` beside it.

    Each cue returned by ``get_format_sequences`` becomes one line of the form
    ``movie_name,running_number,cue_index,english_text,chinese_text`` where
    ``running_number`` is a zero-padded five-digit counter starting at 1.
    The file name and every written line are also printed.

    Args:
        srt_path: folder that is scanned (non-recursively) for ``.srt`` files.
    """
    for file_name in os.listdir(srt_path):
        if not file_name.endswith(".srt"):
            continue
        print(file_name)
        file_path = os.path.join(srt_path, file_name)
        sequences = get_format_sequences(file_path)
        if not sequences:
            # No parsable cue: there is no first line to detect the language
            # order from, so skip the file instead of raising IndexError.
            continue
        # The language order is decided once per file from the first cue:
        # if its first text line contains Chinese, English is the second line.
        if check_contain_chinese(sequences[0][2]):
            en_position, ch_position = 3, 2
        else:
            en_position, ch_position = 2, 3
        # splitext only touches the real extension, whereas
        # str.replace(".srt", ...) would rewrite every occurrence in the path.
        movie_name = os.path.splitext(file_name)[0]
        csv_path = os.path.splitext(file_path)[0] + ".csv"
        # "w" overwrites an existing file; utf-8-sig writes a BOM at the start.
        with open(csv_path, "w", encoding='utf-8-sig') as f:
            for count, cue in enumerate(sequences, start=1):
                count_format = "{:05d}".format(count)
                sentence_id = cue[0]
                # Commas are replaced so the English text cannot add CSV columns.
                en_format = re.sub(
                    ' +', ' ',
                    cue[en_position].replace("- ", " ").replace(",", " ").strip())
                ch_format = validate_title(cue[ch_position])
                line = ",".join(
                    [movie_name, count_format, sentence_id, en_format, ch_format])
                print(line)
                f.write(line + "\n")
4.cut_video.py
import os
import time
from moviepy.video.io.VideoFileClip import VideoFileClip
from cut_srt import get_format_sequences, get_start_end_time
def cut_video_by_start_end(video_path_, save_file_path_, my_start, my_end, save_name):
    """Write the part of a video between *my_start* and *my_end* to a new file.

    Args:
        video_path_: path of the source video.
        save_file_path_: folder the clip is written into.
        my_start: clip start, passed straight to moviepy's sub-clip call.
        my_end: clip end, passed straight to moviepy's sub-clip call.
        save_name: file name of the clip inside *save_file_path_*.

    The clip is encoded at 24 fps with moviepy's progress logger disabled.
    """
    video = VideoFileClip(video_path_)
    try:
        # NOTE(review): moviepy 2.x renamed subclip() to subclipped(); use
        # whichever the installed version provides.
        make_subclip = getattr(video, "subclipped", None) or video.subclip
        clip = make_subclip(my_start, my_end)
        clip.write_videofile(
            os.path.join(save_file_path_, save_name), fps=24, logger=None)
    finally:
        # Close the source clip even when encoding fails so the reader it
        # holds is not left open.
        video.close()
def cut_video_by_srt(video_path, srt_path):
    """Cut *video_path* into one clip per cue of the subtitle file *srt_path*.

    Clips are written to a folder named after the video (its path without
    the extension) as ``00001.mp4``, ``00002.mp4``, ... Each clip spans the
    cue's time range padded by two seconds on both sides. A progress line is
    printed before every clip.

    If the output folder already holds N entries, work resumes at cue N,
    which rewrites the highest-numbered clip and continues from there.

    Args:
        video_path: path of the source video.
        srt_path: path of a single ``.srt`` file.
    """
    # Folder that receives the clips. splitext strips only the real
    # extension, so non-.mp4 inputs and paths containing ".mp4" elsewhere
    # still map to a separate folder.
    save_file_path = os.path.splitext(video_path)[0] + os.sep
    os.makedirs(save_file_path, exist_ok=True)

    # With an empty folder start at clip 1 / cue 0; otherwise redo the last
    # existing clip number.
    count = max(len(os.listdir(save_file_path)), 1)
    start_index = count - 1

    sequences = get_format_sequences(srt_path)
    total = len(sequences)
    srt_name = os.path.splitext(os.path.basename(srt_path))[0]
    my_time = time.time()
    for cue in sequences[start_index:]:
        file_name = "{:05d}".format(count) + ".mp4"
        print(srt_name +
              "共" + str(total) + "个,当前:" + file_name +
              ", 当前进度:" + "{:<.2f}".format((count / total) * 100) + "%" + "," +
              " 耗时:" + "{:<.2f}".format(time.time() - my_time) + "s")
        start_time, end_time = get_start_end_time(cue[1])
        # Clamp the padded start at 0: for cues in the first two seconds the
        # padding would go negative, and moviepy 1.x treats a negative start
        # as an offset from the END of the video.
        padded_start = max(start_time - 2, 0)
        cut_video_by_start_end(video_path, save_file_path, padded_start, end_time + 2, file_name)
        count += 1