电影字幕可以从网上下载srt文件,下载后便于学习。但是对于孩子,总看计算机也不好,因此需要将字幕打印下来。如果直接打印srt文件,则篇幅相当大,没用的换行也非常多。因此对其整理后,可以减少打印量。手工整理非常费时费力,因此想到使用Python写一段简单的处理代码。由于很简单,未加注释,对照srt文件,很容易理解。
处理过程中,如果对白之间有超过5秒的空白,则认为切换到另一个对话场景。
python代码如下:
from __future__ import print_function
import sys
import os
def time_interval(start, end):
    """Return the number of whole seconds from *start* to *end*.

    Both arguments are timestamp strings laid out as ``HH:MM:SS`` in their
    first eight characters.  Only those positions are read; anything after
    them (such as a ``,mmm`` millisecond suffix) is ignored, so the result
    is based on truncated whole seconds.

    Args:
        start: Timestamp string of the earlier moment.
        end: Timestamp string of the later moment.

    Returns:
        ``end - start`` in seconds as an int; negative when *end* precedes
        *start*.

    Raises:
        ValueError: If any of the hour/minute/second fields is not numeric.
    """
    def _to_seconds(stamp):
        # Fixed-column parse: characters 0-1 hours, 3-4 minutes, 6-7 seconds.
        hours = int(stamp[:2])
        minutes = int(stamp[3:5])
        seconds = int(stamp[6:8])
        return hours * 3600 + minutes * 60 + seconds

    begin = _to_seconds(start)
    finish = _to_seconds(end)
    return finish - begin
def space_print(x, n):
    """Print *x* right-aligned in a field of *n* columns, without a newline.

    When *x* is shorter than *n*, the difference is written as leading
    spaces first.  When *x* is already *n* long or longer, it is printed
    as-is with no padding and no truncation.

    Args:
        x: The value to print; its length is taken with ``len``.
        n: The minimum field width in characters.
    """
    missing = n - len(x)
    if missing > 0:
        # Emit all of the left padding in one write.
        print(" " * missing, end='')
    print(x, end='')
def subtitles2screen(scr, scene_gap=5):
    """Print a compact, scene-grouped transcript of a subtitle file to stdout.

    The file is read line by line and every stripped line is classified as:

    * a cue index   -- the line consists only of digits;
    * a timing line -- the line contains ``" --> "``;
    * a blank line  -- ignored;
    * dialogue      -- anything else.

    Dialogue lines that share a cue index are joined on one output line in
    the form ``"<index right-aligned to 4 columns>: text text ..."``.  A
    ``scene: N`` header is printed before the first cue, and again (followed
    by the previous cue's end time) whenever the whole-second gap between the
    previous cue's end and the next cue's start exceeds *scene_gap*.

    A digit-only line that comes immediately after a timing line is treated
    as dialogue (for example the spoken line ``1984``) rather than as the
    index of a new cue; otherwise that dialogue would be dropped and the cue
    renumbered.  A digit-only line later in the same cue is still read as an
    index.

    Args:
        scr: Path of the subtitle file to read.
        scene_gap: Gap in seconds between two cues above which a new scene
            header is emitted.  Defaults to 5.

    Returns:
        None.  All output goes to stdout; no newline is written after the
        last cue.
    """
    previous_finish_time = ""
    previous_subtitles_index = ""
    subtitles_index = ""
    scene_index = 1
    # True only while the line just processed was a timing line.
    after_timing_line = False

    with open(scr) as f_scr:
        # Iterate lazily instead of loading the whole file with readlines().
        for raw_line in f_scr:
            line = raw_line.strip()
            follows_timing = after_timing_line
            after_timing_line = False

            if line.isdigit() and not follows_timing:
                subtitles_index = line
            elif " --> " in line:
                if previous_finish_time.strip() == "":
                    # Very first cue: open scene 1 without a leading newline.
                    print("scene: " + str(scene_index), end='')
                    scene_index += 1
                elif time_interval(previous_finish_time, line[0:12]) > scene_gap:
                    # A long silence is taken as a switch to another scene.
                    print("\nscene: " + str(scene_index) + " " + previous_finish_time, end='')
                    scene_index += 1
                # Fixed columns of "HH:MM:SS,mmm --> HH:MM:SS,mmm": the end
                # timestamp occupies characters 17-28.
                previous_finish_time = line[17:29]
                after_timing_line = True
            elif line == "":
                pass
            else:
                if subtitles_index == previous_subtitles_index:
                    # Continuation of the current cue: stay on the same line.
                    print(" " + line, end='')
                else:
                    # First dialogue line of a new cue: start a new output line.
                    print("")
                    space_print(subtitles_index, 4)
                    print(": " + line, end='')
                previous_subtitles_index = subtitles_index
if __name__ == "__main__":
    # Command-line entry point: takes the subtitle file path as its first
    # argument and prints the condensed transcript to stdout.
    if len(sys.argv) < 2:
        # Name the required argument so the usage line tells the user what
        # to pass.
        print("usage: python " + sys.argv[0] + " <subtitles_file>")
    elif not os.path.exists(sys.argv[1]):
        print("subtitles file does not exist")
    else:
        subtitles2screen(sys.argv[1])