import re # 导入re模块
# Sample transcript in LRC style: every line starts with a [mm:ss.xx] time tag.
s = """
[00:45.09]M: It's my last day at work tomorrow.
[00:47.32] I start my new job in two weeks.
[00:49.70]My human resources manager wants to
[00:51.65]conduct an interview with me before I leave.
[00:54.48]W: Ah, an exit interview.
[00:56.67]Are you looking forward to it?
[00:58.37]M: I'm not sure how I feel about it.
[01:00.72]I resigned because I've been unhappy
[27:46.11]to reduce fatal road accidents
[27:48.87]in addition to safer roads?
"""

# Match only time tags such as [00:45.09], [1:02] or [01:02.345].
# A raw string avoids the invalid "\[" escape warning, and the explicit
# digit pattern (instead of the greedy "\[.*\]") keeps the match from
# swallowing text between two bracketed spans on the same line or from
# removing bracketed text that is not a time tag.
pattern = re.compile(r"\[\d+:\d{2}(?:[.:]\d+)?\]")

# Strip every time tag in a single pass; no need to collect the tags first
# and replace them one by one.
s = pattern.sub("", s)

# Join the lines into one paragraph. Splitting on any whitespace and
# re-joining with single spaces also drops the leading/trailing blank and
# the double space left behind where a tag was followed by a space.
print(" ".join(s.split()))
# Output:
# M: It's my last day at work tomorrow. I start my new job in two weeks. My human resources manager wants to conduct an interview with me before I leave. W: Ah, an exit interview. Are you looking forward to it? M: I'm not sure how I feel about it. I resigned because I've been unhappy to reduce fatal road accidents in addition to safer roads?