0% found this document useful (0 votes)
7 views2 pages

Devide

The document contains a Python script that merges subtitles from a file by processing lines to combine consecutive subtitles without punctuation. It reads the input subtitle file, filters lines based on punctuation, and then merges them if they meet certain criteria. Finally, it writes the merged subtitles to an output file in the correct format.

Uploaded by

zrr20031119
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
7 views2 pages

Devide

The document contains a Python script that merges subtitles from a file by processing lines to combine consecutive subtitles without punctuation. It reads the input subtitle file, filters lines based on punctuation, and then merges them if they meet certain criteria. Finally, it writes the merged subtitles to an output file in the correct format.

Uploaded by

zrr20031119
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 2

def merge_subtitles(lines):
    """Merge adjacent subtitle blocks when the first one is an unfinished sentence.

    The input is scanned as a flat sequence of lines. A block is three
    consecutive lines: a line consisting only of digits (the index), a time
    line, and a single text line. When a block is immediately followed by
    another block and its text does not end with ``.``, ``?`` or ``!``, the
    two are merged into one block that keeps the first index, spans from the
    first start time to the second end time, and carries both texts joined
    by a single space.

    Note: the following block must start on the very next line. A blank
    separator line between blocks is not a digit line, so it prevents the
    merge and is emitted as its own single-element entry.

    Args:
        lines: Sequence of strings (for example the result of
            ``file.readlines()``); each line is stripped before use.

    Returns:
        A list of lists. Complete blocks are ``[index, time, text]``; every
        other line is returned as a single-element list ``[stripped_line]``.
    """
    # str.endswith accepts a tuple; '...' is already covered by '.'.
    sentence_endings = ('.', '?', '!')
    arrow = ' --> '

    merged_lines = []
    total = len(lines)
    i = 0
    while i < total:
        stripped = lines[i].strip()

        # Anything that cannot start a complete three-line block is passed
        # through unchanged as a single-element entry.
        if not stripped.isdigit() or i + 2 >= total:
            merged_lines.append([stripped])
            i += 1
            continue

        current_num = stripped
        current_time = lines[i + 1].strip()
        current_text = lines[i + 2].strip()

        # A second block must follow directly for a merge to be possible.
        if i + 5 < total and lines[i + 3].strip().isdigit():
            next_time = lines[i + 4].strip()
            next_text = lines[i + 5].strip()

            current_parts = current_time.split(arrow)
            next_parts = next_time.split(arrow)
            # Only merge when both time lines really are "start --> end";
            # otherwise indexing the split result would raise IndexError.
            can_merge = (
                not current_text.endswith(sentence_endings)
                and len(current_parts) >= 2
                and len(next_parts) >= 2
            )
            if can_merge:
                merged_time = f"{current_parts[0]}{arrow}{next_parts[1]}"
                # Keep BOTH texts; skip empty pieces so no stray space appears.
                merged_text = " ".join(
                    piece for piece in (current_text, next_text) if piece
                )
                merged_lines.append([current_num, merged_time, merged_text])
                i += 6
                continue

        merged_lines.append([current_num, current_time, current_text])
        i += 3

    return merged_lines

def process_subtitle_file(input_path, output_path):
    """Filter a subtitle file by punctuation, merge blocks, and write the result.

    Steps:
      1. Read every line of ``input_path`` (UTF-8).
      2. For each line containing ``.``, ``?`` or ``!``, keep that line plus
         the two lines before it and the four lines after it.
      3. Pass the kept lines, in original order, to ``merge_subtitles``.
      4. Write each three-field entry to ``output_path`` (UTF-8), renumbered
         from 1. Entries that do not have exactly three fields are skipped.

    Args:
        input_path: Path of the subtitle file to read.
        output_path: Path of the file to write; it is overwritten.
    """
    with open(input_path, 'r', encoding='utf-8') as src:
        lines = src.readlines()

    total = len(lines)
    markers = ['.', '...', '?', '!']

    # Indices of lines that survive the punctuation filter.
    keep_lines = set()
    for idx, text in enumerate(lines):
        if not any(marker in text for marker in markers):
            continue
        # Window of context around the punctuated line: idx-2 .. idx+4.
        keep_lines.update(range(max(0, idx - 2), min(total, idx + 5)))

    filtered_lines = [text for idx, text in enumerate(lines) if idx in keep_lines]

    merged_subtitles = merge_subtitles(filtered_lines)
    last_pos = len(merged_subtitles) - 1

    with open(output_path, 'w', encoding='utf-8') as dst:
        new_index = 0
        for pos, subtitle in enumerate(merged_subtitles):
            # Only complete subtitle blocks (index, time, text) are written.
            if len(subtitle) != 3:
                continue
            new_index += 1
            # Blank line between blocks; a single newline after the entry
            # that sits last in the merged list.
            terminator = "\n\n" if pos < last_pos else "\n"
            dst.write(f"{new_index}\n")
            dst.write(f"{subtitle[1]}\n")
            dst.write(f"{subtitle[2]}")
            dst.write(terminator)

# Placeholder paths: replace with the real subtitle input and output locations.
input_file = r"(the path of file)"
output_file = r"(the path of target)"

# Run the conversion only when this file is executed as a script, so that
# importing the module (e.g. to reuse merge_subtitles) does not touch files.
if __name__ == "__main__":
    process_subtitle_file(input_file, output_file)

You might also like