python选择指定页码提取出子pdf

前言

笑死,想打印一篇论文,结果发现不想全都打印,好贵,穷苦孩子流下贫穷的眼泪。

这刚好和《python实现pdf合并》这篇文章差不多是逆操作。



代码

"""
通过特定页码选择pdf
"""
from PyPDF2 import PdfFileReader, PdfFileWriter


def parse_num(lt):
    """Expand a mixed list of page numbers and page ranges into a flat list.

    Each item of *lt* is either an ``int`` (kept as is) or a ``str``.
    A string is either an inclusive range ``'start-end'`` or a single
    number such as ``'7'``.  Items are expanded in the order given; no
    de-duplication or sorting is performed.  A range whose start is greater
    than its end contributes nothing.

    >>> parse_num([1, 3, '5-8'])
    [1, 3, 5, 6, 7, 8]

    Raises:
        TypeError: if an item is neither ``int`` nor ``str``.
        ValueError: if a string item cannot be parsed as integers.
    """
    new_lt = []
    for i in lt:
        if isinstance(i, int):
            new_lt.append(i)
        elif isinstance(i, str):
            # partition() never fails on a missing '-', so a plain '7'
            # is accepted as the one-page range 7-7.
            start_text, sep, end_text = i.partition('-')
            start_index = int(start_text)
            end_index = int(end_text) if sep else start_index
            new_lt.extend(range(start_index, end_index + 1))
        else:
            raise TypeError('parse unseen type, just for int or str!')
    return new_lt


def select_by_page_num(entire_pdf, selective_pdf, raw_page_lt):
    """Copy the selected pages of one PDF file into a new PDF file.

    Args:
        entire_pdf: path of the source PDF to read.
        selective_pdf: path of the PDF to write.
        raw_page_lt: 1-based page selection in the format accepted by
            ``parse_num``, e.g. ``[1, 3, '5-8']``.

    Raises:
        ValueError: if any selected page number is smaller than 1.
    """
    page_lt = parse_num(raw_page_lt)

    # Page numbers are 1-based for the caller.  Anything below 1 would turn
    # into a negative index after the shift below, so reject it up front.
    for page_no in page_lt:
        if page_no < 1:
            raise ValueError(
                'page numbers start at 1, got {}'.format(page_no))

    # getPage() is 0-based, so shift every page number down by one.
    page_lt = [i - 1 for i in page_lt]

    # The context manager closes the source file even when reading or
    # writing fails.  The output is written while the source is still open,
    # matching the original order of operations.
    with open(entire_pdf, 'rb') as input_file:
        pdf_input = PdfFileReader(input_file)
        pdf_output = PdfFileWriter()

        for i in page_lt:
            pdf_output.addPage(pdf_input.getPage(i))
        with open(selective_pdf, 'wb') as f:
            pdf_output.write(f)

    print('保存页码 {} 到 {} 成功'.format(raw_page_lt, selective_pdf))


if __name__ == "__main__":
    # Demo run: extract pages 1, 3 and 5 through 8 of the sample paper.
    select_by_page_num(
        entire_pdf='pdf/ddpm.pdf',
        selective_pdf='pdf/selective_ddpm.pdf',
        raw_page_lt=[1, 3, '5-8'],
    )

结果就是导出原来pdf的第1, 3, 5到8页了。

你可能感兴趣的:(实用工具,python,pdf操作)