使用 PyPDF2 将 PDF 文件按页拆分成多个单页 PDF

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Script author.
__author__ = 'silva'

# Usage text for this script. It is a runtime string: the script entry point
# prints it before processing starts, so its content is user-facing output.
__doc__ = """
传入待拆分的pdf文件夹目录的绝对路径: dir_path
新建拆分文件保存拆分后的pdf:dir_path_split
Note: 异常不可拆分文件会保留copy到拆分文件夹里
"""

import os
from shutil import copy
from PyPDF2 import PdfFileReader, PdfFileWriter


# Absolute paths of the directories whose PDF files run() will split.
# Commented-out entries are disabled and are not processed.
split_dir_list = [
    # r'C:\Users\silva\Desktop\银行文档拆分\海峡银行原始数据',
    # r'C:\Users\silva\Desktop\银行文档拆分\桂林银行原始数据',
    # r'C:\Users\silva\Desktop\银行文档拆分\温州银行原始数据',
    r'C:\Users\silva\Desktop\新增追加提取_appdend\原文档copy'
]

# Paths that run() reports as problematic; run() appends to this list and
# prints it once all directories have been processed.
error_list = []

def run():
    """Split every PDF in each directory listed in ``split_dir_list``.

    For an input directory ``<parent>/<name>`` the single-page PDFs are
    written to the sibling directory ``<parent>/<name>_split`` (created when
    missing). Each PDF ``foo.pdf`` produces ``foo_1.pdf``, ``foo_2.pdf``, ...

    Entries that cannot be split are recorded in the module-level
    ``error_list``:

    * sub-directories are recorded and skipped;
    * non-PDF files are recorded and copied unchanged into the output
      directory;
    * PDF files for which ``split_pdf`` raises are recorded and copied
      unchanged into the output directory, so one bad file does not abort
      the rest of the batch.

    The collected ``error_list`` is printed once all directories are done.

    :return: None
    """
    for dir_path in split_dir_list:
        # Drop a trailing separator; otherwise the last path component would
        # be empty and the output directory would land inside the input one.
        dir_path = os.path.normpath(dir_path)
        parent_dir, input_dir_name = os.path.split(dir_path)
        output_dir_path = os.path.join(parent_dir, input_dir_name + '_split')
        # Create the output directory (no error if it already exists).
        os.makedirs(output_dir_path, exist_ok=True)

        for file in os.listdir(dir_path):
            file_path = os.path.join(dir_path, file)
            # splitext removes exactly one extension; str.strip('.pdf') would
            # also eat leading/trailing '.', 'p', 'd', 'f' characters of the name.
            filename, extension = os.path.splitext(file)
            error_copy_file = os.path.join(output_dir_path, file)

            # Compare the extension case-insensitively so '.PDF' is accepted.
            if os.path.isfile(file_path) and extension.lower() == '.pdf':
                try:
                    split_pdf(file_path, output_dir_path, filename)
                except Exception as exc:
                    # Batch boundary: report the failure, keep a copy of the
                    # original as the usage note promises, and carry on.
                    print('WARNING: 拆分失败', file_path, exc)
                    error_list.append(file_path)
                    copy(file_path, error_copy_file)
                continue

            print('WARNING: 异常文件路径不能读写', file_path)
            error_list.append(file_path)
            # Directories are only reported; regular non-PDF files are copied.
            if os.path.isdir(file_path):
                continue
            copy(file_path, error_copy_file)
    print('异常文件路径汇总:', error_list)

def split_pdf(infile_path, out_path, filename):
    """
    Write every page of one PDF file to its own single-page PDF.

    :param infile_path: path of the PDF file to split
    :param out_path: directory that receives the single-page PDFs; created if it does not exist
    :param filename: base name of the outputs; page N is saved as ``<filename>_N.pdf``
    :return: None
    """
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    with open(infile_path, 'rb') as source:
        # strict=False is passed through to the reader exactly as before.
        pdf = PdfFileReader(source, strict=False)
        page_total = pdf.getNumPages()
        print('---------当前文件 {0} 共:{1} 页----------'.format(infile_path, page_total))

        # Page numbers in the output names are 1-based; reader indices are 0-based.
        for page_no in range(1, page_total + 1):
            single_page = PdfFileWriter()
            single_page.addPage(pdf.getPage(page_no - 1))
            target = os.path.join(out_path, filename + '_{}.pdf'.format(page_no))
            with open(target, 'wb') as sink:
                single_page.write(sink)
                print('拆分成功:', target)

# When executed as a script: print the usage text, then process every
# directory configured in split_dir_list.
if __name__ == '__main__':
    print(__doc__)
    run()

你可能感兴趣的:(使用PyPDF2 将pdf文件按页拆分成多份pdf)