python修改pdf元信息 metadata

PDF 文件是签名只读的,所以要先读出来,然后再写出去

使用前先安装 PyPDF2,中文的坑请参考 https://github.com/mstamy2/PyPDF2/pull/463

talk is cheap,show you the code


from PyPDF2 import PdfFileReader, PdfFileWriter
from multiprocessing import Process, Queue
import os,time
import getopt, sys,shutil



def update_metadata(pdf):
    """Copy a PDF to a new file, writing fixed document metadata.

    The source document's info dictionary is printed, then every page of
    the source is appended to a fresh writer whose /Author, /Title and
    /Creator are all set to 'youngboy'.

    Args:
        pdf: Mapping with the keys ``'source'`` (path of the PDF to read)
            and ``'to'`` (path of the PDF to write).
    """
    readFile = pdf['source']
    writeFile = pdf['to']
    # Keep the source open until the write has finished: the writer pulls
    # page data through the reader while it serializes the output.
    with open(readFile, 'rb') as src:
        pdfReader = PdfFileReader(src)
        print(pdfReader.getDocumentInfo())
        pdfWriter = PdfFileWriter()
        # Metadata entries to store in the output file.
        pdfWriter.addMetadata({'/Author':'youngboy','/Title':'youngboy','/Creator':'youngboy'})
        # Copy all pages from the reader into the writer.
        pdfWriter.appendPagesFromReader(pdfReader)
        # Closing the destination explicitly guarantees the data is flushed.
        with open(writeFile, 'wb+') as dst:
            pdfWriter.write(dst)

def long_time_task(q):
    """Worker loop: handle queued jobs until the queue reports empty.

    Args:
        q: Queue of job dicts; each one is passed unchanged to
            ``update_metadata``.
    """
    import queue  # local import: only needed for the Empty exception

    while not q.empty():
        print("剩余任务"+str(q.qsize()))
        try:
            # Another worker may take the last job between the empty()
            # check above and this call; a bounded wait keeps this worker
            # from blocking forever in that case.
            v = q.get(timeout=1)
        except queue.Empty:
            break
        update_metadata(v)

def usage():
    """Print the command-line help text to standard output."""
    help_text = (
        "\n"
        "        - r root 目录\n"
        "        - p 进程数(程池不会用所以这个参数没意义)\n"
        "    "
    )
    print(help_text)

if __name__=='__main__':
    # Script entry point: parse options, mirror the directory tree under
    # <root>/dest, queue one job per PDF found, then let two worker
    # processes drain the queue.

    print(sys.argv[1:])
    try:
        # Long options are declared so the "--help" / "--process" checks
        # below can actually match.
        opts, args = getopt.getopt(sys.argv[1:], "hr:p:", ["help", "process="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(err)
        usage()
        sys.exit(2)
    root = None
    pnum = 3  # accepted for compatibility; the worker count below is fixed
    for o, a in opts:
        if o == "-r":
            root = a
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-p", "--process"):
            pnum = a
        else:
            # Not an assert statement: those are stripped under -O.
            raise AssertionError("unhandled option")

    # Without a root directory there is nothing to scan.
    if root is None:
        print("option -r is required")
        usage()
        sys.exit(2)

    dest_root = root+'/dest'
    q = Queue()
    # Start from a clean output tree; on the first run it does not exist yet.
    if os.path.isdir(dest_root):
        shutil.rmtree(dest_root)
    # Fill the queue with one job per PDF file.
    for (r, dirs, files) in os.walk(root):
        # os.walk yields paths that start with `root`, so slicing removes
        # exactly that prefix (str.replace would hit every occurrence).
        dd = r[len(root):]
        dest_dir = dest_root+dd
        if files:
            os.makedirs(dest_dir,exist_ok=True)
        for f in files:
            to_path=dest_dir+'/'+f
            print(dd+"--"+r+"--"+root)
            # Match on the extension only, not on "pdf" anywhere in the name.
            if f.lower().endswith('.pdf'):
                q.put({
                    'source':r+'/'+f,
                    'to':to_path
                })

    print(q.qsize())
    # Two worker processes, created by hand rather than through a pool.
    workers = [Process(target=long_time_task, args=(q,)) for _ in range(2)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print('All subprocesses done.')

使用示例

python xx.py -r D:/pdf

你可能感兴趣的:(python修改pdf元信息 metadata)