# 还是老习惯,最好的学习方式是看代码。
## pip install PyPDF2
##
# 1.打开一个或多个已有的PDF(源PDF),得到PdfFileReader对象。
# 2.创建一个新的PdfFileWriter对象。
# 3.将页面从PdfFileReader对象拷贝到PdfFileWriter对象中。
# 4.最后,利用PdfFileWriter对象写入输出的PDF。
## Copy pages: concatenate two PDFs into one
import PyPDF2

# NOTE(review): PdfFileReader/PdfFileWriter look like the pre-3.0 PyPDF2
# class names -- confirm against the installed PyPDF2 version.

# Both sources stay open until the combined file has been written (the
# original also closed them only after the write); `with` guarantees they
# are closed even if reading or writing raises.
with open('meetingminutes.pdf', 'rb') as pdf1File, \
        open('meetingminutes2.pdf', 'rb') as pdf2File:
    pdf1Reader = PyPDF2.PdfFileReader(pdf1File)
    pdf2Reader = PyPDF2.PdfFileReader(pdf2File)
    pdfWriter = PyPDF2.PdfFileWriter()

    # Append every page of the first document, then every page of the second.
    for sourceReader in (pdf1Reader, pdf2Reader):
        for pageNum in range(sourceReader.numPages):
            pageObj = sourceReader.getPage(pageNum)
            pdfWriter.addPage(pageObj)

    # Write the merged file.
    with open('combinedminutes.pdf', 'wb') as pdfOutputFile:
        pdfWriter.write(pdfOutputFile)
## Rotate a page: save the first page rotated by 90 degrees
import PyPDF2

# The source stays open until the result has been written (same ordering as
# the original close() calls); `with` closes both files even on error.
with open('meetingminutes.pdf', 'rb') as minutesFile:
    pdfReader = PyPDF2.PdfFileReader(minutesFile)
    page = pdfReader.getPage(0)
    page.rotateClockwise(90)  # rotate the page 90 degrees clockwise

    pdfWriter = PyPDF2.PdfFileWriter()
    pdfWriter.addPage(page)

    with open('rotatedpage.pdf', 'wb') as resultPdfFile:
        pdfWriter.write(resultPdfFile)
## Overlay pages: stamp a watermark onto the first page
import PyPDF2

# The watermark file used to be opened inline and never closed; both inputs
# are now managed by `with` and stay open until the output has been written.
with open('meetingminutes.pdf', 'rb') as minutesFile, \
        open('watermark.pdf', 'rb') as watermarkFile:
    pdfReader = PyPDF2.PdfFileReader(minutesFile)
    pdfWaterReader = PyPDF2.PdfFileReader(watermarkFile)

    # Merge the watermark's first page onto the document's first page.
    page0 = pdfReader.getPage(0)
    page0.mergePage(pdfWaterReader.getPage(0))

    pdfWriter = PyPDF2.PdfFileWriter()
    pdfWriter.addPage(page0)

    # Copy the remaining pages (index 1 onwards) unchanged.
    for pageNum in range(1, pdfReader.numPages):
        pageObj = pdfReader.getPage(pageNum)
        pdfWriter.addPage(pageObj)

    with open('watermarkedCover.pdf', 'wb') as resultPdfFile:
        pdfWriter.write(resultPdfFile)
## Encrypt a PDF: copy every page and protect the copy with a password
import PyPDF2

# The source used to be opened inline and never closed; it is now managed by
# `with` and stays open until the encrypted copy has been written.
with open('meetingminutes.pdf', 'rb') as minutesFile:
    pdfReader = PyPDF2.PdfFileReader(minutesFile)
    pdfWriter = PyPDF2.PdfFileWriter()

    for pageNum in range(pdfReader.numPages):
        pdfWriter.addPage(pdfReader.getPage(pageNum))

    # Set the password on the writer before the output is produced.
    pdfWriter.encrypt('password')

    with open('encryptedminutes.pdf', 'wb') as resultPdfFile:
        pdfWriter.write(resultPdfFile)
## Combine every PDF in the current directory, skipping each first page
# Combines all the PDFs in the current working directory into a single PDF
# named allminutes.pdf, omitting the first page of every input.
import os

import PyPDF2

# Get all the PDF filenames. The output file is excluded so that a re-run
# does not read allminutes.pdf while it is being overwritten.
pdfFiles = [
    filename for filename in os.listdir('.')
    if filename.endswith('.pdf') and filename != 'allminutes.pdf'
]
pdfFiles.sort(key=str.lower)

pdfWriter = PyPDF2.PdfFileWriter()

# Every input is kept open until the output has been written and is then
# closed in `finally`. Previously only the last input was closed, and the
# trailing close() raised NameError when the directory held no PDFs.
openInputs = []
try:
    # Loop through all the PDF files.
    for filename in pdfFiles:
        pdfFileObj = open(filename, 'rb')
        openInputs.append(pdfFileObj)
        pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
        # Loop through all the pages (except the first) and add them.
        for pageNum in range(1, pdfReader.numPages):
            pdfWriter.addPage(pdfReader.getPage(pageNum))

    # Save the resulting PDF to a file.
    with open('allminutes.pdf', 'wb') as pdfOutputFile:
        pdfWriter.write(pdfOutputFile)
finally:
    for openInput in openInputs:
        openInput.close()