# Extracting PDF metadata with Python and pyPdf

#!/usr/bin/python

# coding=utf-8

import pyPdf
import optparse
from pyPdf import PdfFileReader

# Use getDocumentInfo() to extract all of the metadata stored in the PDF document

def printMeta(fileName):
    """Print every entry of a PDF's document-information dictionary.

    The file is opened in binary mode and handed to ``PdfFileReader``; the
    mapping returned by ``getDocumentInfo()`` is printed as one
    ``[+] key: value`` line per entry, preceded by a ``[*]`` header line.

    Args:
        fileName: Path of the PDF file to inspect.
    """
    # open() inside a with-block replaces the Python-2-only file() builtin
    # and guarantees the handle is closed even when parsing fails.
    with open(fileName, 'rb') as pdfStream:
        pdfFile = PdfFileReader(pdfStream)
        docInfo = pdfFile.getDocumentInfo()
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("[*] PDF MeataData For: " + str(fileName))
        # NOTE(review): pyPdf appears to return None when the PDF has no
        # info dictionary -- confirm against the library; iterating None
        # would raise TypeError.
        if docInfo is None:
            return
        # The loop stays inside the with-block in case values are resolved
        # from the still-open stream (assumption -- verify against pyPdf).
        for metaItem in docInfo:
            # %-formatting tolerates non-string values, which plain "+"
            # concatenation would reject with a TypeError.
            print("[+] %s: %s" % (metaItem, docInfo[metaItem]))

def main():
    """Parse the command line and print the metadata of the PDF given via -F.

    When -F is not supplied, the usage string is printed and the process
    exits with status 0.

    Raises:
        SystemExit: with code 0 when no file name was given.
    """
    parser = optparse.OptionParser("[*]Usage: python pdfread.py -F ")
    parser.add_option('-F', dest='fileName', type='string', help='specify PDF file name')
    # Positional arguments are not used by this script.
    options, _unused_args = parser.parse_args()
    fileName = options.fileName
    if fileName is None:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(parser.usage)
        # Same effect as exit(0) without depending on the helper that the
        # site module injects into builtins.
        raise SystemExit(0)
    printMeta(fileName)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

# You may also be interested in: (Extracting PDF metadata with Python and pyPdf)