Python读取pdf的远程地址并解析内容

import xlwt,pymysql,requests,json,datetime,PyPDF2,urllib.request,io,ssl
 

# Fetch a PDF from a remote URL, load it into memory and extract the text of
# every page with PyPDF2.
#
# `contract_download_url` is not defined in this snippet; it must already be
# bound to the PDF's URL before this code runs.

# NOTE(security): an unverified context disables certificate and hostname
# verification. It is kept because it works around SSL errors raised while
# downloading the PDF, but it exposes the download to man-in-the-middle
# attacks; prefer ssl.create_default_context() once the server's certificate
# chain can be validated.
context = ssl._create_unverified_context()

# Download the whole PDF. The `with` block closes the HTTP response as soon as
# the body has been read, so the connection is not leaked.
with urllib.request.urlopen(contract_download_url, context=context) as req:
    remote_file = req.read()

# PdfReader needs a seekable binary stream, so wrap the downloaded bytes.
memory_file = io.BytesIO(remote_file)
read_pdf = PyPDF2.PdfReader(memory_file)

# Number of pages in the PDF.
number_of_pages = len(read_pdf.pages)

# Text of each page, in page order. Collected so that the business logic
# below can see every page, not only the last one processed by the loop.
page_texts = []
for pageObj in read_pdf.pages:
    # Extract the text content of the current page.
    page = pageObj.extract_text()
    page_texts.append(page)

# Continue with the downstream business logic .......................

你可能感兴趣的:(python)