Python 操作 Word 转 PDF

代码

import docx
import requests
from lxml import etree
import codecs
import docx
from docx.shared import RGBColor
from docx.shared import Inches
from docx.shared import Pt
from docx.enum.text import  WD_PARAGRAPH_ALIGNMENT  #WD_ALIGN_PARAGRAPH以前的库是这个已经废弃了,如果你遇到这种案例时建议替换为新的方法库
from docx.oxml.ns import qn
import time
import os
import sys
from win32com import client
import fitz


# Request headers carrying a desktop-browser User-Agent; passed to
# requests.get() when pages are fetched.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36 Edg/89.0.774.54",
}

# Module-level list of page URLs; starts empty and is filled by get_url().
url_list = []
def get_url():
    """Write the intro, then (re)build the list of page URLs.

    Calls ``write_intro()`` and then fills the module-level ``url_list``
    with the four page URLs numbered 1 through 4.

    The list is replaced in place instead of being appended to, so calling
    this function more than once no longer accumulates duplicate URLs
    (which would make ``get_content`` download every page repeatedly).
    """
    write_intro()
    # Slice assignment keeps the same list object, so any other reference
    # to ``url_list`` still sees the refreshed contents.
    url_list[:] = [
        "http://liucixin.zuopinj.com/5560/20211%s.html" % i
        for i in range(1, 5)
    ]

def get_content():
    """Download every page in ``url_list`` and print its paragraph text.

    Calls ``get_url()`` first to populate ``url_list``. For each URL it:

    1. fetches the page with the module-level ``headers`` and decodes the
       body as UTF-8;
    2. saves the raw HTML to ``源码.html`` (the file is overwritten on every
       iteration, so only the last page remains on disk);
    3. collects the text nodes of all ``<p>`` elements except the last one;
    4. joins them, strips every pair of ideographic spaces (U+3000 U+3000),
       and prints the result.

    Raises:
        requests.exceptions.RequestException: if a request fails or does
            not respond within the timeout.
        UnicodeDecodeError: if a response body is not valid UTF-8.
    """
    get_url()
    for url in url_list:
        # Without a timeout a stalled server would block the script forever.
        response = requests.get(url, headers=headers, timeout=30)
        res = response.content.decode("utf-8")
        with open("源码.html", "w", encoding="utf-8") as fp:
            fp.write(res)
        page = etree.HTML(res)
        # Drop the trailing text node. Slicing, unlike ``del content[-1]``,
        # does not raise IndexError when the page has no <p> text at all.
        paragraphs = page.xpath("//p/text()")[:-1]
        content = "".join(paragraphs).replace("\u3000\u3000", "")
        print(content)


    

你可能感兴趣的:(实用,python)