Python - 小玩意 - HTML 转 PDF

本地文件

import pdfkit
import os


def convert_html_to_pdf(html_file, output_directory, pdfkit_config):
    """Convert one local HTML file to a PDF inside ``output_directory``.

    The PDF keeps the HTML file's base name with its extension replaced by
    ``.pdf``. The outcome is printed to stdout; a failure is reported rather
    than raised, so a caller converting many files can carry on.

    Args:
        html_file: Path of the HTML file to convert.
        output_directory: Folder that receives the PDF. It is created when it
            does not exist yet; an empty string means the current directory.
        pdfkit_config: Object passed to ``pdfkit.from_file`` as its
            ``configuration`` argument.
    """
    stem = os.path.splitext(os.path.basename(html_file))[0]
    pdf_file = os.path.join(output_directory, f"{stem}.pdf")

    try:
        # Create the destination first so the PDF is never written into a
        # missing folder. os.makedirs('') raises, hence the guard.
        if output_directory:
            os.makedirs(output_directory, exist_ok=True)
        pdfkit.from_file(html_file, pdf_file, configuration=pdfkit_config)
    except Exception as e:
        # Deliberately broad: this is the per-file reporting boundary.
        print(f"Failed to convert {html_file} to PDF: {str(e)}")
    else:
        print(f"Converted {html_file} to {pdf_file}")


if __name__ == '__main__':
    # Folder holding the HTML files to convert.
    input_directory = 'htmls'
    # Folder that receives the generated PDF files.
    output_directory = 'pdfs'
    # Install location of the wkhtmltopdf executable.
    wkhtmltopdf_path = r'D:\install_address\wkhtmltopdf\bin\wkhtmltopdf.exe'

    # Stop with a readable message instead of an os.listdir traceback.
    if not os.path.isdir(input_directory):
        raise SystemExit(f"Input directory not found: {input_directory}")

    # Make sure the destination exists before any conversion writes into it.
    os.makedirs(output_directory, exist_ok=True)

    config = pdfkit.configuration(wkhtmltopdf=wkhtmltopdf_path)

    # Sorted for a reproducible processing order; the extension check ignores
    # case so files such as "REPORT.HTML" are picked up as well.
    html_files = [
        os.path.join(input_directory, name)
        for name in sorted(os.listdir(input_directory))
        if name.lower().endswith('.html')
    ]

    for html_file in html_files:
        convert_html_to_pdf(html_file, output_directory, config)

本地文件 & 网络链接

import os
import pdfkit

def _pdf_name_from_url(url):
    """Return the PDF file name to use for the web page at ``url``."""
    # Imported locally so this helper does not rely on the file's import section.
    from urllib.parse import urlsplit

    parts = urlsplit(url)
    # Only the path names the page; the query string and fragment are ignored.
    stem = parts.path.rstrip('/').rsplit('/', 1)[-1]
    # A bare site address has no path segment, so fall back to the host name.
    return f"{stem or parts.hostname or 'index'}.pdf"


def convert_html_to_pdf(source, output_directory, pdfkit_config):
    """Convert a web page or a local HTML file to a PDF in ``output_directory``.

    A source starting with ``http://`` or ``https://`` (any letter case) is
    rendered with ``pdfkit.from_url``; an existing local file is rendered
    with ``pdfkit.from_file``. Anything else is reported as unsupported.
    The outcome is printed to stdout; failures are reported, not raised.

    Args:
        source: URL or path of the HTML document to convert.
        output_directory: Folder that receives the PDF. It is created when it
            does not exist yet; an empty string means the current directory.
        pdfkit_config: Object passed to pdfkit as its ``configuration``
            argument.
    """
    # Require a full scheme so a local file such as "http_notes.html" is not
    # mistaken for a URL.
    is_url = source.lower().startswith(('http://', 'https://'))
    if not is_url and not os.path.isfile(source):
        print(f"Unsupported source: {source}")
        return

    # Same wording as before: URLs are quoted and prefixed, files are not.
    label = f"URL '{source}'" if is_url else source

    try:
        if is_url:
            pdf_name = _pdf_name_from_url(source)
        else:
            pdf_name = os.path.splitext(os.path.basename(source))[0] + '.pdf'
        pdf_file = os.path.join(output_directory, pdf_name)

        # Create the destination first so the PDF is never written into a
        # missing folder. os.makedirs('') raises, hence the guard.
        if output_directory:
            os.makedirs(output_directory, exist_ok=True)

        render = pdfkit.from_url if is_url else pdfkit.from_file
        render(source, pdf_file, configuration=pdfkit_config)
    except Exception as e:
        # Deliberately broad: this is the per-source reporting boundary.
        print(f"Failed to convert {label} to PDF: {str(e)}")
    else:
        print(f"Converted {label} to {pdf_file}")


if __name__ == '__main__':
    # Install location of the wkhtmltopdf executable and the folder that
    # receives the generated PDF files.
    wkhtmltopdf_path = r'D:\install_address\wkhtmltopdf\bin\wkhtmltopdf.exe'
    output_directory = 'pdfs'

    # Web page to convert; the long address is written as adjacent string
    # literals that Python joins into a single URL.
    article_url = (
        'https://lfsun666.blog.csdn.net/article/details/133148657?ydreferer'
        '=aHR0cHM6Ly9tcC5jc2RuLm5ldC9tcF9ibG9nL21hbmFnZS9hcnRpY2xlP3NwbT0xMDAxLjIwMTQuMzAwMS41NDQ4?ydreferer'
        '=aHR0cHM6Ly9tcC5jc2RuLm5ldC9tcF9ibG9nL21hbmFnZS9hcnRpY2xlP3NwbT0xMDAxLjIwMTQuMzAwMS41NDQ4'
    )
    # Local HTML file to convert.
    local_report = 'htmls/report.html'

    # More URLs or local file paths can be appended to this list.
    input_sources = [article_url, local_report]

    config = pdfkit.configuration(wkhtmltopdf=wkhtmltopdf_path)

    for source in input_sources:
        convert_html_to_pdf(source, output_directory, config)

你可能感兴趣的:(Python,python,html,pdf)