HTML内容转PDF

首先引入依赖(下面的代码还用到了 jsoup 来规范化 HTML,也需要一并引入):


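<dependency>
    <groupId>com.itextpdf.tool</groupId>
    <artifactId>xmlworker</artifactId>
    <version>5.5.8</version>
</dependency>

<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itext-asian</artifactId>
    <version>5.2.0</version>
</dependency>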

代码:

package org.bigdata.common.util.pdf;

import com.itextpdf.text.Document;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;

import java.io.*;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

public class PdfUtil {
    /**
     * 通过html生成文件
     * @param htmlContent  html格式内容
     * @param file  输出文件file
     */
    public static void createdPdfByItextHtml(String htmlContent,File file){
        InputStream inputStream = null;
        FileOutputStream outputStream = null;
        PdfWriter writer = null;
        try {
            //html格式优化,解决没有结束标签的问题
            org.jsoup.nodes.Document document1= Jsoup.parse(htmlContent,"utf-8");
            org.jsoup.nodes.Document.OutputSettings setting=new org.jsoup.nodes.Document.OutputSettings().prettyPrint(false);
            setting.syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
            org.jsoup.nodes.Document doc = document1.outputSettings(setting);
            //解决pdf自动去除空格的问题
            Elements p = doc.select("p");
            for(Element element : p){
                List children = element.childNodes();
                List childrenNew = new ArrayList<>();
                for(Node c: children){
                    if(c instanceof TextNode){
                        //替换空格,不然生成的pdf会自动去空格
                        String s = c.outerHtml().replaceAll(" ", "\u00a0").replaceAll("\t", "\u00a0\u00a0");
                        ((TextNode) c).text(s);
                        childrenNew.add(c);
                    }else if(c instanceof Element){
                        childrenNew.add(c);
                    }
                }
                int index = 0;
                for(Node node: childrenNew){
                    element.insertChildren(index, node);
                    index++;
                }
            }
            String docHtml = doc.outerHtml();

            // 1. 获取生成pdf的html内容
            inputStream= new ByteArrayInputStream(docHtml.getBytes("utf-8"));
            outputStream = new FileOutputStream(file);
            Document document = new Document();
            writer = PdfWriter.getInstance(document, outputStream);
            document.open();
            // 2. 添加字体
//            XMLWorkerFontProvider fontImp = new XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS);
//            fontImp.register(getFontPath());
            // 3. 设置编码
            XMLWorkerHelper.getInstance().parseXHtml(writer, document, inputStream, Charset.forName("UTF-8"),new CustomXMLWorkerFontProvider());
            // 4. 关闭,(不关闭则会生成无效pdf)
            document.close();
        } catch (Exception ex) {
            ex.printStackTrace();
        }finally {
            try {
                if(writer!=null){
                    writer.close();
                }
                if (outputStream != null) {
                    outputStream.close();
                }
                if (inputStream != null) {
                    inputStream.close();
                }
            }catch(IOException ex){
                ex.printStackTrace();
            }
        }
    }





    public static void main(String[] args) throws Exception{

    }
}

你可能感兴趣的:(pdf,html转pdf)