2019独角兽企业重金招聘Python工程师标准>>>
在 http://my.oschina.net/yifanxiang/blog/678139 的基础上,对代码做了一些修改,如下:
/**
 * Builds a single PDF document from XHTML markup and returns it as a byte array.
 *
 * <p>The markup is pushed through a CSS-resolver pipeline, an HTML pipeline and a
 * PDF-writer pipeline. The {@code img} tag is re-mapped from the default
 * {@code com.itextpdf.tool.xml.html.Image} processor to the custom {@code Image}
 * tag processor class.
 *
 * @param ctx the XHTML markup to render
 * @return the bytes of the generated PDF
 * @throws DocumentException declared by {@code PdfWriter.getInstance}
 * @throws IOException if the parser fails while reading the markup
 */
public static byte[] buildPdf(String ctx) throws DocumentException, IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
    Document document = new Document();
    PdfWriter writer = PdfWriter.getInstance(document, baos);
    writer.setInitialLeading(12); // initial leading (spacing between text lines)
    document.open();

    HtmlToPdfUtil.MyFontsProvider fontProvider = new HtmlToPdfUtil.MyFontsProvider();
    fontProvider.addFontSubstitute("lowagie", "garamond");
    fontProvider.setUseUnicode(true);
    CssAppliers cssAppliers = new CssAppliersImpl(fontProvider);
    HtmlPipelineContext htmlContext = new HtmlPipelineContext(cssAppliers);

    DefaultTagProcessorFactory tpf = (DefaultTagProcessorFactory) Tags.getHtmlTagProcessorFactory();
    // The default processor is com.itextpdf.tool.xml.html.Image; register the custom image processor instead.
    tpf.addProcessor(Tag.IMG, Image.class.getName());
    htmlContext.setTagFactory(tpf);

    CSSResolver cssResolver = XMLWorkerHelper.getInstance().getDefaultCssResolver(true);
    Pipeline<?> pipeline = new CssResolverPipeline(cssResolver,
            new HtmlPipeline(htmlContext, new PdfWriterPipeline(document, writer)));
    XMLWorker worker = new XMLWorker(pipeline, true);
    XMLParser p = new XMLParser(worker);

    // Hand the characters to the parser directly. Encoding the string to bytes and
    // decoding it again with the platform default charset corrupts non-ASCII text
    // on systems whose default charset is not UTF-8.
    p.parse(new java.io.StringReader(ctx));
    p.flush();
    document.close();

    // flush()/close() on a ByteArrayOutputStream are no-ops, so the buffer is returned directly.
    return baos.toByteArray();
}
主要就是以下两句:
// Fetch the HTML tag processor factory and point the img tag at a custom processor class.
DefaultTagProcessorFactory tpf=(DefaultTagProcessorFactory)Tags.getHtmlTagProcessorFactory();
tpf.addProcessor(Tag.IMG, Image.class.getName());//the default is com.itextpdf.tool.xml.html.Image; register a custom image processor class instead (this Image is my own class, not the iText one)
自定义的Image类:
package com.junziqian.common.convert;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.codec.binary.Base64;
import com.itextpdf.text.BadElementException;
import com.itextpdf.text.Chunk;
import com.itextpdf.text.Element;
import com.itextpdf.text.log.Level;
import com.itextpdf.text.log.Logger;
import com.itextpdf.text.log.LoggerFactory;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfString;
import com.itextpdf.text.xml.XMLUtil;
import com.itextpdf.tool.xml.NoCustomContextException;
import com.itextpdf.tool.xml.Tag;
import com.itextpdf.tool.xml.WorkerContext;
import com.itextpdf.tool.xml.css.CssUtils;
import com.itextpdf.tool.xml.exceptions.LocaleMessages;
import com.itextpdf.tool.xml.exceptions.RuntimeWorkerException;
import com.itextpdf.tool.xml.html.AbstractTagProcessor;
import com.itextpdf.tool.xml.html.HTML;
import com.itextpdf.tool.xml.net.ImageRetrieve;
import com.itextpdf.tool.xml.net.exc.NoImageException;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
/**
 * Tag processor for the {@code img} tag that accepts inline base64 data URIs in
 * addition to sources resolved through {@link ImageRetrieve}.
 *
 * <p>A {@code src} of the form {@code data:image/<type>;base64,<payload>} is decoded
 * in memory; any other value is looked up via the resources root path and image
 * provider of the {@link HtmlPipelineContext}.
 */
public class Image extends AbstractTagProcessor {

    /** Text that separates the data-URI header from the base64 payload. */
    private static final String BASE64_MARKER = "base64,";

    /**
     * Recognises base64 image data URIs. Compiled once rather than on every tag, and
     * uses DOTALL so that a payload containing line breaks is still recognised.
     */
    private static final Pattern BASE64_IMAGE_SRC =
            Pattern.compile("^data:image/.{1,10};base64,.+$", Pattern.DOTALL);

    private final CssUtils utils = CssUtils.getInstance();
    private static final Logger logger = LoggerFactory.getLogger(Image.class);

    /**
     * Builds the content for an {@code img} tag.
     *
     * @param ctx the worker context used to look up the HTML pipeline context
     * @param tag the {@code img} tag whose {@code src} and {@code alt} attributes are read
     * @param currentContent not used by this processor
     * @return a list holding one element wrapping the image, or an empty list when
     *         {@code src} is missing/empty or the image could not be retrieved
     * @throws RuntimeWorkerException if no HTML pipeline context is available
     * @throws RuntimeException if inline base64 image data cannot be turned into an image
     */
    @Override
    public List<Element> end(final WorkerContext ctx, final Tag tag, final List<Element> currentContent) {
        Map<String, String> attributes = tag.getAttributes();
        String src = attributes.get(HTML.Attribute.SRC);
        List<Element> result = new ArrayList<Element>(1);
        if (null == src || src.length() == 0) {
            return result;
        }

        src = XMLUtil.unescapeXML(src).trim();
        com.itextpdf.text.Image img = null;
        try {
            if (logger.isLogging(Level.TRACE)) {
                logger.trace(String.format(LocaleMessages.getInstance().getMessage(LocaleMessages.HTML_IMG_USE), src));
            }
            if (BASE64_IMAGE_SRC.matcher(src).matches()) {
                img = decodeBase64Image(src);
            } else {
                HtmlPipelineContext context = getHtmlPipelineContext(ctx);
                img = new ImageRetrieve(context.getResourcesRootPath(), context.getImageProvider()).retrieveImage(src);
            }
        } catch (NoImageException e) {
            // A missing image is logged and skipped; the rest of the document is still rendered.
            if (logger.isLogging(Level.ERROR)) {
                logger.error(String.format(LocaleMessages.getInstance().getMessage(LocaleMessages.HTML_IMG_RETRIEVE_FAIL), src), e);
            }
        } catch (NoCustomContextException e) {
            throw new RuntimeWorkerException(LocaleMessages.getInstance().getMessage(LocaleMessages.NO_CUSTOM_CONTEXT), e);
        }

        if (null != img) {
            try {
                String alt = attributes.get(HTML.Attribute.ALT);
                if (alt != null) {
                    img.setAccessibleAttribute(PdfName.ALT, new PdfString(alt));
                }
                HtmlPipelineContext htmlPipelineContext = getHtmlPipelineContext(ctx);
                // CSS is applied to the image first, then to the chunk that wraps it.
                com.itextpdf.text.Image styled =
                        (com.itextpdf.text.Image) getCssAppliers().apply(img, tag, htmlPipelineContext);
                result.add(getCssAppliers().apply(new Chunk(styled, 0, 0, true), tag, htmlPipelineContext));
            } catch (NoCustomContextException e) {
                throw new RuntimeWorkerException(e);
            }
        }
        return result;
    }

    /** Decodes the payload that follows the first {@code base64,} marker into an iText image. */
    private static com.itextpdf.text.Image decodeBase64Image(String src) {
        byte[] imgData = Base64.decodeBase64(src.substring(src.indexOf(BASE64_MARKER) + BASE64_MARKER.length()));
        try {
            return com.itextpdf.text.Image.getInstance(imgData);
        } catch (BadElementException | IOException e) {
            throw new RuntimeException("Unable to create an image from inline base64 data", e);
        }
    }

    /**
     * This processor does not own the element stack.
     *
     * @return always {@code false}
     */
    @Override
    public boolean isStackOwner() {
        return false;
    }
}
这样,运行如下代码就可以在生成的pdf中带入图片了:
public static void main(String[] args) throws IOException, DocumentException {
String DEST = "./test2015-11.pdf";
File file = new File(DEST);
file.getParentFile().mkdirs();
ArrayList str=new ArrayList();
str.add(JsoupUtil.getXhtml("中文hello
"));
str.add(JsoupUtil.getXhtml("中文hello111
"));
HtmlToPdfUtil.buildPdf(str, DEST);
}
生成了pdf,自个看下,没有问题
加上jsoup的改动(org.jsoup.nodes.Element):
/**
 * Writes this element's opening tag (name and attributes) to {@code accum}.
 *
 * <p>When pretty-printing applies, the tag is indented first, except at the very
 * start of a {@code StringBuilder}. A childless self-closing tag is ended with
 * {@code " />"}, unless {@code OutputSettings.formatXhtml} is off, the syntax is
 * html and the tag is an empty tag, in which case a bare {@code '>'} is written.
 *
 * @param accum destination for the markup
 * @param depth nesting depth passed on to {@code indent}
 * @param out output settings consulted for pretty-print, outline and syntax
 * @throws IOException if appending to {@code accum} fails
 */
void outerHtmlHead(Appendable accum, int depth, OutputSettings out) throws IOException {
    boolean wantsIndent = out.prettyPrint()
            && (this.tag.formatAsBlock()
                || (this.parent() != null && this.parent().tag().formatAsBlock())
                || out.outline());
    if (wantsIndent) {
        // Nothing has been written yet when the target is an empty StringBuilder: skip the indent.
        boolean emptyBuilder = accum instanceof StringBuilder && ((StringBuilder) accum).length() == 0;
        if (!emptyBuilder) {
            this.indent(accum, depth, out);
        }
    }

    accum.append("<").append(this.tagName());
    this.attributes.html(accum, out);

    boolean selfClosed = this.childNodes.isEmpty() && this.tag.isSelfClosing();
    if (!selfClosed) {
        accum.append(">");
        return;
    }

    boolean bareHtmlClose = !OutputSettings.formatXhtml && out.syntax() == Syntax.html && this.tag.isEmpty();
    if (bareHtmlClose) {
        accum.append('>');
    } else {
        accum.append(" />");
    }
}
及org.jsoup.nodes.Document.OutputSettings:
// Static switch read by Element.outerHtmlHead: when true, childless self-closing tags are always written with " />".
public static boolean formatXhtml = false;//add this line
jsoup使用方法:
/**
 * Returns the input re-serialized by jsoup as HTML text.
 *
 * <p>Sets the static {@code Document.OutputSettings.formatXhtml} flag to
 * {@code false} before serializing. The flag is shared static state, so concurrent
 * calls to {@code getHtml} and {@code getXhtml} can affect each other's output.
 *
 * @param strHtml the markup to parse
 * @return the serialized document
 */
public static String getHtml(String strHtml) {
    Document.OutputSettings.formatXhtml = false;
    Document doc = Jsoup.parse(strHtml);
    return doc.html();
}

/**
 * Returns the input re-serialized by jsoup as XHTML text, with all {@code script}
 * elements removed.
 *
 * <p>Sets the static {@code Document.OutputSettings.formatXhtml} flag to
 * {@code true} before serializing. The flag is shared static state, so concurrent
 * calls to {@code getHtml} and {@code getXhtml} can affect each other's output.
 *
 * @param strHtml the markup to parse
 * @return the serialized document
 */
public static String getXhtml(String strHtml) {
    Document.OutputSettings.formatXhtml = true;
    // The second argument of Jsoup.parse(String, String) is a base URI, not a charset;
    // a String input is already decoded, so the single-argument overload is used.
    Document doc = Jsoup.parse(strHtml);
    Element head = doc.getElementsByTag("head").first();
    // NOTE(review): the appended markup is empty here; presumably a tag (e.g. a meta or
    // style element) was stripped when the article was published. Confirm the intended content.
    head.append("");
    doc.select("script").remove();
    return doc.html();
}