java 实现word转pdf

将DOCX文档转化为PDF是项目中常见的需求之一,目前主流的方法可以分为两大类:一类是利用各种Office应用进行转换,譬如Microsoft Office、WPS以及LibreOffice;另一类是利用各种语言提供的Office文档读取接口(譬如Apache POI)读取文档,然后使用专门的PDF生成库(譬如iText)构建PDF。总的来说,利用Office应用转换可以较好地保留样式,不过效率相对较低。其中Microsoft Office涉及版权,不可轻易使用(笔者所在公司就被抓包了);WPS目前使用比较广泛,不过存在超链接截断问题,即超过256个字符的超链接会被截断;LibreOffice的样式排版相对比较随意。而利用POI接口进行读取与生成的方式性能较好,适用于对格式要求不是很高的情况。另外还有一些封装好的在线工具或者命令行工具,譬如docx2pdf与OfficeToPDF。

以下是基于Apache POI(.docx,配合xdocreport)和docx4j(.doc)实现Word转PDF的示例

 

1. Maven 依赖


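<dependency>
    <groupId>args4j</groupId>
    <artifactId>args4j</artifactId>
    <version>2.32</version>
</dependency>

<dependency>
    <groupId>org.docx4j</groupId>
    <artifactId>docx4j</artifactId>
    <version>3.2.1</version>
</dependency>

<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
    <version>1.0.6</version>
</dependency>

<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.odftoolkit.odfdom.converter.pdf</artifactId>
    <version>1.0.6</version>
</dependency>

<dependency>
    <groupId>com.googlecode.jaxb-namespaceprefixmapper-interfaces</groupId>
    <artifactId>JAXBNamespacePrefixMapper</artifactId>
    <version>2.2.4</version>
    <scope>runtime</scope>
</dependency>

<dependency>
    <groupId>com.sun.xml.bind</groupId>
    <artifactId>jaxb-impl</artifactId>
    <version>2.2.11</version>
</dependency>

<dependency>
    <groupId>com.sun.xml.bind</groupId>
    <artifactId>jaxb-core</artifactId>
    <version>2.2.11</version>
</dependency>

<dependency>
    <groupId>org.apache.xmlbeans</groupId>
    <artifactId>xmlbeans</artifactId>
    <version>2.6.0</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.14</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>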

 

2.实现类

Converter

 

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;


/**
 * Base class for converters that read a document from an input stream and
 * write the converted result to an output stream.
 *
 * <p>Subclasses implement {@link #convert()} and are expected to call
 * {@link #loading()}, {@link #processing()} and {@link #finished()} at the
 * matching stages so that timing messages are produced and, when requested,
 * the streams are closed.
 */
public abstract class Converter {

    private static final String LOADING_FORMAT = "\nLoading stream\n\n";
    private static final String PROCESSING_FORMAT = "Load completed in %1$dms, now converting...\n\n";
    private static final String SAVING_FORMAT = "Conversion took %1$dms.\n\nTotal: %2$dms\n";

    /** Wall-clock time (ms) at which {@link #loading()} was called. */
    private long startTime;

    /** Wall-clock time (ms) at which the current stage started. */
    private long startOfProcessTime;

    protected InputStream inStream;
    protected OutputStream outStream;

    protected boolean showOutputMessages = false;
    protected boolean closeStreamsWhenComplete = true;

    /**
     * @param inStream                 stream the source document is read from
     * @param outStream                stream the converted document is written to
     * @param showMessages             whether progress messages are passed to
     *                                 {@link #actuallySendToOutput(String)}
     * @param closeStreamsWhenComplete whether {@link #finished()} closes both streams
     */
    public Converter(InputStream inStream, OutputStream outStream, boolean showMessages,
            boolean closeStreamsWhenComplete) {
        this.inStream = inStream;
        this.outStream = outStream;
        this.showOutputMessages = showMessages;
        this.closeStreamsWhenComplete = closeStreamsWhenComplete;
    }

    /**
     * Performs the conversion from {@link #inStream} to {@link #outStream}.
     *
     * @throws Exception if the conversion fails
     */
    public abstract void convert() throws Exception;

    /** Records the start of the whole run, which is also the start of the first stage. */
    private void startTime() {
        startTime = System.currentTimeMillis();
        startOfProcessTime = startTime;
    }

    /** Marks the beginning of the loading stage: emits the loading message and starts the timers. */
    protected void loading() {
        // The loading message has no format specifiers, so it is emitted verbatim.
        sendToOutputOrNot(LOADING_FORMAT);
        startTime();
    }

    /** Marks the end of loading: reports how long it took and restarts the stage timer. */
    protected void processing() {
        long currentTime = System.currentTimeMillis();
        long prevProcessTook = currentTime - startOfProcessTime;

        sendToOutputOrNot(String.format(PROCESSING_FORMAT, prevProcessTook));

        startOfProcessTime = System.currentTimeMillis();
    }

    /**
     * Marks the end of the conversion: closes both streams when
     * {@link #closeStreamsWhenComplete} is set, then reports the duration of
     * the last stage and of the whole run.
     */
    protected void finished() {
        long currentTime = System.currentTimeMillis();
        long timeTaken = currentTime - startTime;
        long prevProcessTook = currentTime - startOfProcessTime;

        startOfProcessTime = System.currentTimeMillis();

        if (closeStreamsWhenComplete) {
            // Close each stream independently: a failure while closing the
            // input must not leave the output stream open.
            closeQuietly(inStream);
            closeQuietly(outStream);
        }

        sendToOutputOrNot(String.format(SAVING_FORMAT, prevProcessTook, timeTaken));
    }

    /** Closes {@code stream} if it is non-null, ignoring any I/O error (best-effort cleanup). */
    private static void closeQuietly(java.io.Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Best-effort close: the conversion result has already been produced.
        }
    }

    /** Forwards {@code toBePrinted} to the output hook only when messages are enabled. */
    private void sendToOutputOrNot(String toBePrinted) {
        if (showOutputMessages) {
            actuallySendToOutput(toBePrinted);
        }
    }

    /**
     * Output hook for progress messages. The default implementation does
     * nothing; subclasses may override it to print or log the message.
     *
     * @param toBePrinted the formatted progress message
     */
    protected void actuallySendToOutput(String toBePrinted) {
    }

}

 

 

 

DocToPDFConverter:

 

import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.net.URL;


import org.apache.commons.io.IOUtils;
import org.docx4j.Docx4J;
import org.docx4j.convert.in.Doc;
import org.docx4j.convert.out.FOSettings;
import org.docx4j.fonts.IdentityPlusMapper;
import org.docx4j.fonts.Mapper;
import org.docx4j.fonts.PhysicalFont;
import org.docx4j.fonts.PhysicalFonts;
import org.docx4j.jaxb.Context;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.wml.RFonts;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;


/**
 * Converts a binary Word document (.doc) to PDF with docx4j.
 *
 * <p>The document is loaded through {@link Doc#convert(InputStream)}, the
 * SimSun font shipped on the classpath as {@code simsun.ttf} is registered
 * and set as the document default, and the result is exported through
 * {@link Docx4J#toFO(FOSettings, OutputStream, int)}.
 */
public class DocToPDFConverter extends Converter {

    /** Font family registered as the document default. */
    private static final String FONT_FAMILY = "SimSun";

    /** Classpath location of the TrueType file backing {@link #FONT_FAMILY}. */
    private static final String FONT_RESOURCE = "simsun.ttf";

    /**
     * @param inStream                 stream the .doc file is read from
     * @param outStream                stream the PDF is written to
     * @param showMessages             whether progress messages are emitted
     * @param closeStreamsWhenComplete whether both streams are closed when the
     *                                 conversion ends
     */
    public DocToPDFConverter(InputStream inStream, OutputStream outStream, boolean showMessages,
            boolean closeStreamsWhenComplete) {
        super(inStream, outStream, showMessages, closeStreamsWhenComplete);
    }

    /**
     * Loads the document, applies the default font and writes the PDF to
     * {@code outStream}.
     *
     * @throws Exception if loading, font registration or export fails; the
     *                   failure is propagated instead of being printed and
     *                   swallowed, so callers do not mistake an empty output
     *                   for a successful conversion
     */
    @Override
    public void convert() throws Exception {
        loading();
        try {
            WordprocessingMLPackage wordMLPackage = getMLPackage(inStream);
            processing();

            Mapper fontMapper = createFontMapper();
            applyDefaultFont(wordMLPackage);
            wordMLPackage.setFontMapper(fontMapper);

            FOSettings foSettings = Docx4J.createFOSettings();
            foSettings.setWmlPackage(wordMLPackage);
            Docx4J.toFO(foSettings, outStream, Docx4J.FLAG_EXPORT_PREFER_XSL);
        } finally {
            // finished() runs on success and on failure; it closes the streams
            // only when closeStreamsWhenComplete is set, so the caller's choice
            // is honoured instead of always closing outStream.
            finished();
        }
    }

    /** Registers the bundled font file and returns a mapper that resolves {@link #FONT_FAMILY} to it. */
    private Mapper createFontMapper() throws Exception {
        Resource fontResource = new ClassPathResource(FONT_RESOURCE);
        // Ask the resource for its URL instead of concatenating "file:" with a
        // file-system path, which yields a malformed URL for some paths.
        URL fontUrl = fontResource.getURL();
        PhysicalFonts.addPhysicalFont(fontUrl);

        Mapper fontMapper = new IdentityPlusMapper();
        PhysicalFont simsunFont = PhysicalFonts.get(FONT_FAMILY);
        fontMapper.put(FONT_FAMILY, simsunFont);
        return fontMapper;
    }

    /** Sets {@link #FONT_FAMILY} as the default ASCII font in the document's default run properties. */
    private void applyDefaultFont(WordprocessingMLPackage wordMLPackage) {
        RFonts rfonts = Context.getWmlObjectFactory().createRFonts();
        rfonts.setAsciiTheme(null);
        rfonts.setAscii(FONT_FAMILY);
        wordMLPackage.getMainDocumentPart().getPropertyResolver().getDocumentDefaultRPr().setRFonts(rfonts);
    }

    /**
     * Loads the .doc stream into a {@link WordprocessingMLPackage}.
     *
     * <p>{@code System.out} is redirected to a sink that discards everything
     * while {@link Doc#convert(InputStream)} runs (presumably to suppress
     * console output of the conversion; confirm against docx4j) and is
     * restored afterwards. The redirection is JVM-wide, so output written to
     * {@code System.out} by other threads during the call is discarded too.
     *
     * @param iStream stream containing the .doc file
     * @return the loaded package
     * @throws Exception if the document cannot be converted
     */
    protected WordprocessingMLPackage getMLPackage(InputStream iStream) throws Exception {
        PrintStream originalStdout = System.out;
        System.setOut(new PrintStream(new OutputStream() {
            @Override
            public void write(int b) {
                // Intentionally discard all output.
            }
        }));
        try {
            return Doc.convert(iStream);
        } finally {
            // Always restore standard output, including when conversion fails.
            System.setOut(originalStdout);
        }
    }

}

 

 

 

 

DocxToPDFConverter:

import java.awt.Color;
import java.io.InputStream;
import java.io.OutputStream;


import org.apache.poi.xwpf.converter.pdf.PdfConverter;
import org.apache.poi.xwpf.converter.pdf.PdfOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;


import com.lowagie.text.Font;
import com.lowagie.text.pdf.BaseFont;


import fr.opensagres.xdocreport.itext.extension.font.ITextFontRegistry;


/**
 * Converts an OOXML Word document (.docx) to PDF using Apache POI's
 * {@link XWPFDocument} and the xdocreport {@link PdfConverter}.
 *
 * <p>Every font requested by the converter is served from the
 * {@code simsun.ttf} file on the classpath so that Chinese text can be
 * rendered; if that font cannot be loaded, the default registry is used.
 */
public class DocxToPDFConverter extends Converter {

    /** Classpath location of the TrueType font used for all text. */
    private static final String FONT_RESOURCE = "simsun.ttf";

    /**
     * @param inStream                 stream the .docx file is read from
     * @param outStream                stream the PDF is written to
     * @param showMessages             whether progress messages are emitted
     * @param closeStreamsWhenComplete whether both streams are closed when the
     *                                 conversion ends
     */
    public DocxToPDFConverter(InputStream inStream, OutputStream outStream, boolean showMessages,
            boolean closeStreamsWhenComplete) {
        super(inStream, outStream, showMessages, closeStreamsWhenComplete);
    }

    /**
     * Loads the document from {@code inStream} and writes it as PDF to
     * {@code outStream}.
     *
     * @throws Exception if the document cannot be read or converted
     */
    @Override
    public void convert() throws Exception {
        loading();
        try {
            PdfOptions options = PdfOptions.create();
            XWPFDocument document = new XWPFDocument(inStream);

            // Support Chinese fonts: answer every font request with SimSun.
            options.fontProvider(new ITextFontRegistry() {
                @Override
                public Font getFont(String familyName, String encoding, float size, int style, Color color) {
                    try {
                        Resource fontResource = new ClassPathResource(FONT_RESOURCE);
                        String fontPath = fontResource.getFile().getAbsolutePath();

                        BaseFont baseFont = BaseFont.createFont(fontPath, BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
                        Font chineseFont = new Font(baseFont, size, style, color);
                        if (familyName != null) {
                            chineseFont.setFamily(familyName);
                        }
                        return chineseFont;
                    } catch (Exception e) {
                        // Best-effort: report the problem and fall back to the
                        // default registry rather than aborting the conversion.
                        e.printStackTrace();
                        return ITextFontRegistry.getRegistry().getFont(familyName, encoding, size, style, color);
                    }
                }
            });

            processing();
            PdfConverter.getInstance().convert(document, outStream, options);
        } finally {
            // Run the completion step when loading or conversion throws as
            // well, so the streams are still closed when
            // closeStreamsWhenComplete is set.
            finished();
        }
    }

}

 

调用示例代码(位于Web请求处理方法中)

// Usage excerpt: converts an attachment to a preview PDF stored under the web
// application root, then reopens the PDF for reading.
// NOTE(review): path, inputStream and fileInputStream are declared outside
// this excerpt, and the excerpt ends without closing the final else-if branch.
Converter converter;

 

// Absolute path of the preview file; backslashes are normalised to '/'.
// NOTE(review): "flyingsauser" looks like a misspelling of "flyingsaucer";
// it is a runtime path and must match the deployed directory - confirm.
path = request.getSession().getServletContext().getRealPath("").replaceAll("\\\\", "/") + "/flyingsauser/preview.pdf";
File file = new File(path);
// NOTE(review): FileOutputStream creates the file if it does not exist (or
// throws), so the exists()/createNewFile() check below cannot take effect.
OutputStream outputStream = new FileOutputStream(file);
String url = attachmentEntity.getUrl();
// Fetch the source document by its URL via the project's OSS helper.
inputStream = OSSClientUtil.getFileObject(url);

if(!file.exists()){
file.createNewFile();
}
// Pick the converter by file extension (case-sensitive match). Both are
// created with showMessages=true and closeStreamsWhenComplete=true.
if(url.endsWith(".docx")) {
converter = new DocxToPDFConverter(inputStream, outputStream, true, true);
converter.convert();
// Reopen the generated PDF for reading.
fileInputStream = new FileInputStream(file);
} else if(url.endsWith(".doc")){
converter = new DocToPDFConverter(inputStream, outputStream, true, true);
converter.convert();
// Reopen the generated PDF for reading.
fileInputStream = new FileInputStream(file);

 

 

 

以上就是Word转PDF的实现。代码中加入了对中文的支持,因此需要在classpath下放置simsun.ttf字体文件。

具体源码实现参照了下方的github的代码

https://github.com/yeokm1/docs-to-pdf-converter

 

你可能感兴趣的:(java后台;前后端分离)