Java使用PDFBox开发包实现对PDF文档内容编辑与保存

pdfbox开发包下载地址:http://pdfbox.apache.org/

程序实现了PDF文档的创建,读入,与修改PDF内容并保存。

可能有个前提,PDF文档不是加密的,如果加密怎么办,我没研究过!

源代码如下:

package com.gloomyfish.ups.pdf.reader;  import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.util.List;  import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.exceptions.COSVisitorException; import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdfwriter.ContentStreamWriter; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDStream; import org.apache.pdfbox.pdmodel.edit.PDPageContentStream; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDType1Font; import org.apache.pdfbox.util.PDFOperator; import org.apache.pdfbox.util.PDFTextStripper;  /**  * http://pdfbox.apache.org/  *   * @author fish  *   */ public class PDFReader { 	 	public PDFReader() 	{ 		createHelloPDF(); 		readPDF(); 		editPDF(); 	}  	public void createHelloPDF() { 		PDDocument doc = null; 		PDPage page = null;  		try { 			doc = new PDDocument(); 			page = new PDPage();  			doc.addPage(page); 			PDFont font = PDType1Font.HELVETICA_BOLD; 			PDPageContentStream content = new PDPageContentStream(doc, page); 			content.beginText(); 			content.setFont(font, 12); 			content.moveTextPositionByAmount(100, 700); 			content.drawString("Hello");  			content.endText(); 			content.close(); 			doc.save("D:\\gloomyfish\\pdfwithText.pdf"); 			doc.close(); 		} catch (Exception e) { 			System.out.println(e); 		} 	} 	 	public void readPDF() 	{ 		PDDocument helloDocument; 		try { 			helloDocument = PDDocument.load(new File( 					"D:\\gloomyfish\\pdfwithText.pdf")); 			PDFTextStripper textStripper = new PDFTextStripper(); 			System.out.println(textStripper.getText(helloDocument)); 			helloDocument.close(); 		} catch (IOException e) { 			// TODO Auto-generated catch block 			e.printStackTrace(); 		} 	} 	 	public void editPDF() { 		  		try { 			// pdfwithText 			PDDocument helloDocument = PDDocument.load(new File("D:\\gloomyfish\\pdfwithText.pdf")); 			// PDDocument helloDocument = PDDocument.load(new File("D:\\gloomyfish\\hello.pdf")); 			// int pageCount = helloDocument.getNumberOfPages(); 			PDPage firstPage = (PDPage)helloDocument.getDocumentCatalog().getAllPages().get(0); 			// PDPageContentStream content = new PDPageContentStream(helloDocument, firstPage); 			PDStream contents = firstPage.getContents(); 			 			PDFStreamParser parser = new PDFStreamParser(contents.getStream());               parser.parse();               List tokens = parser.getTokens();               for (int j = 0; j < tokens.size(); j++)               {                   Object next = tokens.get(j);                   if (next instanceof PDFOperator)                   {                   	PDFOperator op = (PDFOperator) next;                       // Tj and TJ are the two operators that display strings in a PDF                       if (op.getOperation().equals("Tj"))                       {                           // Tj takes one operator and that is the string                           // to display so lets update that operator                           COSString previous = (COSString) tokens.get(j - 1);                           String string = previous.getString();                           string = string.replaceFirst("Hello", "Hello World, fish");                           //Word you want to change. Currently this code changes word "Solr" to "Solr123"                           previous.reset();                           previous.append(string.getBytes("ISO-8859-1"));                       }                       else if (op.getOperation().equals("TJ"))                       {                           COSArray previous = (COSArray) tokens.get(j - 1);                           for (int k = 0; k < previous.size(); k++)                           {                               Object arrElement = previous.getObject(k);                               if (arrElement instanceof COSString)                               {                                   COSString cosString = (COSString) arrElement;                                   String string = cosString.getString();                                   string = string.replaceFirst("Hello", "Hello World, fish");                                                                    // Currently this code changes word "Solr" to "Solr123"                                   cosString.reset();                                   cosString.append(string.getBytes("ISO-8859-1"));                               }                           }                       }                   }             }             // now that the tokens are updated we will replace the page content stream.               PDStream updatedStream = new PDStream(helloDocument);               OutputStream out = updatedStream.createOutputStream();               ContentStreamWriter tokenWriter = new ContentStreamWriter(out);               tokenWriter.writeTokens(tokens);               firstPage.setContents(updatedStream);               helloDocument.save("D:\\gloomyfish\\helloworld.pdf"); //Output file name               helloDocument.close(); //			PDFTextStripper textStripper = new PDFTextStripper(); //			System.out.println(textStripper.getText(helloDocument)); //			helloDocument.close(); 		} catch (IOException e) { 			// TODO Auto-generated catch block 			e.printStackTrace(); 		} catch (COSVisitorException e) { 			// TODO Auto-generated catch block 			e.printStackTrace(); 		} 	}  	public static void main(String[] args) { 		new PDFReader(); 	} } 

你可能感兴趣的:(Java使用PDFBox开发包实现对PDF文档内容编辑与保存)