java : pdfbox 读取 PDF文件内书签

从 https://pdfbox.apache.org/ 下载
pdfbox-2.0.9.jar fontbox-2.0.9.jar
commons-logging-1.2.jar

其实用一个 pdfbox-app-2.0.9.jar 也行

示例源码取自 pdfbox-2.0.9-src.zip, 文件路径如下:

\pdfbox-2.0.9-src\examples\src\main\java\org\apache\pdfbox\examples\pdmodel\PrintBookmarks.java

package test;
import java.io.*;

import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;

import org.apache.pdfbox.pdmodel.interactive.action.PDActionGoTo;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineNode;

/**
 * Command-line example that prints the bookmarks (document outline) of a PDF
 * file to standard output, one title per line, indented by nesting depth and
 * followed by the 1-based target page number when one can be determined.
 */
public class PrintBookmarks
{

    /**
     * Prints all bookmarks below the given outline node to System.out,
     * depth first, recursing into each bookmark's children.
     *
     * @param bookmark The outline node whose children are printed.
     * @param indentation Prefix written before each title; grows by four
     *                    spaces per nesting level.
     *
     * @throws IOException If a bookmark's destination or action cannot be read.
     */
    public void printBookmark( PDOutlineNode bookmark, String indentation ) throws IOException
    {
        PDOutlineItem current = bookmark.getFirstChild();
        while( current != null )
        {
            int page = resolvePageNumber( current );
            if( page == 0 )
            {
                // No page could be determined: print the title only.
                System.out.println( indentation + current.getTitle() );
            }
            else
            {
                System.out.println( indentation + current.getTitle() + "  " + page );
            }
            // Recurse into the children, one indentation level deeper.
            printBookmark( current, indentation + "    " );
            current = current.getNextSibling();
        }
    }

    /**
     * Determines the 1-based page number a bookmark points to.
     *
     * The bookmark's direct destination is examined first; a go-to action
     * carrying a page destination takes precedence over it when present.
     *
     * @param item The bookmark to inspect.
     * @return The 1-based page number, or 0 when none could be determined.
     *
     * @throws IOException If the destination or action cannot be read.
     */
    private static int resolvePageNumber( PDOutlineItem item ) throws IOException
    {
        int page = 0;
        if( item.getDestination() instanceof PDPageDestination )
        {
            PDPageDestination target = (PDPageDestination) item.getDestination();
            // retrievePageNumber() is zero-based; per the PDFBox docs it yields -1
            // for an unresolvable page, which conveniently maps to 0 ("unknown").
            page = target.retrievePageNumber() + 1;
        }
        if( item.getAction() instanceof PDActionGoTo )
        {
            PDActionGoTo goTo = (PDActionGoTo) item.getAction();
            if( goTo.getDestination() instanceof PDPageDestination )
            {
                PDPageDestination target = (PDPageDestination) goTo.getDestination();
                page = target.retrievePageNumber() + 1;
            }
        }
        return page;
    }

    /**
     * Entry point: expects exactly one argument, the path of the PDF file,
     * and prints that file's bookmarks.
     *
     * @param args Command-line arguments; args[0] is the PDF file path.
     *
     * @throws Exception If the file cannot be read or parsed as a PDF.
     */
    public static void main( String[] args ) throws Exception
    {
        if( args.length != 1 )
        {
            System.out.println( " usage: java PrintBookmarks file1.pdf " );
            return;
        }

        File file1 = new File( args[0] );
        if( !file1.exists() )
        {
            System.err.println( " file is not exists " );
            return;
        }

        // Let PDFBox open the file itself instead of wiring a stream, buffer and
        // parser by hand: the document is then the only resource to release, and
        // a failure while loading leaves nothing half-open in this method.
        PDDocument document = PDDocument.load( file1 );
        try
        {
            PrintBookmarks printer = new PrintBookmarks();
            PDDocumentOutline outline = document.getDocumentCatalog().getDocumentOutline();
            if( outline != null )
            {
                printer.printBookmark( outline, "" );
            }
            else
            {
                System.out.println( "This document does not contain any bookmarks" );
            }
        }
        finally
        {
            document.close();
        }
    }
}

编译 compile.bat

set JAR=pdfbox-app-2.0.9.jar
javac -cp %JAR% -d . PrintBookmarks.java

运行 run1.bat

set JAR=pdfbox-app-2.0.9.jar
java -Xms128m -Xmx512m -cp %JAR%;. test.PrintBookmarks %1

例如, 在 cmd 命令行中运行:

run1.bat Hadoop权威指南_第四版_中文版.pdf > hadoop4.txt

你可能感兴趣的:(java)