Java 使用 PDFBox 批量提取 PDF 文本并写入 TXT 文件

1. 包引用

<dependency>
  <groupId>org.apache.pdfbox</groupId>
  <artifactId>pdfbox</artifactId>
  <version>2.0.24</version>
</dependency>

2. Demo

package pdf;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

/**
 * Command-line tool that extracts the text of every file found in
 * {@code <base>/pdfs} with Apache PDFBox and writes it to
 * {@code <base>/txts/<name>.txt}.
 *
 * <p>The base directory is read from standard input by {@link #main(String[])}
 * and stored in {@link #base_path}; the other methods build their paths from
 * that field.
 */
public class Pdf2Txt {

	/**
	 * Base directory that contains the {@code pdfs} and {@code txts} folders.
	 * {@link #main(String[])} sets it to the user's input followed by "/".
	 */
	public static String base_path = "";

	/**
	 * Prompts for the base directory on standard input, then converts every
	 * file in its {@code pdfs} sub-folder.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		// try-with-resources releases the Scanner even if reading the line fails.
		try (Scanner sc = new Scanner(System.in)) {
			System.out.println("请输入pdf文件夹所在的目录:");
			base_path = sc.nextLine() + "/";
		}
		getFiles();
	}

	/**
	 * Lists {@code <base_path>pdfs} and calls {@link #getTxt(String)} for each
	 * regular file in it. If the folder is missing or empty, prints a message
	 * and terminates the JVM with status 0.
	 */
	public static void getFiles() {
		File[] fileArray = new File(base_path + "pdfs").listFiles();
		// listFiles() returns null when the path is not a readable directory.
		if (fileArray == null || fileArray.length == 0) {
			System.out.print("未查找到pdf文件,请检查目录是否正确...");
			System.exit(0);
			return;
		}
		for (File f : fileArray) {
			// Sub-directories cannot be parsed as PDF documents; skip them
			// instead of producing one stack trace per directory.
			if (!f.isFile()) {
				continue;
			}
			getTxt(f.getName());
		}
	}

	/**
	 * Loads {@code <base_path>pdfs/<name>}, extracts its text and hands it to
	 * {@link #writedText(String, String)}. Documents reported as encrypted are
	 * skipped with a message. Any failure is reported on standard error and
	 * does not propagate, so one bad file does not abort a batch.
	 *
	 * @param name file name (not a path) inside the {@code pdfs} folder;
	 *             "pdfs" is the input folder name and can be changed here
	 */
	public static void getTxt(String name) {
		// PDDocument holds the open file and internal buffers; it must be
		// closed on every path, including the encrypted early return.
		try (PDDocument doc = PDDocument.load(new File(base_path + "pdfs/" + name))) {
			if (doc.isEncrypted()) {
				System.out.println(name + "文档被加密,无法解析....");
				return;
			}

			PDFTextStripper stripper = new PDFTextStripper();
			writedText(stripper.getText(doc), name);
		} catch (Exception e) {
			// Broad catch is deliberate: this is the per-file boundary of the batch.
			System.err.println(name + " : 提取失败...");
			e.printStackTrace();
		}
	}

	/**
	 * Writes {@code result} as UTF-8 to {@code <base_path>txts/<stem>.txt},
	 * where {@code stem} is {@code name} without its last extension (or the
	 * whole name when it has no '.'). The {@code txts} folder is created if it
	 * does not exist, and an existing output file is overwritten. I/O failures
	 * are printed to standard error and not rethrown.
	 *
	 * @param result text to write
	 * @param name   source file name used to derive the output file name;
	 *               "txts" is the output folder name and can be changed here
	 */
	public static void writedText(String result, String name) {
		// lastIndexOf returns -1 for names without an extension; substring(0, -1)
		// would throw, so fall back to the full name in that case.
		int dot = name.lastIndexOf('.');
		String fileNameWithoutExtension = dot < 0 ? name : name.substring(0, dot);

		File outDir = new File(base_path + "txts");
		File file = new File(outDir, fileNameWithoutExtension + ".txt");
		try {
			if (!outDir.isDirectory() && !outDir.mkdirs()) {
				throw new IOException("cannot create output directory: " + outDir);
			}

			// Explicit UTF-8 keeps the output independent of the platform default
			// charset; try-with-resources closes both streams even if write fails.
			try (OutputStream os = new FileOutputStream(file);
					OutputStreamWriter writer = new OutputStreamWriter(os, StandardCharsets.UTF_8)) {
				writer.write(result);
			}
			System.out.println(name + " : 提取完成...");
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

}

你可能感兴趣的:(java工具类,java,pdf,jvm)