// Converting a PDF to a text file with Xpdf

package com.hdzx.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

/**
 * Converts a PDF file to plain text by running an external Xpdf batch script
 * through the Windows command interpreter, and optionally prints the result.
 *
 * <p>Despite the "Html" in the method names, the result that is read back is a
 * text file; the names are kept so existing callers continue to work.
 *
 * <p>All paths are passed as method arguments; the class holds no mutable
 * state, so one call cannot overwrite the paths of another.
 */
public class PdfToHtmlUtil {
	/** Batch script that is invoked to perform the conversion. */
	private static final String CONVERTER_SCRIPT = "D:\\Xpdf\\pdftotext.bat";

	/** Output directory handed to the script by {@link #setPDFtoHTML(String)}. */
	private static final String DEFAULT_OUTPUT_DIR = "D:\\Xpdf";

	/**
	 * Text file that {@link #setPDFtoHTML(String)} reads back after conversion.
	 * NOTE(review): this name is fixed and does not follow the input file name,
	 * so it only matches inputs named slzw.pdf. The batch script is not visible
	 * here; confirm how it names its output before deriving this path.
	 */
	private static final String DEFAULT_OUTPUT_FILE = "D:\\Xpdf\\slzw.txt";

	/**
	 * Converts {@code file} if it has a {@code .pdf} extension and exists.
	 * Progress and failures are reported on standard output; nothing is thrown.
	 *
	 * @param file    path of the file to convert; {@code null} is treated as "not a PDF"
	 * @param project directory in which the converted file should be placed
	 */
	public static void convertToHtml(String file, String project) {
		convert(file, project);
	}

	/**
	 * Runs the conversion and reports whether the script completed successfully.
	 *
	 * @return {@code true} only when the script was started and exited with code 0
	 */
	private static boolean convert(String file, String project) {
		if (!isPdf(file)) {
			return false;
		}
		if (project == null) {
			// Without a target directory there is nothing sensible to pass to the script.
			System.out.println("error");
			return false;
		}
		return runConverter(file, project);
	}

	/**
	 * Checks whether the path carries a {@code pdf} extension (case-insensitive).
	 * A path without any dot has no extension and is therefore not a PDF.
	 *
	 * @return {@code true} if the extension is {@code pdf}
	 */
	private static boolean isPdf(String path) {
		if (path == null) {
			System.out.println("not pdf");
			return false;
		}
		int dot = path.lastIndexOf('.');
		String type = dot < 0 ? "" : path.substring(dot + 1).toLowerCase();
		System.out.println("type:" + type);
		if ("pdf".equals(type)) {
			System.out.println("pdf");
			return true;
		}
		System.out.println("not pdf");
		return false;
	}

	/**
	 * Invokes the batch script and waits for it to finish, so that callers can
	 * read the generated file without guessing how long the conversion takes.
	 *
	 * @return {@code true} if the input exists and the script exited with code 0
	 */
	private static boolean runConverter(String inputPath, String outputDir) {
		if (!new File(inputPath).isFile()) {
			return false;
		}
		// Arguments are passed as a list instead of one concatenated command
		// string. "/c" makes cmd exit when the script ends, which lets waitFor()
		// observe completion.
		ProcessBuilder builder = new ProcessBuilder(
				"cmd", "/c", CONVERTER_SCRIPT, inputPath, outputDir);
		builder.redirectErrorStream(true);
		System.out.println("cmd:" + builder.command());

		Process process = null;
		try {
			process = builder.start();
			// Close the child's stdin so a prompt in the script cannot block forever.
			process.getOutputStream().close();
			// Drain the child's output; an undrained pipe can fill up and stall the child.
			java.io.InputStream childOutput = process.getInputStream();
			try {
				byte[] buffer = new byte[4096];
				int count;
				while ((count = childOutput.read(buffer)) != -1) {
					System.out.write(buffer, 0, count);
				}
				System.out.flush();
			} finally {
				childOutput.close();
			}
			if (process.waitFor() != 0) {
				System.out.println("error");
				return false;
			}
			System.out.println("OK");
			return true;
		} catch (IOException e) {
			e.printStackTrace();
			System.out.println("error");
			return false;
		} catch (InterruptedException e) {
			// Restore the interrupt flag for the caller and stop the child.
			Thread.currentThread().interrupt();
			if (process != null) {
				process.destroy();
			}
			System.out.println("error");
			return false;
		}
	}

	/**
	 * Reads a text file line by line, terminating every line with {@code "\n"}.
	 * The file is decoded with the platform default charset, as {@link FileReader} does.
	 *
	 * @throws IOException if the file cannot be opened or read
	 */
	private static String readText(File file) throws IOException {
		BufferedReader reader = new BufferedReader(new FileReader(file));
		try {
			StringBuilder text = new StringBuilder();
			String line;
			while ((line = reader.readLine()) != null) {
				text.append(line).append("\n");
			}
			return text.toString();
		} finally {
			// Closing the BufferedReader also closes the wrapped FileReader.
			reader.close();
		}
	}

	/** Demo entry point: converts a sample PDF and prints the extracted text. */
	public static void main(String[] args) {
		String fileStr = "D:\\Xpdf\\slzw.pdf";
		setPDFtoHTML(fileStr);
	}

	/**
	 * Converts the given PDF into the default output directory and prints the
	 * resulting text to standard output. Nothing is printed when the conversion
	 * did not run or failed, so a stale result file is never shown.
	 *
	 * @param fileStr path of the PDF file to convert
	 */
	public static void setPDFtoHTML(String fileStr) {
		if (!convert(fileStr, DEFAULT_OUTPUT_DIR)) {
			return;
		}
		try {
			// NOTE(review): the previous code passed the text through
			// replaceAll("", ""), which changes nothing and was dropped. It may
			// once have stripped a control character that got lost - confirm.
			System.out.println(readText(new File(DEFAULT_OUTPUT_FILE)));
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

}

你可能感兴趣的:(text)