要读取 PDF 文件,必须先引入 pdfbox-1.7.1.jar 和 fontbox-1.7.1.jar 这两个依赖包,可以到 Apache PDFBox 官网下载,也可以到我的资源里下载。示例代码如下:
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
/**
 * Small utility that extracts the text content of a PDF file using Apache PDFBox
 * ({@code PDDocument} and {@code PDFTextStripper}).
 *
 * <p>{@link #getText(String)} returns the text as a string; {@link #toTextFile(String, String)}
 * writes it to a text file.
 */
public class Mytest {

    /**
     * Extracts the text of a PDF file.
     *
     * <p>This method is best-effort: if loading or text extraction fails with an
     * {@link IOException}, the stack trace is printed and an empty string is returned.
     *
     * @param file path of the PDF file to read
     * @return the extracted text, or {@code ""} if an {@link IOException} occurred
     */
    public static String getText(String file) {
        String text = "";
        PDDocument pdfdoc = null;
        try {
            pdfdoc = PDDocument.load(file);
            PDFTextStripper stripper = new PDFTextStripper();
            text = stripper.getText(pdfdoc);
        } catch (IOException e) {
            // No throws clause on this method, so failures are reported on stderr only.
            e.printStackTrace();
        } finally {
            closeQuietly(pdfdoc);
        }
        return text;
    }

    /**
     * Extracts the text of a PDF file and writes it to a text file.
     *
     * <p>The output is written through a {@link FileWriter}, i.e. using the platform
     * default character encoding.
     *
     * @param doc      path of the PDF file to read
     * @param filename path of the text file to create or overwrite
     * @throws Exception an {@link IOException} if the PDF cannot be loaded, the text cannot be
     *                   extracted, or the output file cannot be written
     */
    public static void toTextFile(String doc, String filename) throws Exception {
        PDDocument pdfdoc = null;
        PrintWriter pw = null;
        try {
            pdfdoc = PDDocument.load(doc);
            PDFTextStripper stripper = new PDFTextStripper();
            pw = new PrintWriter(new FileWriter(filename));
            stripper.writeText(pdfdoc, pw);
            // PrintWriter never throws on a failed write; it only records an error flag.
            // checkError() flushes the stream and reports that flag, so surface it here.
            if (pw.checkError()) {
                throw new IOException("Failed to write extracted text to " + filename);
            }
        } finally {
            // Release the writer first, then the document, regardless of success or failure.
            if (pw != null) {
                pw.close();
            }
            closeQuietly(pdfdoc);
        }
    }

    /**
     * Closes the given document if it is non-null; a failure to close is printed, not thrown,
     * so that it cannot mask an exception raised by the main operation.
     */
    private static void closeQuietly(PDDocument pdfdoc) {
        if (pdfdoc == null) {
            return;
        }
        try {
            pdfdoc.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Demo entry point: prints the text of a PDF to standard output and then writes it to a
     * text file. The path literals below are placeholders to be replaced by real paths.
     */
    public static void main(String[] args) {
        try {
            String sc = getText("在这里输入要转换的pdf文件路径");
            System.out.print(sc);
            toTextFile("在这里输入要转换的pdf文件路径",
                    "要保存的txt文件路径");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}