使用xpdf将pdf文件转换为文本

使用xpdf将pdf文件转换为文本

// 读取PDF的内容
 public String getPdfContent(String filePath) {
  
  // 设置pdftotext所在的路径
  String excute = "E:\\JAR\\xpdf\\xpdf-3.02pl4-win32\\pdftotext.exe";
  //构造命令行里面的命令
  String[] cmd = new String[] { excute, "-enc", "UTF-8", "-q", filePath,"-" };
  Process p = null;
  try {
   // 调用本地命令,类似于在cmd里面敲上述命令
   p = Runtime.getRuntime().exec(cmd);
  } catch (IOException e) {
   e.printStackTrace();
  }
  //封装成字符流
  BufferedInputStream bis = new BufferedInputStream(p.getInputStream());
  InputStreamReader reader = null;

  try {
   reader = new InputStreamReader(bis, "UTF-8");
  } catch (UnsupportedEncodingException e1) {
   e1.printStackTrace();
  }

  StringBuffer sb = new StringBuffer();
  BufferedReader br = new BufferedReader(reader);
  String line;
  
  try {
   line = br.readLine();
   sb = new StringBuffer();
   while (line != null) {
    sb.append(line);
    sb.append(" ");
    line = br.readLine();
   }
  } catch (IOException e) {
   e.printStackTrace();
  }
  return sb.toString();
 }

你可能感兴趣的:(使用xpdf将pdf文件转换为文本)