JAVA解析PDF、WORD、EXCEL文档

java解析pdf、doc、docx、xls、xlsx格式文档


读取PDF文件jar引用

		
		<dependency>
			<groupId>org.apache.pdfbox</groupId>
			<artifactId>pdfbox</artifactId>
			<version>1.8.13</version>
		</dependency>
		

读取WORD文件jar引用

		
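		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi-scratchpad</artifactId>
			<version>3.16-beta1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi</artifactId>
			<version>3.16-beta1</version>
		</dependency>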
		


读取EXCEL文件jar引用



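<dependency>
	<groupId>org.apache.xmlbeans</groupId>
	<artifactId>xmlbeans</artifactId>
	<version>2.6.0</version>
</dependency>
<dependency>
	<groupId>org.apache.poi</groupId>
	<artifactId>poi-ooxml</artifactId>
	<version>3.16-beta1</version>
</dependency>
<dependency>
	<groupId>org.apache.poi</groupId>
	<artifactId>poi-ooxml-schemas</artifactId>
	<version>3.16-beta1</version>
</dependency>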


读取WORD文件方法

/**
	 * Reads the plain text of a Word document using POI's {@code WordExtractor}.
	 *
	 * <p>Both the extractor and the underlying file stream are released before
	 * the method returns, whether or not extraction succeeded.
	 *
	 * <p>NOTE(review): {@code WordExtractor} comes from poi-scratchpad and
	 * presumably only understands the binary {@code .doc} format; confirm how
	 * {@code .docx} input should be handled by callers.
	 *
	 * @param filePath
	 *            path of the Word file to read
	 * @return the extracted text, or {@code null} if the file could not be
	 *         opened or read (the stack trace is printed in that case)
	 */
	public static String getTextFromWord(String filePath) {
		String result = null;
		FileInputStream fis = null;
		WordExtractor wordExtractor = null;
		try {
			fis = new FileInputStream(new File(filePath));
			wordExtractor = new WordExtractor(fis);
			result = wordExtractor.getText();
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Close the extractor first so it can release whatever it built
			// on top of the stream, then close the stream itself.
			if (wordExtractor != null) {
				try {
					wordExtractor.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (fis != null) {
				try {
					fis.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return result;
	}


读取PDF文件方法

	/**
	 * Extracts the text content of a PDF file.
	 *
	 * <p>The file is parsed with {@code PDFParser} and the resulting document
	 * is handed to a {@code PDFTextStripper}. The input stream and the parsed
	 * document are both closed before returning.
	 *
	 * @param filePath
	 *            path of the PDF file to read
	 * @return the text produced by the stripper, or {@code null} if the file
	 *         could not be opened, parsed or read (the stack trace is printed)
	 */
	public static String getTextFromPdf(String filePath) {
		String text = null;
		FileInputStream input = null;
		PDDocument pdf = null;
		try {
			input = new FileInputStream(filePath);
			PDFParser pdfParser = new PDFParser(input);
			pdfParser.parse();
			pdf = pdfParser.getPDDocument();
			text = new PDFTextStripper().getText(pdf);
		} catch (IOException e) {
			// FileNotFoundException is an IOException and was handled the
			// same way, so a single handler covers both cases.
			e.printStackTrace();
		} finally {
			if (input != null) {
				try {
					input.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (pdf != null) {
				try {
					pdf.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return text;
	}

读取EXCEL,xls格式

	/**
	 * Dumps the numeric and string cells of an {@code .xls} workbook as text.
	 *
	 * <p>Every sheet is walked row by row. Each numeric or string cell is
	 * appended followed by a tab, and each existing row is terminated with a
	 * newline. Formula cells and all other cell types are skipped. Numeric
	 * values are appended as returned by {@code getNumericCellValue()}.
	 *
	 * <p>The workbook and the file stream are closed before returning. If an
	 * I/O error occurs, the stack trace is printed and whatever text was
	 * collected so far is returned.
	 *
	 * @param filePath
	 *            path of the Excel file to read
	 * @return the collected cell text; empty if nothing could be read
	 */
	@SuppressWarnings("deprecation")
	public static String getTextFromExcel(String filePath) {
		StringBuilder buff = new StringBuilder();
		FileInputStream fis = null;
		HSSFWorkbook wb = null;
		try {
			fis = new FileInputStream(filePath);
			wb = new HSSFWorkbook(fis);
			for (int numSheets = 0; numSheets < wb.getNumberOfSheets(); numSheets++) {
				HSSFSheet aSheet = wb.getSheetAt(numSheets);
				if (aSheet == null) {
					continue;
				}
				for (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet.getLastRowNum(); rowNumOfSheet++) {
					HSSFRow aRow = aSheet.getRow(rowNumOfSheet);
					if (aRow == null) {
						continue;
					}
					// getLastCellNum() is the last cell index PLUS ONE (or -1
					// for a row without cells), so the bound is exclusive.
					for (int cellNumOfRow = 0; cellNumOfRow < aRow.getLastCellNum(); cellNumOfRow++) {
						HSSFCell aCell = aRow.getCell(cellNumOfRow);
						if (aCell == null) {
							continue;
						}
						switch (aCell.getCellType()) {
							case HSSFCell.CELL_TYPE_NUMERIC :
								buff.append(aCell.getNumericCellValue()).append('\t');
								break;
							case HSSFCell.CELL_TYPE_STRING :
								buff.append(aCell.getStringCellValue()).append('\t');
								break;
							default :
								// Formula, blank, boolean and error cells are
								// intentionally left out of the output.
								break;
						}
					}
					buff.append('\n');
				}
			}
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (wb != null) {
				try {
					wb.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (fis != null) {
				try {
					fis.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return buff.toString();
	}

读取EXCEL,xlsx格式

/**
	 * Dumps the numeric and string cells of an {@code .xlsx} workbook as text.
	 *
	 * <p>Every sheet is walked row by row. Each numeric or string cell is
	 * appended followed by a tab, and each existing row is terminated with a
	 * newline. Formula cells and all other cell types are skipped. Numeric
	 * values are appended as returned by {@code getNumericCellValue()}.
	 *
	 * <p>The workbook and the file stream are closed before returning. If an
	 * I/O error occurs, the stack trace is printed and whatever text was
	 * collected so far is returned.
	 *
	 * @param filePath
	 *            path of the Excel file to read
	 * @return the collected cell text; empty if nothing could be read
	 */
	@SuppressWarnings("deprecation")
	public static String getTextFromExcel2007(String filePath) {
		StringBuilder buff = new StringBuilder();
		FileInputStream fis = null;
		XSSFWorkbook wb = null;
		try {
			fis = new FileInputStream(filePath);
			wb = new XSSFWorkbook(fis);
			for (int numSheets = 0; numSheets < wb.getNumberOfSheets(); numSheets++) {
				XSSFSheet aSheet = wb.getSheetAt(numSheets);
				if (aSheet == null) {
					continue;
				}
				for (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet.getLastRowNum(); rowNumOfSheet++) {
					XSSFRow aRow = aSheet.getRow(rowNumOfSheet);
					if (aRow == null) {
						continue;
					}
					// getLastCellNum() is the last cell index PLUS ONE (or -1
					// for a row without cells), so the bound is exclusive.
					for (int cellNumOfRow = 0; cellNumOfRow < aRow.getLastCellNum(); cellNumOfRow++) {
						XSSFCell aCell = aRow.getCell(cellNumOfRow);
						if (aCell == null) {
							continue;
						}
						// The cell-type constants are referenced through
						// XSSFCell so this method does not rely on HSSF types.
						switch (aCell.getCellType()) {
							case XSSFCell.CELL_TYPE_NUMERIC :
								buff.append(aCell.getNumericCellValue()).append('\t');
								break;
							case XSSFCell.CELL_TYPE_STRING :
								buff.append(aCell.getStringCellValue()).append('\t');
								break;
							default :
								// Formula, blank, boolean and error cells are
								// intentionally left out of the output.
								break;
						}
					}
					buff.append('\n');
				}
			}
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (wb != null) {
				try {
					wb.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (fis != null) {
				try {
					fis.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return buff.toString();
	}





你可能感兴趣的:(JAVA)