下载 最新poi-3.7
http://www.apache.org/dyn/closer.cgi/poi/release/bin/poi-bin-3.7-20101029.tar.gz 解压 导入 所有jar
读取 Excel 2003 / Excel 2007
package com.htsoft.oa.core;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
public class ExcelReader {
@SuppressWarnings("deprecation")
/**
* @param filePath 文件路径
* @return 读出的Excel的内容
*/
public static void main(String args[]) {
String xlsText2003;
String xlsText2007;
try {
xlsText2003 = ExcelReader.extractTextFromXLS2007("d:/abcd.xls");
xlsText2007 = ExcelReader.extractTextFromXLS2007("d:/abcd.xlsx");
System.out.println(xlsText2003);
System.out.println(xlsText2007);
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public static String getTextFromExcel2003(String filePath) {
StringBuffer buff = new StringBuffer();
try {
// 创建对Excel工作簿文件的引用
HSSFWorkbook wb = new HSSFWorkbook(new FileInputStream(filePath));
// 创建对工作表的引用。
for (int numSheets = 0; numSheets < wb.getNumberOfSheets(); numSheets++) {
if (null != wb.getSheetAt(numSheets)) {
HSSFSheet aSheet = wb.getSheetAt(numSheets);
for (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet
.getLastRowNum(); rowNumOfSheet++) {
if (null != aSheet.getRow(rowNumOfSheet)) {
HSSFRow aRow = aSheet.getRow(rowNumOfSheet);
for (int cellNumOfRow = 0; cellNumOfRow <= aRow
.getLastCellNum(); cellNumOfRow++) {
if (null != aRow.getCell(cellNumOfRow)) {
HSSFCell aCell = aRow.getCell(cellNumOfRow);
switch (aCell.getCellType()) {
case HSSFCell.CELL_TYPE_FORMULA:
break;
case HSSFCell.CELL_TYPE_NUMERIC:
buff.append(aCell.getNumericCellValue())
.append('/t');
break;
case HSSFCell.CELL_TYPE_STRING:
buff.append(aCell.getStringCellValue())
.append('/t');
break;
}
}
}
buff.append('/n');
}
}
}
}
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return buff.toString();
}
/**
* @Method: extractTextFromXLS2007
* @Description: 从excel 2007文档中提取纯文本
*
* @param
* @return String
* @throws
*/
private static String extractTextFromXLS2007(String fileName)
throws Exception {
StringBuffer content = new StringBuffer();
// 构造 XSSFWorkbook 对象,strPath 传入文件路径
XSSFWorkbook xwb = new XSSFWorkbook(fileName);
// 循环工作表Sheet
for (int numSheet = 0; numSheet < xwb.getNumberOfSheets(); numSheet++) {
XSSFSheet xSheet = xwb.getSheetAt(numSheet);
if (xSheet == null) {
continue;
}
// 循环行Row
for (int rowNum = 0; rowNum <= xSheet.getLastRowNum(); rowNum++) {
XSSFRow xRow = xSheet.getRow(rowNum);
if (xRow == null) {
continue;
}
// 循环列Cell
for (int cellNum = 0; cellNum <= xRow.getLastCellNum(); cellNum++) {
XSSFCell xCell = xRow.getCell(cellNum);
if (xCell == null) {
continue;
}
if (xCell.getCellType() == XSSFCell.CELL_TYPE_BOOLEAN) {
content.append(xCell.getBooleanCellValue());
} else if (xCell.getCellType() == XSSFCell.CELL_TYPE_NUMERIC) {
content.append(xCell.getNumericCellValue());
} else {
content.append(xCell.getStringCellValue());
}
}
}
}
return content.toString();
}
}
====================================================================
读取 Word 2003 / Word 2007
package com.htsoft.oa.core;
import java.io.*;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
/**
 * Extracts plain text from Word documents using Apache POI: {@code .doc} files through
 * {@code WordExtractor} and {@code .docx} files through {@code XWPFWordExtractor}.
 */
public class WordReader {

    /**
     * Demo entry point: prints the text of a sample {@code .docx} file.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        // Word 2003 variant:
        // String textDoc = WordReader.getTextFromWord2003("d:/abc.doc");
        String textDoc = WordReader.getTextFromWord2007("d:/abc1.docx");
        System.out.println(textDoc);
    }

    /**
     * Reads the text of a Word 2003 ({@code .doc}) document.
     *
     * <p>If the file cannot be found or read, the stack trace is printed and {@code null} is
     * returned.
     *
     * @param filePath path of the document
     * @return the document text, or {@code null} if reading failed
     */
    public static String getTextFromWord2003(String filePath) {
        String result = null;
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(filePath);
            WordExtractor wordExtractor = new WordExtractor(fis);
            result = wordExtractor.getText();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (fis != null) {
                try {
                    fis.close();
                } catch (IOException ignored) {
                    // The text has already been extracted; a failed close changes nothing.
                }
            }
        }
        return result;
    }

    /**
     * Reads the text of a Word 2007 ({@code .docx}) document.
     *
     * <p>Note carried over from the original author: images are not read, and the data of
     * tables is placed at the end of the returned string.
     *
     * <p>If anything goes wrong while opening or parsing the file, the stack trace is printed
     * and {@code null} is returned.
     *
     * @param filePath path of the document
     * @return the document text, or {@code null} if reading failed
     */
    public static String getTextFromWord2007(String filePath) {
        OPCPackage opcPackage = null;
        String text2007 = null;
        try {
            opcPackage = POIXMLDocument.openPackage(filePath);
            POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
            text2007 = extractor.getText();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (opcPackage != null) {
                // revert() releases the package without writing anything back to the file,
                // which is what a read-only extraction wants.
                opcPackage.revert();
            }
        }
        return text2007;
    }
}