一、POI对Word处理
1、读取Word
2、读取Excel
3、下载地址:http://www.apache.org/dyn/closer.cgi/poi/dev/
代码实现:
package com.qianyan.test;

import java.io.File;
import java.io.FileInputStream;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFChart.HSSFSeries;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.junit.Ignore;
import org.junit.Test;

/**
 * Sample JUnit tests showing how to read a Word document (HWPF) and an
 * Excel workbook (HSSF) with Apache POI and dump their contents to stdout.
 */
public class TestPOI {

    /**
     * Reads the whole text of e:/test.doc and prints it in one go.
     */
    @Test
    // @Ignore
    public void testRead1() {
        File file = new File("e:/test.doc");
        try {
            FileInputStream fis = new FileInputStream(file);
            try {
                WordExtractor wordExtractor = new WordExtractor(fis);
                System.out.println(wordExtractor.getText());
            } finally {
                // Always release the file handle, even when extraction fails.
                fis.close();
            }
        } catch (Exception e) {
            // Demo code: report the failure on stderr and let the test finish.
            e.printStackTrace();
        }
    }

    /**
     * Reads e:/test.doc paragraph by paragraph and prints each paragraph
     * prefixed with its zero-based index.
     */
    @Test
    @Ignore
    public void testPara() {
        File file = new File("e:/test.doc");
        try {
            FileInputStream fis = new FileInputStream(file);
            try {
                WordExtractor wordExtractor = new WordExtractor(fis);
                String[] paras = wordExtractor.getParagraphText();
                for (int i = 0; i < paras.length; i++) {
                    System.out.print("第" + i + "段-->");
                    System.out.println(paras[i]);
                }
            } finally {
                // Always release the file handle, even when extraction fails.
                fis.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads e:/test.xls and prints the content of every cell of the first sheet.
     * <p/>
     * An Excel file is layered as file --> sheet --> row --> cell, which maps
     * to workbook -> sheet -> row -> cell in POI.
     * <p/>
     * Index ranges:
     * 1. sheets run from 0 to workbook.getNumberOfSheets() - 1;
     * 2. rows run from 0 (getFirstRowNum) to getLastRowNum, inclusive;
     * 3. cells run from 0 (getFirstCellNum) to getLastCellNum() - 1, because
     *    getLastCellNum() returns the index of the last cell PLUS ONE.
     *
     * @throws Exception if the workbook cannot be opened or parsed
     */
    @Test
    @Ignore
    public void testReadExcel() throws Exception {
        // Path of the Excel workbook to read.
        String fileToBeReade = "e:/test.xls";
        FileInputStream fis = new FileInputStream(fileToBeReade);
        try {
            HSSFWorkbook workbook = new HSSFWorkbook(fis);
            // Number of sheets in the workbook.
            System.out.println("===SheetNum===" + workbook.getNumberOfSheets());

            // Only the first sheet is dumped.
            HSSFSheet aSheet = workbook.getSheetAt(0);
            if (null != aSheet) {
                System.out.println("+++getFirstRowNum+++" + aSheet.getFirstRowNum());
                System.out.println("+++getLastRowNum+++" + aSheet.getLastRowNum());

                // getLastRowNum() is an inclusive index, hence "<=".
                for (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet.getLastRowNum(); rowNumOfSheet++) {
                    HSSFRow aRow = aSheet.getRow(rowNumOfSheet);
                    if (null == aRow) {
                        // Rows that were never written are absent; skip them.
                        continue;
                    }
                    System.out.println("row" + rowNumOfSheet + "->");

                    // getLastCellNum() is already "last index + 1", hence "<".
                    for (int cellNumOfRow = 0; cellNumOfRow < aRow.getLastCellNum(); cellNumOfRow++) {
                        HSSFCell aCell = aRow.getCell(cellNumOfRow);
                        if (null == aCell) {
                            // Gaps inside a row are absent cells; skip them.
                            continue;
                        }
                        int cellType = aCell.getCellType();
                        String strCell = "";
                        switch (cellType) {
                            case HSSFCell.CELL_TYPE_NUMERIC:
                                if (HSSFDateUtil.isCellDateFormatted(aCell)) {
                                    // Date-formatted numeric cell: print it as a java.util.Date.
                                    strCell = HSSFDateUtil.getJavaDate(aCell.getNumericCellValue()).toString();
                                } else {
                                    strCell = aCell.getNumericCellValue() + "";
                                }
                                System.out.println(strCell + " ");
                                break;
                            case HSSFCell.CELL_TYPE_STRING:
                                strCell = aCell.getRichStringCellValue().toString();
                                System.out.println(strCell + " ");
                                break;
                            default:
                                // Any other cell type is printed as an empty line.
                                System.out.println("");
                        }
                    }
                }
            }
        } finally {
            // Always release the file handle, even when parsing fails.
            fis.close();
        }
    }
}
1、Jacob的官方解释是Java COM Bridge,即Java和COM组件之间的桥梁
2、com一般表现为dll或exe等二进制文件
3、Office建立在Windows平台之上,本身是一个应用软件,除了它自己提供的宏之外,似乎没有什么能对它进行直接的操作;Windows平台为了解决这类不同应用软件之间通信缺乏通用API的问题,推出了COM解决方案;我们使用dll中的一组或多组相关函数来存取组件数据,这些函数的集合称为接口,具体到每个细节的实现称为方法;如果需要调用接口里的方法,唯一的途径就是通过指向接口的指针来调用。所以总的来说,就是使用dll完成API的转换。
4、Jacob就是通过一个接口来操作Word的ActiveX对象。现在的版本是1.15
5、下载地址:http://sourceforge.net/projects/jacob-project/
代码实现:(注意:除添加jar包外,还需将jacob-1.17-M2-x64.dll、jacob-1.17-M2-x86.dll两个文件复制到C:\WINDOWS\SYSTEM32目录或者工程目录下)
package com.qianyan.test; import com.jacob.activeX.ActiveXComponent; import com.jacob.com.Dispatch; import com.jacob.com.Variant; public class TestJacob { public static void extractDoc(String inputFIle, String outputFile) { boolean flag = false; // 打开Word应用程序 ActiveXComponent app = new ActiveXComponent("Word.Application"); try { // 设置word不可见 app.setProperty("Visible", new Variant(false)); // 打开word文件 Dispatch doc1 = app.getProperty("Documents").toDispatch(); Dispatch doc2 = Dispatch.invoke( doc1, "Open", Dispatch.Method, new Object[] { inputFIle, new Variant(false), new Variant(true) }, new int[1]).toDispatch(); // 作为txt格式保存到临时文件 Variant(7) // 作为html格式保存到临时文件 Variant(8) Dispatch.invoke(doc2, "SaveAs", Dispatch.Method, new Object[] { outputFile, new Variant(8) }, new int[1]); // 关闭word Variant f = new Variant(false); Dispatch.call(doc2, "Close", f); flag = true; } catch (Exception e) { e.printStackTrace(); } finally { app.invoke("Quit", new Variant[] {}); } if (flag == true) { System.out.println("Transformed Successfully"); } else { System.out.println("Transform Failed"); } } public static void main(String[] args) { extractDoc("E:/test.doc","E:/test.htm"); } }