POI解析word03和word07

所需要的jar包:

dom4j-1.6.1.jar

poi-3.8-20120326.jar

poi-examples-3.8-20120326.jar

poi-excelant-3.8-20120326.jar

poi-ooxml-3.8-20120326.jar

poi-ooxml-schemas-3.8-20120326.jar

poi-scratchpad-3.8-20120326.jar

stax-api-1.0.1.jar

xmlbeans-2.3.0.jar


POI解析word03(HWPFDocument)

/**
 * Prints the plain text of the Word 2003 document {@code files\2003.doc} to standard output.
 *
 * <p>The text is extracted with {@code WordExtractor}. Any failure is reported via
 * {@code printStackTrace()} and the method returns normally. The input stream is always
 * closed, whether or not extraction succeeds.
 *
 * @param args unused
 */
public static void main(String[] args) {
		InputStream is = null;
		try {
			// Word 2003: pictures are not read by the text extractor
			is = new FileInputStream(new File("files\\2003.doc"));
			WordExtractor ex = new WordExtractor(is); // is: InputStream of the Word file
			String text2003 = ex.getText();
			System.out.println(text2003);
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// The original snippet leaked the file handle; release it on every path.
			if (is != null) {
				try {
					is.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}


解析word03表格:

/**
 * Loads the given file as a Word 2003 document ({@code HWPFDocument}) and fetches its
 * tables through {@code getTables}.
 *
 * <p>If loading fails, the {@code IOException} is only printed; {@code hdoc} stays
 * {@code null} and is still passed to {@code getTables}.
 *
 * @param file the Word 2003 file to read
 */
public static boolean word03(File file){
		/** Input stream for the Word document */
		InputStream fis = null;
		POIFSFileSystem pfs;
		/** Word 2003 document object */
		HWPFDocument hdoc = null;
		try {	// load the document
			fis = new FileInputStream(file);
			pfs = new POIFSFileSystem(fis);
			hdoc = new HWPFDocument(pfs);
		} catch (IOException e) {
			e.printStackTrace();
		}
		// getTables is defined elsewhere; presumably it returns the document's tables — TODO confirm
		List tableList1 = getTables(hdoc);
			 // iterate over the tables in the document, starting from index 0
		// NOTE(review): the snippet is cut off inside the loop header below; the rest of the method is not visible here
		for(int i = 0; i 
  


POI解析word07(XWPFDocument)

/**
 * Demo that prints the plain text of the Word 2007 document {@code files\2007.docx}.
 */
public class Test {
	/**
	 * Opens the document as an OPC package, extracts its text with
	 * {@code XWPFWordExtractor} and prints it to standard output.
	 *
	 * <p>Any failure is reported via {@code printStackTrace()}. The package is always
	 * released afterwards.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		OPCPackage opcPackage = null;
		try {
			// Word 2007: pictures are not read, and table data is placed at the end of the string
			opcPackage = POIXMLDocument.openPackage("files\\2007.docx");
			POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
			String text2007 = extractor.getText();
			System.out.println(text2007);
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			if (opcPackage != null) {
				// revert() releases the package without saving. Per the POI Javadoc,
				// close() saves a writable package, which a read-only demo should not do.
				opcPackage.revert();
			}
		}
	}
}


解析word07图片:

/**
 * Extracts every picture embedded in a Word 2007 document and writes it into the
 * directory {@code G:/compare}, creating that directory if it does not exist.
 *
 * <p>The first picture is written to {@code <file name>.jpg}, the same path the original
 * code used. Each later picture is written to {@code <file name>_<index>.jpg}, so it no
 * longer overwrites the previous one.
 *
 * <p>If the document cannot be opened, the error is printed and nothing is extracted.
 * A failure to write one picture is printed and the remaining pictures are still
 * processed. Output streams and the document package are always released.
 *
 * @param file the Word 2007 (.docx) file to read pictures from
 */
public static void picture(File file){
		String imagePath = "G:/compare";
		File imageFile = new File(imagePath);
		if (!imageFile.exists()) {
			imageFile.mkdir();
		}

		OPCPackage opcPackage = null;
		try {
			opcPackage = POIXMLDocument.openPackage(file.getPath());
			XWPFDocument xwpfd = new XWPFDocument(opcPackage);

			List<XWPFPictureData> piclist = xwpfd.getAllPictures();
			for (int j = 0; j < piclist.size(); j++) {
				byte[] picbyte = piclist.get(j).getData();
				// Give each picture its own target so later ones do not clobber earlier ones.
				String suffix = (j == 0) ? "" : "_" + j;
				String target = imagePath + "/" + file.getName() + suffix + ".jpg";

				FileOutputStream fos = null;
				try {
					fos = new FileOutputStream(target);
					fos.write(picbyte);
				} catch (IOException e) {
					// Covers FileNotFoundException too; keep going with the next picture.
					e.printStackTrace();
				} finally {
					if (fos != null) {
						try {
							fos.close();
						} catch (IOException e) {
							e.printStackTrace();
						}
					}
				}
			}
		} catch (IOException e) {
			// The document could not be opened or parsed; there is nothing to extract.
			e.printStackTrace();
		} finally {
			if (opcPackage != null) {
				// Release the package without saving anything back to the source file.
				opcPackage.revert();
			}
		}
	}

解析word07表格

public static boolean Word(File file){
		List tableList = new ArrayList();
		OPCPackage opcPackage = null;
		XWPFDocument doc = null;
		try {
			opcPackage = POIXMLDocument.openPackage(file.getPath());
			doc = new XWPFDocument(opcPackage);
		} catch (IOException e) {
			e.printStackTrace();
		}
		tableList = doc.getTables();
		for(int i = 0; i rows1 = table1.getRows();
    		for(int j = 0; j cells1 = rows1.get(j).getTableCells();
    			for(int k = 0; k <  cells1.size(); k++){//遍历列
    				System.out.print("value="+cells1.get(k).getText()+" color="+cells1.get(k).getColor()+"\t");
        		}
	        }
		}
		return true;
	}

解析word07文本(只是文本)


/**
 * Prints the text of every paragraph in a Word 2007 document, one paragraph per line.
 *
 * <p>Only the paragraphs returned by {@code XWPFDocument.getParagraphs()} are printed.
 *
 * @param file the Word 2007 (.docx) file to read
 * @return {@code true} when the document was opened and its paragraphs were printed;
 *         {@code false} when the document could not be opened (the error is printed)
 */
public static boolean Word(File file){
		OPCPackage opcPackage = null;
		try {
			opcPackage = POIXMLDocument.openPackage(file.getPath());
			XWPFDocument doc = new XWPFDocument(opcPackage);

			// The element type is required: with a raw List, get(i) is an Object and getText() does not compile.
			List<XWPFParagraph> paras = doc.getParagraphs();
			for (int i = 0; i < paras.size(); i++) {
				System.out.println(paras.get(i).getText());
			}
			return true;
		} catch (IOException e) {
			// Report the failure to the caller; the original code went on to dereference a null document.
			e.printStackTrace();
			return false;
		} finally {
			if (opcPackage != null) {
				// Release the package without saving anything back to the source file.
				opcPackage.revert();
			}
		}
	}


POI API:http://pan.baidu.com/share/link?shareid=1163170766&uk=672711972


你可能感兴趣的:(文件读取)