所需要的jar包:
dom4j-1.6.1.jar
poi-3.8-20120326.jar
poi-examples-3.8-20120326.jar
poi-excelant-3.8-20120326.jar
poi-ooxml-3.8-20120326.jar
poi-ooxml-schemas-3.8-20120326.jar
poi-scratchpad-3.8-20120326.jar
stax-api-1.0.1.jar
xmlbeans-2.3.0.jar
POI解析word03(HWPFDocument)
/**
 * Prints the plain text of the hard-coded sample Word 2003 (.doc) file.
 * Pictures in the document are not read.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    InputStream is = null;
    try {
        // Word 2003: pictures are not read
        is = new FileInputStream(new File("files\\2003.doc"));
        WordExtractor ex = new WordExtractor(is); // is: the InputStream of the Word file
        String text2003 = ex.getText();
        System.out.println(text2003);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Release the file handle whether or not extraction succeeded.
        if (is != null) {
            try {
                is.close();
            } catch (IOException closeError) {
                closeError.printStackTrace();
            }
        }
    }
}
解析word03表格:
/**
 * Loads {@code file} as a Word 2003 document (HWPFDocument) and iterates over
 * the tables returned by {@code getTables}.
 *
 * NOTE(review): this listing breaks off inside the for-loop header at the end,
 * so the loop body, the return value and the end of the method are not visible.
 * NOTE(review): when loading fails, hdoc stays null and is still passed to
 * getTables; confirm that helper tolerates null.
 * NOTE(review): fis is not closed anywhere in the visible part of the method.
 */
public static boolean word03(File file){
/** Input stream of the Word document */
InputStream fis = null;
POIFSFileSystem pfs;
/** Word 2003 document object */
HWPFDocument hdoc = null;
try { // load the document
fis = new FileInputStream(file);
pfs = new POIFSFileSystem(fis);
hdoc = new HWPFDocument(pfs);
} catch (IOException e) {
e.printStackTrace();
}
List tableList1 = getTables(hdoc);
// iterate over the tables in the document, starting from index 0
for(int i = 0; i
POI解析word07(XWPFDocument)
/**
 * Demo that prints the plain text of a Word 2007 (.docx) document.
 */
public class Test {
    /**
     * Extracts and prints the text of the hard-coded sample .docx file.
     * Pictures are not read, and table data ends up at the end of the string.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        OPCPackage opcPackage = null;
        try {
            opcPackage = POIXMLDocument.openPackage("files\\2007.docx");
            POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
            String text2007 = extractor.getText();
            System.out.println(text2007);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // The package is only read here, so close it without saving
            // anything back to the file.
            if (opcPackage != null) {
                opcPackage.revert();
            }
        }
    }
}
解析图片:
/**
 * Writes every picture embedded in a Word 2007 (.docx) file into the
 * hard-coded output directory {@code G:/compare}.
 *
 * Each picture goes to {@code <docx file name>_<index>.jpg}; the index keeps
 * later pictures from overwriting earlier ones. The ".jpg" suffix is applied
 * regardless of the picture's real format.
 *
 * I/O failures are printed to stderr; a failure to open the document aborts
 * the export, a failure to write one picture does not stop the others.
 *
 * @param file the .docx file to read pictures from
 */
public static void picture(File file){
    String imagePath = "G:/compare";
    File imageFile = new File(imagePath);
    if (!imageFile.exists()) {
        imageFile.mkdirs();
    }

    OPCPackage opcPackage = null;
    try {
        opcPackage = POIXMLDocument.openPackage(file.getPath());
        XWPFDocument xwpfd = new XWPFDocument(opcPackage);

        List<XWPFPictureData> piclist = xwpfd.getAllPictures();
        for (int j = 0; j < piclist.size(); j++) {
            byte[] picbyte = piclist.get(j).getData();
            String target = imagePath + "/" + file.getName() + "_" + j + ".jpg";

            FileOutputStream fos = null;
            try {
                fos = new FileOutputStream(target);
                fos.write(picbyte);
            } catch (IOException writeError) {
                writeError.printStackTrace();
            } finally {
                // Always release the output handle, even after a failed write.
                if (fos != null) {
                    try {
                        fos.close();
                    } catch (IOException closeError) {
                        closeError.printStackTrace();
                    }
                }
            }
        }
    } catch (IOException e) {
        // The document could not be opened, so there is nothing to export.
        e.printStackTrace();
    } finally {
        // Read-only use: close the package without saving.
        if (opcPackage != null) {
            opcPackage.revert();
        }
    }
}
解析word07表格
public static boolean Word(File file){
List tableList = new ArrayList();
OPCPackage opcPackage = null;
XWPFDocument doc = null;
try {
opcPackage = POIXMLDocument.openPackage(file.getPath());
doc = new XWPFDocument(opcPackage);
} catch (IOException e) {
e.printStackTrace();
}
tableList = doc.getTables();
for(int i = 0; i rows1 = table1.getRows();
for(int j = 0; j cells1 = rows1.get(j).getTableCells();
for(int k = 0; k < cells1.size(); k++){//遍历列
System.out.print("value="+cells1.get(k).getText()+" color="+cells1.get(k).getColor()+"\t");
}
}
}
return true;
}
解析word07文本(只是文本)
/**
 * Prints the text of every paragraph in a Word 2007 (.docx) file, one
 * paragraph per line.
 *
 * @param file the .docx file to read
 * @return {@code true} when the document was opened and its paragraphs
 *         printed, {@code false} when it could not be opened
 */
public static boolean Word(File file){
    OPCPackage opcPackage = null;
    try {
        opcPackage = POIXMLDocument.openPackage(file.getPath());
        XWPFDocument doc = new XWPFDocument(opcPackage);

        // The element type is required: a raw List yields Object, which has no getText().
        List<XWPFParagraph> paras = doc.getParagraphs();
        for (int i = 0; i < paras.size(); i++) {
            System.out.println(paras.get(i).getText());
        }
        return true;
    } catch (IOException e) {
        // Report the failure to the caller instead of dereferencing a null document.
        e.printStackTrace();
        return false;
    } finally {
        // Read-only use: close the package without saving.
        if (opcPackage != null) {
            opcPackage.revert();
        }
    }
}
POI API:http://pan.baidu.com/share/link?shareid=1163170766&uk=672711972