word模板:https://download.csdn.net/download/weixin_41420919/85708792
问题: 需要读取任免编辑器生成的word里面的内容,其中生成的word是一个表格,需要根据表格的行列数获取相应的内容。
难点:在于表格里有图片,以及单元格的合并。
解决方案:
<dependency>
<groupId>org.apache.poigroupId>
<artifactId>poiartifactId>
<version>3.15version>
dependency>
<dependency>
<groupId>org.apache.poigroupId>
<artifactId>poi-ooxmlartifactId>
<version>3.15version>
dependency>
<dependency>
<groupId>org.apache.poigroupId>
<artifactId>poi-scratchpadartifactId>
<version>3.15version>
dependency>
//注意测试程序没有关闭流,实际使用要关闭流
String filePath = "D:\\xujinwei\\yz_dagl\\testWord\\任免表99.doc";
WordImportUtil test = new WordImportUtil(filePath);
InputStream is;
FileInputStream in = new FileInputStream(filePath);// 载入文档
POIFSFileSystem pfs = new POIFSFileSystem(in);
HWPFDocument hwpf = new HWPFDocument(pfs);
Range range = hwpf.getRange();// 得到文档的读取范围
TableIterator it = new TableIterator(range);
Map<String,String> map = new HashMap<>();
int tt = 1;
while (it.hasNext()) {
Table tb = it.next();
System.out.println("=========================================第"+tt+"个表格==");
// 迭代行,默认从0开始
int i = 0;
for ( ; i < tb.numRows(); i++) {
TableRow tr = tb.getRow(i);
// 迭代列,默认从0开始
int j = 0;
for ( ; j < tr.numCells(); j++) {
TableCell td = tr.getCell(j);// 取得单元格
// 取得单元格的内容
String s = "";
for (int k = 0; k < td.numParagraphs(); k++) {
Paragraph para = td.getParagraph(k);// 获取第k个段落
s += para.text().trim();
}
map.put(s, "i:" + i + " " + "j:" + j);
}
map.forEach((x,y)->{
System.out.println("===========================key========"+x);
System.out.println("===========================value========"+y);
});
map.clear();
}
tt++;
}
//获取图片
PicturesTable picturesTable = hwpf.getPicturesTable();
List<Picture> allPictures = picturesTable.getAllPictures();
for (Picture picture : allPictures) {
String picBase64Str = Base64Utils.encodeToString(picture.getContent());
}
return "";
这里对所有表格的字段进行打印,实测时,我发现我的word文档一个表格被解析成三个表格。从第二个表格起才是我所需要的数据以及因为有些单元格合并,获取的字段不对, 因此我还做了特殊处理。下面提供一个对合并的单元格的解决方案
for ( ; j < tr.numCells(); j++) {
TableCell td = tr.getCell(j);// 取得单元格
// 取得单元格的内容
String s = "";
//这个s是合并单元格的结果,如果想要获取单独的可以在k那里进行操作,可以通过判断k的值,直接return
//para.text().trim();
for (int k = 0; k < td.numParagraphs(); k++) {
Paragraph para = td.getParagraph(k);// 获取第k个段落
s += para.text().trim();
}
}
package com.hrtac.application.util;
import com.hrtac.application.bean.xt001.DA051Bean;
import com.hrtac.application.bean.xt001.DA052Bean;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.*;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.springframework.util.Base64Utils;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Description:
* User: xu jin wei
* Date: 2022-06-15
* Time: 8:44
*/
public class WordImportUtil {
private String filePath ;
private HWPFDocument hwpf;
private InputStream in;
public WordImportUtil(String filePath){
this.in = null;// 载入文档
try {
in = new FileInputStream(filePath);
POIFSFileSystem pfs = new POIFSFileSystem(in);
this.hwpf = new HWPFDocument(pfs);
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
public String getTableText(int tableIndex, int row ,int col, boolean isSpecial){
int noSpecial = 3;
return getTableText(tableIndex, row ,col, isSpecial, noSpecial);
}
public String getTableText(int tableIndex, int row ,int col, boolean isSpecial, int specialParagraph){
Range range = hwpf.getRange();// 得到文档的读取范围
TableIterator it = new TableIterator(range);
List<String> list = new ArrayList<>();
Map<String,String> map = new HashMap<>();
StringBuffer errMsg = new StringBuffer();
int tt = 1;
int i =0;
while (it.hasNext()) {
Table tb = it.next();
if(i==tableIndex) {
if(row>=tb.numRows()){
return "";
}
TableRow tr = tb.getRow(row);
TableCell td = tr.getCell(col);// 取得单元格
// 取得单元格的内容
String s = "";
for (int k = 0; k < td.numParagraphs(); k++) {
Paragraph para = td.getParagraph(k);// 获取第k个段落
s += para.text().trim();
if(isSpecial && row==6 && col==2 && specialParagraph==k ){
//在职教育-学历
return para.text().trim();
}
if(isSpecial && row==6 && col==2 && specialParagraph==k ){
//在职教育-学位
return para.text().trim();
}
if(isSpecial && row==6 && col==4 && specialParagraph==k ){
//在职教育-毕业院校
return para.text().trim();
}
if(isSpecial && row==6 && col==4 && specialParagraph==k ){
//在职教育-毕业专业
return para.text().trim();
}
}
return s;
}
i++;
}
return "";
}
public void closeStream() {
if(this.hwpf !=null){
try {
this.hwpf.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (this.in != null) {
try {
this.in.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
public DA051Bean getDA051BaseInfoFromWord(WordImportUtil wordImportUtil) {
DA051Bean da051Bean = new DA051Bean();
int tableIndex = 1;
da051Bean.setXM00001(wordImportUtil.getTableText(tableIndex,0,1,false));
da051Bean.setXB00001(wordImportUtil.getTableText(tableIndex,0,3,false));
String CSNY001 = wordImportUtil.getTableText(tableIndex, 0, 5,false);
if(StringUtils.isNotEmpty(CSNY001)) CSNY001 = CSNY001.substring(0,CSNY001.indexOf("(")).replace(".","");
da051Bean.setCSNY001(CSNY001);
da051Bean.setMZ00001(wordImportUtil.getTableText(tableIndex,1,1,false));
da051Bean.setJG00001(wordImportUtil.getTableText(tableIndex,1,3,false));
da051Bean.setCSD0001(wordImportUtil.getTableText(tableIndex,1,5,false));
da051Bean.setRDSJ001(wordImportUtil.getTableText(tableIndex,2,1,false).replace(".",""));
da051Bean.setCJGZ002(wordImportUtil.getTableText(tableIndex,2,3,false).replace(".",""));
da051Bean.setJKZK001(wordImportUtil.getTableText(tableIndex,2,5,false));
da051Bean.setZYJS007(wordImportUtil.getTableText(tableIndex,3,1,false));
da051Bean.setZYTC001(wordImportUtil.getTableText(tableIndex,3,3,false));
da051Bean.setXLMC001(wordImportUtil.getTableText(tableIndex,4,2,false));
da051Bean.setXWMC001(wordImportUtil.getTableText(tableIndex,5,2,false));
da051Bean.setBYYX001(wordImportUtil.getTableText(tableIndex,4,4,false));
da051Bean.setBYZY001(wordImportUtil.getTableText(tableIndex,5,4,false));
// 在职教育 学历和学位合并了 院校和专业也合并了
da051Bean.setXLMC002(wordImportUtil.getTableText(tableIndex,6,2,true,0));
da051Bean.setXWMC002(wordImportUtil.getTableText(tableIndex,6,2,true,1));
da051Bean.setBYYX002(wordImportUtil.getTableText(tableIndex,6,4,true,0));
da051Bean.setBYZY002(wordImportUtil.getTableText(tableIndex,6,4,true,1));
da051Bean.setXRZW001(wordImportUtil.getTableText(tableIndex,8,1,false));
da051Bean.setNRZW001(wordImportUtil.getTableText(tableIndex,9,1,false));
da051Bean.setNMZW001(wordImportUtil.getTableText(tableIndex,10,1,false));
da051Bean.setJL00001(wordImportUtil.getTableText(tableIndex,11,1,false));
tableIndex++;
da051Bean.setJCQK001(wordImportUtil.getTableText(tableIndex,0,1,false));
da051Bean.setNDKH001(wordImportUtil.getTableText(tableIndex,1,1,false));
da051Bean.setRMLY001(wordImportUtil.getTableText(tableIndex,2,1,false));
//和家庭成员有关 7 --11 8--12 9--13 10--14
da051Bean.setCBDW001(wordImportUtil.getTableText(tableIndex,getCBDW(wordImportUtil),1,false));
return da051Bean;
}
public int getCBDW(WordImportUtil wordImportUtil){
int index = -1;
if("呈报单位".equals(wordImportUtil.getTableText(2,11,0,false))){
index = 11;
};
if("呈报单位".equals(wordImportUtil.getTableText(2,12,0,false))){
index = 12;
};
if("呈报单位".equals(wordImportUtil.getTableText(2,13,0,false))){
index = 13;
};
if("呈报单位".equals(wordImportUtil.getTableText(2,14,0,false))){
index = 14;
};
return index;
}
public List<DA052Bean> getDa052BeanListFormWord(WordImportUtil wordImportUtil){
int index = getCBDW(wordImportUtil);
List<DA052Bean> list = new ArrayList<>();
for(int row = 4; row < index; row ++){
DA052Bean da052Bean = new DA052Bean();
int col = 1;
da052Bean.setCYCW001(wordImportUtil.getTableText(2,row,col,false));
da052Bean.setCYXM001(wordImportUtil.getTableText(2,row,++col,false));
da052Bean.setCYCS001(wordImportUtil.getTableText(2,row,++col,false));
da052Bean.setCYZZ001(wordImportUtil.getTableText(2,row,++col,false));
da052Bean.setCYDW001(wordImportUtil.getTableText(2,row,++col,false));
da052Bean.setXH00001(String.valueOf((row-3)));
list.add(da052Bean);
}
for(int k = list.size(); k <10 ;k++){
DA052Bean da052Bean = new DA052Bean();
da052Bean.setXH00001(String.valueOf( (k + 1)));
list.add(da052Bean);
}
return list;
}
public String getPicBase64String(WordImportUtil wordImportUtil) {
PicturesTable picturesTable = wordImportUtil.hwpf.getPicturesTable();
List<Picture> allPictures = picturesTable.getAllPictures();
for (Picture picture : allPictures) {
return Base64Utils.encodeToString(picture.getContent());
}
return "";
}
public static void main(String[] args)throws Exception {
String filePath = "D:\\xujinwei\\yz_dagl\\testWord\\任免表99.doc";
WordImportUtil test = new WordImportUtil(filePath);
InputStream is;
FileInputStream in = new FileInputStream(filePath);// 载入文档
POIFSFileSystem pfs = new POIFSFileSystem(in);
HWPFDocument hwpf = new HWPFDocument(pfs);
Range range = hwpf.getRange();// 得到文档的读取范围
TableIterator it = new TableIterator(range);
Map<String,String> map = new HashMap<>();
int tt = 1;
while (it.hasNext()) {
Table tb = it.next();
System.out.println("=========================================第"+tt+"个表格==");
// 迭代行,默认从0开始
// if(tt==2) {
int i = 0;
for ( ; i < tb.numRows(); i++) {
TableRow tr = tb.getRow(i);
// 迭代列,默认从0开始
int j = 0;
for ( ; j < tr.numCells(); j++) {
TableCell td = tr.getCell(j);// 取得单元格
// 取得单元格的内容
String s = "";
for (int k = 0; k < td.numParagraphs(); k++) {
Paragraph para = td.getParagraph(k);// 获取第k个段落
s += para.text().trim();
}
map.put(s, "i:" + i + " " + "j:" + j);
}
map.forEach((x,y)->{
System.out.println("===========================key========"+x);
System.out.println("===========================value========"+y);
});
map.clear();
}
// }
tt++;
}
/*WordUtils wordUtils = new WordUtils(false);
wordUtils.openDocument(filePath);
Dispatch doc = wordUtils.getDoc();
// 获取所有表格
Dispatch tables = Dispatch.get(doc, "Tables").toDispatch();
// 获取表格数目
int tablesCount = Dispatch.get(tables, "Count").getInt();
System.out.println("文档中共有" + tablesCount + "个表格");
for (int i = 1; i <= tablesCount; i++) {
// 获取第i个表格
Dispatch table = Dispatch.call(tables, "Item", new Variant(i)).toDispatch();
// 获取该表格所有行
Dispatch rows = Dispatch.call(table, "Rows").toDispatch();
// 获取该表格所有列
Dispatch columns = Dispatch.call(table, "Columns").toDispatch();
// 获取该表格行数
int rowsCount = Dispatch.get(rows, "Count").getInt();
// 获取该表格列数
int columnsCount = Dispatch.get(columns, "Count").getInt();
for (int j = 1; j <= rowsCount; j++) {
for (int k = 1; k <= columnsCount; k++) {
Dispatch cell = Dispatch.call(table, "Cell", new Variant(j), new Variant(k)).toDispatch();
Dispatch Range = Dispatch.get(cell, "Range").toDispatch();
String text = Dispatch.get(Range, "Text").getString();
//去掉最后的回车符;
text = text.substring(0, text.length() - 2);
System.out.println("单元格中的内容:" + text);
}
}
}
*/
// test.testWord(filePath);
// test.spireForTableOfDoc(filePath);
}
}