最近在做PDF相关的工作,需要根据关键字定位它在PDF中的坐标位置,以便在该位置放置一个空白签名域。
我参考了网上的代码,并在其基础上做了一些改造,现在把代码贴出来。
参考文章:https://blog.csdn.net/weixin_43145779/article/details/83115766
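<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itextpdf</artifactId>
    <version>5.5.13</version>
</dependency>
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itext-asian</artifactId>
    <version>5.2.0</version>
</dependency>
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itext-xtra</artifactId>
    <version>5.5.13</version>
</dependency>
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itext-pdfa</artifactId>
    <version>5.5.13</version>
</dependency>
<dependency>
    <groupId>com.itextpdf.tool</groupId>
    <artifactId>xmlworker</artifactId>
    <version>5.5.13</version>
</dependency>
<dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>${junit.version}</version>
    <scope>test</scope>
</dependency>
<dependency>
    <groupId>org.bouncycastle</groupId>
    <artifactId>bcprov-jdk15on</artifactId>
    <version>1.49</version>
    <type>jar</type>
</dependency>
<dependency>
    <groupId>org.bouncycastle</groupId>
    <artifactId>bcpkix-jdk15on</artifactId>
    <version>1.49</version>
    <type>jar</type>
</dependency>
<dependency>
    <groupId>org.jfree</groupId>
    <artifactId>jfreechart</artifactId>
    <version>1.0.19</version>
</dependency>
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-lang3</artifactId>
    <version>3.4</version>
</dependency>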
pom中的依赖可能偏多,因为项目里还有其他测试会用到,所以没有去掉;另外我也删掉了一些感觉没有用到的jar包,如果因为缺少jar包而无法运行,请在评论里说一下。注意:下面的代码还import了Guava的Lists(com.google.common.collect.Lists),上面的依赖列表中没有包含guava,需要自行引入。
KeywordPDFUtils.java
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import com.google.common.collect.Lists;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.ImageRenderInfo;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.itextpdf.text.pdf.parser.RenderListener;
import com.itextpdf.text.pdf.parser.TextRenderInfo;
public class KeywordPDFUtils {
/**
* 将pdf内容按页面读取map中,map中的key为pdf的页面,value为该页的内容块list
* @param filePath
* @return
*/
public static Map> getPDFText(String filePath) {
Map> map = new HashMap>();
try {
PdfReader pdfReader = new PdfReader(filePath);
int pageNum = pdfReader.getNumberOfPages();
PdfReaderContentParser pdfReaderContentParser = new PdfReaderContentParser(pdfReader);
for (int i = 1; i <= pageNum; i++) {
List lists = Lists.newArrayList();
List pagelist = new ArrayList();
pagelist.add(0, i);
pdfReaderContentParser.processContent(i, new RenderListener() {
@Override
public void renderText(TextRenderInfo textRenderInfo) {
String text = textRenderInfo.getText(); // 整页内容
com.itextpdf.awt.geom.Rectangle2D.Float boundingRectange = textRenderInfo.getBaseline()
.getBoundingRectange();
KeyWordBean bean = new KeyWordBean();
bean.setX(boundingRectange.x);
bean.setY(boundingRectange.y);
bean.setPage(pagelist.get(0));
bean.setText(text);
lists.add(bean);
}
@Override
public void renderImage(ImageRenderInfo arg0) {
}
@Override
public void endTextBlock() {
}
@Override
public void beginTextBlock() {
}
});
map.put(i, lists);
}
} catch (IOException e) {
e.printStackTrace();
}
return map;
}
/**
* 根据第几页第几个关键字查找该关键字的xy坐标
* @param map2 pdf内容块,以页为key
* @param page 页
* @param num 页中的第几个
* @param keyWord 关键字
* @return
*/
public static KeyWordBean getKeyWordXY(Map> map2, int page, int num, String keyWord) {
List list=getKeyWordXY(map2,keyWord);
for(KeyWordBean bean:list) {
if(bean.getPage()==page && bean.getNum()==num) {
return bean;
}
}
return null;
}
/**
* 根据关键字查找该关键字在pdf中的xy坐标list
* @param map2 key为pdf的页,value为该的内容块list
* @param keyWord
* @return
*/
public static List getKeyWordXY(Map> map2,String keyWord) {
int keyMatch = 1;
StringBuilder content = new StringBuilder();
List keywordlist= new ArrayList();
for(int page:map2.keySet()) {
List list=map2.get(page);
Collections.sort(list);// 正序比较
for (int i = 0; i < list.size(); i++) {
KeyWordBean bean = list.get(i);
String text = bean.getText();
if (i + 1 != list.size()) {
KeyWordBean beanNext = list.get(i + 1);
float x = beanNext.getX() - bean.getX();
float y = beanNext.getY() - bean.getY();
if (y == 0 && x <= 1) {
} else {
if (StringUtils.contains(content.toString(), keyWord) || StringUtils.contains(text, keyWord)) {
bean.setNum(keyMatch++);
keywordlist.add(bean);
} else if ((!StringUtils.isEmpty(text) && keyWord.startsWith(text)) || content.length() > 0) {
content.append(text);
if (content.length() >= keyWord.length()) {
if (StringUtils.contains(content.toString(), keyWord)) {
bean.setNum(keyMatch++);
keywordlist.add(bean);
}
content = new StringBuilder();
}
}
}
} else {
if (StringUtils.contains(content.toString(), keyWord) || StringUtils.contains(text, keyWord)) {
bean.setNum(keyMatch++);
keywordlist.add(bean);
} else if ((!StringUtils.isEmpty(text) && keyWord.startsWith(text)) || content.length() > 0) {
content.append(text);
if (content.length() >= keyWord.length()) {
if (StringUtils.contains(content.toString(), keyWord)) {
bean.setNum(keyMatch++);
keywordlist.add(bean);
}
content = new StringBuilder();
}
}
}
}
}
return keywordlist;
}
}
KeyWordBean
public class KeyWordBean implements Comparable {
public KeyWordBean() {
super();
}
public KeyWordBean(float x, float y, int page, String text) {
super();
this.x = x;
this.y = y;
this.page = page;
this.text = text;
}
private float x;
private float y;
//pdf的页面
private int page;
//当前页面中第几个
private int num;
private String text;
public int getNum() {
return num;
}
public void setNum(int num) {
this.num = num;
}
public float getX() {
return x;
}
public void setX(float x) {
this.x = x;
}
public float getY() {
return y;
}
public void setY(float y) {
this.y = y;
}
public int getPage() {
return page;
}
public void setPage(int page) {
this.page = page;
}
public String getText() {
return text;
}
public void setText(String text) {
this.text = text;
}
@Override
public String toString() {
return "KeyWordBean [x=" + x + ", y=" + y + ", page=" + page + ",num=" + num + "]";
}
@Override
public int compareTo(KeyWordBean o) {
int i = (int) (o.getY() - this.getY());// 先按照Y轴排序
if (i == 0) {
return (int) (this.x - o.getX());// 如果Y轴相等了再按X轴进行排序
}
return i;
}
}
KeyWordstest.java 测试类
import java.util.List;
import java.util.Map;
/**
 * Manual smoke test for {@code KeywordPDFUtils}: parses a sample PDF and prints
 * the coordinates found for a keyword.
 */
public class KeyWordstest {

    /** Sample PDF used by both checks. */
    private static final String PDF_PATH = "D:\\tmp\\doctopdf\\test.pdf";

    public static void main(String[] args) throws Exception {
        KeyWordstest test = new KeyWordstest();
        test.getkeywordtest();
        test.getkeywordlisttest();
    }

    /** Looks up one occurrence (page 1, first match) of the keyword and prints it. */
    public void getkeywordtest() {
        System.out.println("======getkeywordtest======");
        // 1. Parse the PDF file.
        Map<Integer, List<KeyWordBean>> map = KeywordPDFUtils.getPDFText(PDF_PATH);
        int page = 1;
        int num = 1;
        String keyWord = "授权人:";
        // 2. Look up the keyword coordinates.
        KeyWordBean bean = KeywordPDFUtils.getKeyWordXY(map, page, num, keyWord);
        if (null == bean) {
            System.out.println("未查询到关键字。。。");
            // Nothing to print; falling through would dereference null.
            return;
        }
        System.out.println(bean.toString());
    }

    /** Looks up every occurrence of the keyword and prints each one. */
    public void getkeywordlisttest() {
        System.out.println("======getkeywordlisttest======");
        // 1. Parse the PDF file.
        Map<Integer, List<KeyWordBean>> map = KeywordPDFUtils.getPDFText(PDF_PATH);
        String keyWord = "授权人:";
        // 2. Look up the keyword coordinates.
        List<KeyWordBean> beanlist = KeywordPDFUtils.getKeyWordXY(map, keyWord);
        if (beanlist.isEmpty()) {
            System.out.println("未查询到关键字。。。");
        }
        for (KeyWordBean bean : beanlist) {
            System.out.println(bean.toString());
        }
    }
}