itext根据关键字定位在pdf中的坐标

最近在做 PDF 相关的功能,想实现根据关键字定位其在 PDF 中的坐标位置,以便在该位置放置一个空白签名域。

参考了网上的代码,然后我根据网上的代码进行了一些改造,现在贴出来

参考文章:https://blog.csdn.net/weixin_43145779/article/details/83115766


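		<dependency>
			<groupId>com.itextpdf</groupId>
			<artifactId>itextpdf</artifactId>
			<version>5.5.13</version>
		</dependency>
		<dependency>
			<groupId>com.itextpdf</groupId>
			<artifactId>itext-asian</artifactId>
			<version>5.2.0</version>
		</dependency>
		<dependency>
			<groupId>com.itextpdf</groupId>
			<artifactId>itext-xtra</artifactId>
			<version>5.5.13</version>
		</dependency>
		<dependency>
			<groupId>com.itextpdf</groupId>
			<artifactId>itext-pdfa</artifactId>
			<version>5.5.13</version>
		</dependency>
		<dependency>
			<groupId>com.itextpdf.tool</groupId>
			<artifactId>xmlworker</artifactId>
			<version>5.5.13</version>
		</dependency>
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>${junit.version}</version>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>org.bouncycastle</groupId>
			<artifactId>bcprov-jdk15on</artifactId>
			<version>1.49</version>
			<type>jar</type>
		</dependency>
		<dependency>
			<groupId>org.bouncycastle</groupId>
			<artifactId>bcpkix-jdk15on</artifactId>
			<version>1.49</version>
			<type>jar</type>
		</dependency>
		<dependency>
			<groupId>org.jfree</groupId>
			<artifactId>jfreechart</artifactId>
			<version>1.0.19</version>
		</dependency>
		<dependency>
			<groupId>org.apache.commons</groupId>
			<artifactId>commons-lang3</artifactId>
			<version>3.4</version>
		</dependency>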

 pom 中的依赖可能有些多,因为项目里还有其他测试在用,所以没有全部去掉;另外我也删掉了一些感觉没用到的 jar 包,如果运行时发现缺少 jar 包,请在评论里说一下。

KeywordPDFUtils.java


import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;

import com.google.common.collect.Lists;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.ImageRenderInfo;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.itextpdf.text.pdf.parser.RenderListener;
import com.itextpdf.text.pdf.parser.TextRenderInfo;

public class KeywordPDFUtils {

	/**
	 * 将pdf内容按页面读取map中,map中的key为pdf的页面,value为该页的内容块list
	 * @param filePath
	 * @return
	 */
	public static Map> getPDFText(String filePath) {
		Map> map = new HashMap>();
		try {
			PdfReader pdfReader = new PdfReader(filePath);
			int pageNum = pdfReader.getNumberOfPages();
			PdfReaderContentParser pdfReaderContentParser = new PdfReaderContentParser(pdfReader);

			for (int i = 1; i <= pageNum; i++) {
				List lists = Lists.newArrayList();
				List pagelist = new ArrayList();
				pagelist.add(0, i);
				pdfReaderContentParser.processContent(i, new RenderListener() {
					@Override
					public void renderText(TextRenderInfo textRenderInfo) {
						String text = textRenderInfo.getText(); // 整页内容
						com.itextpdf.awt.geom.Rectangle2D.Float boundingRectange = textRenderInfo.getBaseline()
								.getBoundingRectange();
						KeyWordBean bean = new KeyWordBean();
						bean.setX(boundingRectange.x);
						bean.setY(boundingRectange.y);
						bean.setPage(pagelist.get(0));
						bean.setText(text);
						lists.add(bean);

					}

					@Override
					public void renderImage(ImageRenderInfo arg0) {
					}

					@Override
					public void endTextBlock() {
					}

					@Override
					public void beginTextBlock() {

					}
				});
				map.put(i, lists);
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
		return map;
	}
	
	/**
	 * 根据第几页第几个关键字查找该关键字的xy坐标
	 * @param map2 pdf内容块,以页为key
	 * @param page 页
	 * @param num 页中的第几个
	 * @param keyWord 关键字
	 * @return
	 */
	public static KeyWordBean getKeyWordXY(Map> map2, int page, int num, String keyWord) {
		List list=getKeyWordXY(map2,keyWord);
		for(KeyWordBean bean:list) {
			if(bean.getPage()==page && bean.getNum()==num) {
				return bean;
			}
		}
		return null;
	}
	/**
	 * 根据关键字查找该关键字在pdf中的xy坐标list
	 * @param map2 key为pdf的页,value为该的内容块list
	 * @param keyWord
	 * @return 
	 */
	public static List getKeyWordXY(Map> map2,String keyWord) {
		int keyMatch = 1;
		StringBuilder content = new StringBuilder();
		List keywordlist= new ArrayList();
		for(int page:map2.keySet()) {
			List list=map2.get(page);

			Collections.sort(list);// 正序比较
			for (int i = 0; i < list.size(); i++) {
				KeyWordBean bean = list.get(i);
				String text = bean.getText();
				if (i + 1 != list.size()) {
					KeyWordBean beanNext = list.get(i + 1);
					float x = beanNext.getX() - bean.getX();
					float y = beanNext.getY() - bean.getY();
					if (y == 0 && x <= 1) {
					} else {
						if (StringUtils.contains(content.toString(), keyWord) || StringUtils.contains(text, keyWord)) {
							bean.setNum(keyMatch++);
							keywordlist.add(bean);
							
						} else if ((!StringUtils.isEmpty(text) && keyWord.startsWith(text)) || content.length() > 0) {
							content.append(text);
							if (content.length() >= keyWord.length()) {
								if (StringUtils.contains(content.toString(), keyWord)) {
									bean.setNum(keyMatch++);
									keywordlist.add(bean);
									
								}
								content = new StringBuilder();
							}
						}
					}
				} else {
					if (StringUtils.contains(content.toString(), keyWord) || StringUtils.contains(text, keyWord)) {
						bean.setNum(keyMatch++);
						keywordlist.add(bean);
					} else if ((!StringUtils.isEmpty(text) && keyWord.startsWith(text)) || content.length() > 0) {
						content.append(text);
						if (content.length() >= keyWord.length()) {
							if (StringUtils.contains(content.toString(), keyWord)) {
								bean.setNum(keyMatch++);
								keywordlist.add(bean);
							}
							content = new StringBuilder();
						}
					}
				}
			}
		}
		return keywordlist;
	}
}

KeyWordBean


public class KeyWordBean implements Comparable {
	public KeyWordBean() {
		super();
	}

	public KeyWordBean(float x, float y, int page, String text) {
		super();
		this.x = x;
		this.y = y;
		this.page = page;
		this.text = text;
	}

	private float x;
	private float y;
	//pdf的页面
	private int page;
	//当前页面中第几个
	private int num;
	private String text;

	public int getNum() {
		return num;
	}

	public void setNum(int num) {
		this.num = num;
	}

	public float getX() {
		return x;
	}

	public void setX(float x) {
		this.x = x;
	}

	public float getY() {
		return y;
	}

	public void setY(float y) {
		this.y = y;
	}

	public int getPage() {
		return page;
	}

	public void setPage(int page) {
		this.page = page;
	}

	public String getText() {
		return text;
	}

	public void setText(String text) {
		this.text = text;
	}

	@Override
	public String toString() {
		return "KeyWordBean [x=" + x + ", y=" + y + ", page=" + page + ",num=" + num + "]";
	}

	@Override
	public int compareTo(KeyWordBean o) {
		int i = (int) (o.getY() - this.getY());// 先按照Y轴排序
		if (i == 0) {
			return (int) (this.x - o.getX());// 如果Y轴相等了再按X轴进行排序
		}
		return i;
	}
}

KeyWordstest.java 测试类


import java.util.List;
import java.util.Map;

/**
 * Manual smoke test for {@code KeywordPDFUtils}: parses a sample PDF and prints the
 * coordinates found for a keyword.
 */
public class KeyWordstest {
	// Sample PDF parsed by both demo methods.
	private static final String PDF_PATH = "D:\\tmp\\doctopdf\\test.pdf";
	// Keyword searched for by both demo methods.
	private static final String KEY_WORD = "授权人:";

	public static void main(String[] args) throws Exception {
		KeyWordstest test = new KeyWordstest();
		test.getkeywordtest();
		test.getkeywordlisttest();
	}

	/**
	 * Looks up the first occurrence of the keyword on page 1 and prints it, or prints a
	 * "not found" message when there is none.
	 */
	public void getkeywordtest() {
		System.out.println("======getkeywordtest======");
		// 1. Parse the PDF file
		Map<Integer, List<KeyWordBean>> map = KeywordPDFUtils.getPDFText(PDF_PATH);
		int page = 1;
		int num = 1;
		// 2. Look up the keyword coordinates
		KeyWordBean bean = KeywordPDFUtils.getKeyWordXY(map, page, num, KEY_WORD);
		if (null == bean) {
			System.out.println("未查询到关键字。。。");
			// Nothing to print; falling through would dereference null.
			return;
		}
		System.out.println(bean.toString());

	}

	/**
	 * Looks up every occurrence of the keyword in the document and prints each one, or prints
	 * a "not found" message when there is none.
	 */
	public void getkeywordlisttest() {
		System.out.println("======getkeywordlisttest======");
		// 1. Parse the PDF file
		Map<Integer, List<KeyWordBean>> map = KeywordPDFUtils.getPDFText(PDF_PATH);

		// 2. Look up the keyword coordinates
		List<KeyWordBean> beanlist = KeywordPDFUtils.getKeyWordXY(map, KEY_WORD);
		if (beanlist.isEmpty()) {
			System.out.println("未查询到关键字。。。");
		}
		for (KeyWordBean bean : beanlist) {
			System.out.println(bean.toString());
		}

	}
}

 

你可能感兴趣的:(itext学习笔记)