iText 7 获取关键字在 PDF 文件中的坐标

目录

        1. maven配置

        2. 实体类

        3. java代码


1. maven配置


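<dependency>
	<groupId>cn.hutool</groupId>
	<artifactId>hutool-all</artifactId>
	<version>5.8.11</version>
</dependency>

<dependency>
	<groupId>com.itextpdf</groupId>
	<artifactId>itext7-core</artifactId>
	<version>7.2.0</version>
	<type>pom</type>
</dependency>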

2. 实体类

package com.example.demo.itext.model;

import lombok.Data;

import java.io.Serializable;

/**
 * Position and text of one keyword occurrence on a PDF page.
 *
 * <p>Getters, setters, {@code equals}, {@code hashCode} and {@code toString} are generated by
 * Lombok's {@code @Data}.
 *
 * <p>Natural ordering is by {@code y} descending, then by {@code x} ascending. Note that this
 * ordering looks only at {@code x} and {@code y}, so two beans that compare as equal are not
 * necessarily {@code equals}.
 */
@Data
public class KeyWordBean implements Comparable<KeyWordBean>, Serializable {

    /** X coordinate of the occurrence. */
    private float x;
    /** Y coordinate of the occurrence. */
    private float y;
    /** Width of the occurrence's bounding box. */
    private float width;
    /** Height of the occurrence's bounding box. */
    private float height;
    /** PDF page the occurrence is on. */
    private int page;
    /** Ordinal of this occurrence within its page. */
    private int num;
    /** Text of the occurrence. */
    private String text;

    /**
     * Orders beans by {@code y} descending and, for equal {@code y}, by {@code x} ascending.
     *
     * <p>Uses {@link Float#compare(float, float)} rather than casting a float difference to
     * {@code int}: the cast truncates any difference smaller than 1 to 0, which makes the
     * ordering non-transitive and breaks the {@link Comparable} contract.
     *
     * @param o the bean to compare with
     * @return a negative, zero or positive value as this bean sorts before, with or after {@code o}
     */
    @Override
    public int compareTo(KeyWordBean o) {
        // Sort by the Y axis first (larger Y comes first).
        int byY = Float.compare(o.getY(), this.getY());
        if (byY != 0) {
            return byY;
        }
        // Y is equal: fall back to the X axis (smaller X comes first).
        return Float.compare(this.getX(), o.getX());
    }
}

3. java代码

package com.example.demo.itext.util;

import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.json.JSONUtil;
import com.example.demo.itext.model.KeyWordBean;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.canvas.parser.PdfCanvasProcessor;
import com.itextpdf.kernel.pdf.canvas.parser.listener.IPdfTextLocation;
import com.itextpdf.kernel.pdf.canvas.parser.listener.RegexBasedLocationExtractionStrategy;

import java.io.IOException;
import java.util.*;

public class ItextPDFUtil {
    public static void main(String args[]) throws IOException {
        String path = "F:\\software\\myfile\\txt12_加水印.pdf";
        System.out.println("关键字在PDF文件中的文字信息:" + JSONUtil.toJsonStr(keyWordLocationMap("负责人签名:", path)));
    }

    /**
     * 功能描述: 获取关键字在pdf中的坐标 
* * @Param: [KEY_WORD:关键字, input:pdf文件路径] * @Return: java.util.Map> * @Author: lhp * @Date: 2023/1/29 14:53 */ public static Map> keyWordLocationMap(String KEY_WORD, String input) { Map> listMap; PdfDocument pdfDocument = null; try { PdfReader reader = new PdfReader(input); pdfDocument = new PdfDocument(reader); int pageNumbers = pdfDocument.getNumberOfPages(); listMap = new HashMap<>(pageNumbers); for (int i = 1; i <= pageNumbers; i++) { PdfPage page = pdfDocument.getPage(i); RegexBasedLocationExtractionStrategy strategy = new RegexBasedLocationExtractionStrategy(KEY_WORD); PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(strategy); canvasProcessor.processPageContent(page); Collection resultantLocations = strategy.getResultantLocations(); //自定义结果处理 if (!resultantLocations.isEmpty()) { List keyWordBeanList = new ArrayList<>(); List iPdfTextLocationList = CollectionUtil.newArrayList(resultantLocations); for (int m = 0; m < iPdfTextLocationList.size(); m++) { IPdfTextLocation item = iPdfTextLocationList.get(m); Rectangle boundRectangle = item.getRectangle(); KeyWordBean keyWordBean = new KeyWordBean(); keyWordBean.setPage(item.getPageNumber()); keyWordBean.setX(boundRectangle.getX()); keyWordBean.setY(boundRectangle.getY()); keyWordBean.setWidth(boundRectangle.getWidth()); keyWordBean.setHeight(boundRectangle.getHeight()); keyWordBean.setText(item.getText()); keyWordBean.setNum(m + 1); System.out.println("关键字“" + KEY_WORD + "” 的坐标为 x: " + boundRectangle.getX() + " ,y: " + boundRectangle.getY()); keyWordBeanList.add(keyWordBean); } listMap.put(i, keyWordBeanList); } } pdfDocument.close(); } catch (IOException e) { throw new RuntimeException(e); } finally { if (pdfDocument != null) { pdfDocument.close(); } } return listMap; } }

你可能感兴趣的:(itext,java)