老梁 - https://cloud.tencent.com/developer/article/1502408
微光•无单位 - https://blog.csdn.net/LvWeijie941/article/details/105248627/
官方api - https://api.itextpdf.com/iText7/java/7.0.5/
具体版本为 itextpdf 7.0.4，使用 Maven 管理项目依赖
主要使用jar包: kernel-7.0.4.jar
转载请注明出处
m2
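<mirrors>
  <mirror>
    <id>alimaven</id>
    <name>aliyun maven</name>
    <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
    <mirrorOf>central</mirrorOf>
  </mirror>
</mirrors>
<profiles>
  <profile>
    <id>nexus</id>
    <repositories>
      <repository>
        <id>nexus</id>
        <name>local private nexus</name>
        <url>http://maven.oschina.net/content/groups/public/</url>
        <releases>
          <enabled>true</enabled>
        </releases>
        <snapshots>
          <enabled>false</enabled>
        </snapshots>
      </repository>
    </repositories>
    <pluginRepositories>
      <pluginRepository>
        <id>nexus</id>
        <name>local private nexus</name>
        <url>http://maven.oschina.net/content/groups/public/</url>
        <releases>
          <enabled>true</enabled>
        </releases>
        <snapshots>
          <enabled>false</enabled>
        </snapshots>
      </pluginRepository>
    </pluginRepositories>
  </profile>
</profiles>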
<dependencies>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>kernel</artifactId>
<version>7.0.4</version>
</dependency>
</dependencies>
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.canvas.parser.PdfDocumentContentParser;
import com.itextpdf.kernel.pdf.canvas.parser.listener.IPdfTextLocation;
import com.itextpdf.kernel.pdf.canvas.parser.listener.RegexBasedLocationExtractionStrategy;
import java.util.Collection;
/**
 * Small demo that searches one page of a PDF for a keyword with iText 7's
 * {@link RegexBasedLocationExtractionStrategy} and prints the text and the
 * lower-left corner of the bounding rectangle of every match.
 */
public class test {

    /**
     * Runs the demo against {@code target.pdf}, looking for "presented" on page 46.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String input = "target.pdf";
        // Arguments: the PDF file name, the keyword, and the page of that file to scan.
        getKeyWordsLocation(input, "presented", 46);
    }

    /**
     * Prints every occurrence of {@code key} found on page {@code pageNum} of the
     * PDF at {@code input}.
     *
     * <p>For each match the matched text and the X/Y of its bounding rectangle are
     * written to standard output; if nothing matches, {@code "the result is null"}
     * is printed instead. Failures while opening or parsing the file are reported
     * on standard error and are not propagated to the caller.
     *
     * @param input   path of the PDF file to read
     * @param key     keyword to look for; it is handed to
     *                {@link RegexBasedLocationExtractionStrategy}, so it is treated
     *                as a regular expression
     * @param pageNum page to scan; must lie between 1 and the document's page count
     */
    public static void getKeyWordsLocation(String input, String key, int pageNum) {
        RegexBasedLocationExtractionStrategy strategy = new RegexBasedLocationExtractionStrategy(key);

        // try-with-resources closes the document first and the reader second, and
        // does so even when parsing throws (the original leaked both on failure).
        try (PdfReader pr = new PdfReader(input);
             PdfDocument pd = new PdfDocument(pr)) {

            // Report a bad page number explicitly instead of letting it surface
            // as a misleading "read file failed!".
            int pageCount = pd.getNumberOfPages();
            if (pageNum < 1 || pageNum > pageCount) {
                System.err.println("page " + pageNum + " is out of range (1-" + pageCount + ")");
                return;
            }

            // Core idea: run a listener strategy over the page content.
            // PdfDocumentContentParser feeds the page's content to the strategy,
            // which collects the locations of everything that matches.
            PdfDocumentContentParser pdcp = new PdfDocumentContentParser(pd);
            pdcp.processContent(pageNum, strategy);

            // Fetch the matches gathered by the strategy.
            Collection<IPdfTextLocation> resultantLocations = strategy.getResultantLocations();
            if (resultantLocations.isEmpty()) {
                System.out.println("the result is null");
                return;
            }

            // Custom result handling: dump text and position of each match.
            for (IPdfTextLocation item : resultantLocations) {
                Rectangle boundRectangle = item.getRectangle();
                System.out.println(item.getText());
                System.out.println("[" + key + "] location of x: " + boundRectangle.getX()
                        + " ,y: " + boundRectangle.getY());
            }
        } catch (Exception e) {
            System.err.println("read file failed!");
            e.printStackTrace();
        }
    }
}