<dependencies>
    <dependency>
        <groupId>com.itextpdf</groupId>
        <artifactId>itextpdf</artifactId>
        <version>5.5.11</version>
    </dependency>
    <dependency>
        <groupId>com.itextpdf</groupId>
        <artifactId>itext-asian</artifactId>
        <version>5.2.0</version>
    </dependency>
</dependencies>
/**
 * Reads every uploaded PDF in a multipart request and prints the text of all its pages.
 *
 * <p>For each uploaded file a {@code PdfReader} is opened on the file's input stream, the text
 * of pages 1..N is concatenated and written to standard output. If reading any file fails with
 * an {@link IOException}, the response message is switched to the failure text and processing
 * continues with the next file.
 *
 * @param request  the incoming request; it is cast to {@code MultipartHttpServletRequest}, so a
 *                 non-multipart request results in a {@link ClassCastException}
 * @param response the servlet response (not used by this method)
 * @return an {@code AjaxJson} whose message is "添加成功" or, after an I/O failure, "添加失败"
 * @throws Exception declared for the framework; I/O failures while reading a PDF are handled here
 */
@RequestMapping(params = "cgImportPdf", method = RequestMethod.POST)
@ResponseBody
public AjaxJson cgImportPdf(HttpServletRequest request, HttpServletResponse response) throws Exception {
    String msg = "添加成功";
    AjaxJson j = new AjaxJson();
    MultipartHttpServletRequest multipartRequest = (MultipartHttpServletRequest) request;
    Map<String, MultipartFile> fileMap = multipartRequest.getFileMap();
    for (Map.Entry<String, MultipartFile> entity : fileMap.entrySet()) {
        MultipartFile file = entity.getValue(); // the uploaded file
        PdfReader reader = null;
        try {
            // A PdfReader can be created from an input stream (as here) or from a file path.
            reader = new PdfReader(file.getInputStream());
            int pageNum = reader.getNumberOfPages();
            // StringBuilder avoids re-copying the accumulated text for every page.
            StringBuilder pageContent = new StringBuilder();
            for (int i = 1; i <= pageNum; i++) { // page numbers are 1-based
                pageContent.append(PdfTextExtractor.getTextFromPage(reader, i));
            }
            // Full text of the PDF.
            System.out.println("pageContent:" + pageContent);
        } catch (IOException e) {
            msg = "添加失败";
            e.printStackTrace();
        } finally {
            // reader stays null when the constructor itself failed; closing it unguarded would
            // throw a NullPointerException and hide the failure handled above.
            if (reader != null) {
                reader.close();
            }
        }
    }
    j.setMsg(msg);
    return j;
}
/**
 * Reads every uploaded PDF in a multipart request and prints, per page, the text found inside a
 * fixed rectangular region, followed by the full text of the document.
 *
 * <p>If reading any file fails with an {@link IOException}, the response message is switched to
 * the failure text and processing continues with the next file.
 *
 * @param request  the incoming request; it is cast to {@code MultipartHttpServletRequest}, so a
 *                 non-multipart request results in a {@link ClassCastException}
 * @param response the servlet response (not used by this method)
 * @return an {@code AjaxJson} whose message is "添加成功" or, after an I/O failure, "添加失败"
 * @throws Exception declared for the framework; I/O failures while reading a PDF are handled here
 */
@RequestMapping(params = "cgImportPdf", method = RequestMethod.POST)
@ResponseBody
public AjaxJson cgImportPdf(HttpServletRequest request, HttpServletResponse response) throws Exception {
    String msg = "添加成功";
    AjaxJson j = new AjaxJson();
    MultipartHttpServletRequest multipartRequest = (MultipartHttpServletRequest) request;
    Map<String, MultipartFile> fileMap = multipartRequest.getFileMap();
    for (Map.Entry<String, MultipartFile> entity : fileMap.entrySet()) {
        MultipartFile file = entity.getValue(); // the uploaded file
        PdfReader reader = null;
        try {
            // A PdfReader can be created from an input stream (as here) or from a file path.
            reader = new PdfReader(file.getInputStream());
            // Region of interest: arguments are x, y, width, height.
            Rectangle2D.Float attachedF = new Rectangle2D.Float(225.1f, 39.5f, 7.738739f, 339.5f);
            RenderFilter attached = new RegionTextRenderFilter(attachedF);
            int pageNum = reader.getNumberOfPages();
            // StringBuilder avoids re-copying the accumulated text for every page.
            StringBuilder pageContent = new StringBuilder();
            for (int i = 1; i <= pageNum; i++) { // page numbers are 1-based
                pageContent.append(PdfTextExtractor.getTextFromPage(reader, i));
                // A fresh strategy is built for every page, as in the original code, so each
                // page's region text is extracted independently.
                TextExtractionStrategy strategy =
                        new FilteredTextRenderListener(new LocationTextExtractionStrategy(), attached);
                String attachedV = PdfTextExtractor.getTextFromPage(reader, i, strategy);
                System.out.println("attachedV:" + attachedV);
            }
            // Full text of the PDF.
            System.out.println("pageContent:" + pageContent);
        } catch (IOException e) {
            msg = "添加失败";
            e.printStackTrace();
        } finally {
            // reader stays null when the constructor itself failed; closing it unguarded would
            // throw a NullPointerException and hide the failure handled above.
            if (reader != null) {
                reader.close();
            }
        }
    }
    j.setMsg(msg);
    return j;
}
首先创建一个用于获取文字坐标的工具类：实现 RenderListener 接口，并重写其中的方法。
package com.jeecg.ldcorder.service;
import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import javax.imageio.ImageIO;
import com.itextpdf.awt.geom.Rectangle2D;
import com.itextpdf.awt.geom.RectangularShape;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfContentByte;
import com.itextpdf.text.pdf.parser.ImageRenderInfo;
import com.itextpdf.text.pdf.parser.RenderListener;
import com.itextpdf.text.pdf.parser.TextRenderInfo;
/**
 * Render listener that records the text chunks of a PDF page together with their bounding
 * rectangles.
 *
 * <p>Every non-empty chunk passed to {@link #renderText(TextRenderInfo)} is stored individually
 * in {@link #rows_text_rect}. In addition, chunks whose rectangle has exactly the same bottom
 * {@code y} value are merged into one entry of {@link #rectText} / {@link #textList} /
 * {@link #listY}; these three lists are index-aligned.
 */
public class TestRenderListener implements RenderListener {
    /** Bounding rectangle of each merged line; index-aligned with {@link #textList} and {@link #listY}. */
    public List<Rectangle2D.Float> rectText = new ArrayList<Rectangle2D.Float>();
    /** Concatenated text of each merged line, in the order the chunks were rendered. */
    public List<String> textList = new ArrayList<String>();
    /** Bottom y coordinate used as the key for merging chunks into a line. */
    public List<Float> listY = new ArrayList<Float>();
    /** One single-entry map (text -> rectangle) per rendered chunk, in render order. */
    public List<Map<String, Rectangle2D.Float>> rows_text_rect = new ArrayList<>();
    /** Path of the PDF file; not read or written anywhere in this class. */
    protected String filepath = null;

    public TestRenderListener() {
    }

    /** Invoked when a text block begins (the "BT" operator); nothing to do here. */
    @Override
    public void beginTextBlock() {
        // intentionally empty
    }

    /**
     * Records one rendered text chunk and its bounding rectangle.
     *
     * <p>The rectangle spans from the left/bottom of the baseline (minus 1 unit) to the
     * right/top of the ascent line (plus 1 unit). Chunks with an identical bottom y are merged:
     * the stored rectangle becomes the union of both rectangles and the text is appended.
     *
     * @param renderInfo the chunk being rendered; chunks with null or empty text are ignored
     */
    @Override
    public void renderText(TextRenderInfo renderInfo) {
        String text = renderInfo.getText();
        if (text == null || text.length() == 0) {
            return;
        }
        // Rectangle under the text (baseline) and at the top of the text (ascent line).
        RectangularShape rectBase = renderInfo.getBaseline().getBoundingRectange();
        Rectangle2D.Float rectAscen = renderInfo.getAscentLine().getBoundingRectange();
        // Bounding box of the chunk, padded by 1 unit at bottom and top.
        float leftX = (float) rectBase.getMinX();
        float leftY = (float) rectBase.getMinY() - 1;
        float rightX = (float) rectAscen.getMaxX();
        float rightY = (float) rectAscen.getMaxY() + 1;
        Rectangle2D.Float rect = new Rectangle2D.Float(leftX, leftY, rightX - leftX, rightY - leftY);
        System.out.println("text:"+text+"--x:"+rect.x + "--y:"+rect.y + "--width:"+rect.width + "--height:"+rect.height);

        // Lines are keyed by exact float equality of y, as before; a single lookup replaces
        // the former contains() + indexOf() pair.
        int index = listY.indexOf(rect.y);
        if (index >= 0) {
            Rectangle2D.Float previous = rectText.get(index);
            // Union of both rectangles. Summing the widths (the previous behaviour) ignored
            // any gap between chunks, so the merged rectangle did not reach the last chunk.
            float mergedLeft = Math.min(previous.x, rect.x);
            float mergedRight = Math.max(previous.x + previous.width, rect.x + rect.width);
            float mergedHeight = Math.max(previous.height, rect.height);
            rectText.set(index,
                    new Rectangle2D.Float(mergedLeft, rect.y, mergedRight - mergedLeft, mergedHeight));
            textList.set(index, textList.get(index) + text);
        } else {
            rectText.add(rect);
            textList.add(text);
            listY.add(rect.y);
        }

        Map<String, Rectangle2D.Float> map = new HashMap<>();
        map.put(text, rect);
        rows_text_rect.add(map);
    }

    /** Invoked when a text block ends (the "ET" operator); nothing to do here. */
    @Override
    public void endTextBlock() {
        // intentionally empty
    }

    /** Image callback; images are ignored by this listener. */
    @Override
    public void renderImage(ImageRenderInfo renderInfo) {
    }
}
调用上面创建的工具类，获取每段文字内容及其对应的坐标：
/**
 * Reads every uploaded PDF in a multipart request and prints, per page, each text chunk with
 * its bounding rectangle, followed by the full text of the document.
 *
 * <p>Chunk coordinates are collected by running a fresh {@code TestRenderListener} over each
 * page. If reading any file fails with an {@link IOException}, the response message is switched
 * to the failure text and processing continues with the next file.
 *
 * @param request  the incoming request; it is cast to {@code MultipartHttpServletRequest}, so a
 *                 non-multipart request results in a {@link ClassCastException}
 * @param response the servlet response (not used by this method)
 * @return an {@code AjaxJson} whose message is "添加成功" or, after an I/O failure, "添加失败"
 * @throws Exception declared for the framework; I/O failures while reading a PDF are handled here
 */
@RequestMapping(params = "cgImportPdf", method = RequestMethod.POST)
@ResponseBody
public AjaxJson cgImportPdf(HttpServletRequest request, HttpServletResponse response) throws Exception {
    String msg = "添加成功";
    AjaxJson j = new AjaxJson();
    MultipartHttpServletRequest multipartRequest = (MultipartHttpServletRequest) request;
    Map<String, MultipartFile> fileMap = multipartRequest.getFileMap();
    for (Map.Entry<String, MultipartFile> entity : fileMap.entrySet()) {
        MultipartFile file = entity.getValue(); // the uploaded file
        PdfReader reader = null;
        try {
            // A PdfReader can be created from an input stream (as here) or from a file path.
            reader = new PdfReader(file.getInputStream());
            PdfReaderContentParser parser = new PdfReaderContentParser(reader);
            int pageNum = reader.getNumberOfPages();
            // StringBuilder avoids re-copying the accumulated text for every page.
            StringBuilder pageContent = new StringBuilder();
            for (int i = 1; i <= pageNum; i++) { // page numbers are 1-based
                pageContent.append(PdfTextExtractor.getTextFromPage(reader, i));
                // New listener per page so the collected chunks belong to this page only.
                TestRenderListener listener = new TestRenderListener();
                parser.processContent(i, listener);
                for (Map<String, Rectangle2D.Float> chunk : listener.rows_text_rect) {
                    for (Map.Entry<String, Rectangle2D.Float> entry : chunk.entrySet()) {
                        // Each text chunk and its rectangle.
                        System.out.println(entry.getKey() + "---" + entry.getValue());
                    }
                }
            }
            // Full text of the PDF.
            System.out.println("pageContent:" + pageContent);
        } catch (IOException e) {
            msg = "添加失败";
            e.printStackTrace();
        } finally {
            // reader stays null when the constructor itself failed; closing it unguarded would
            // throw a NullPointerException and hide the failure handled above.
            if (reader != null) {
                reader.close();
            }
        }
    }
    j.setMsg(msg);
    return j;
}