使用 iText 操作 PDF:获取 PDF 的文本内容及其坐标

class TestRenderListener implements RenderListener {

        //用来存放文字的矩形
        List rectText = new ArrayList();
        //用来存放文字
        List textList = new ArrayList();
        //用来存放文字的y坐标
        List listY = new ArrayList();
        //用来存放每一行文字的坐标位置
        List> rows_text_rect = new ArrayList<>();
        //PDF文件的路径
        protected String filepath = null;

        public TestRenderListener() {
        }

        //step 2,遇到"BT"执行
        @Override
        public void beginTextBlock() {
            // TODO Auto-generated method stub
        }

        //step 3

        /**
         * 文字主要处理方法
         */
        @Override
        public void renderText(TextRenderInfo renderInfo) {
            //获取文字的下面的矩形
            //Rectangle2D.Float rectBase = renderInfo.getBaseline().getBoundingRectange();


            String text = renderInfo.getText();
            if (text.length() > 0) {
                RectangularShape rectBase = renderInfo.getBaseline().getBoundingRectange();
                //获取文字下面的矩形
                Rectangle2D.Float rectAscen = renderInfo.getAscentLine().getBoundingRectange();
                //计算出文字的边框矩形
                float leftX = (float) rectBase.getMinX();
                float leftY = (float) rectBase.getMinY() - 1;
                float rightX = (float) rectAscen.getMaxX();
                float rightY = (float) rectAscen.getMaxY()   1;

                Rectangle2D.Float rect = new Rectangle2D.Float(leftX, leftY, rightX - leftX, rightY - leftY);

//                System.out.println("text:"   text   "--x:"   rect.x   "--y:"   rect.y   "--width:"   rect.width   "--height:"   rect.height);

                if (listY.contains(rect.y)) {
                    int index = listY.indexOf(rect.y);
                    float tempx = rect.x > rectText.get(index).x ? rectText.get(index).x : rect.x;
                    rectText.set(index, new Rectangle2D.Float(tempx, rect.y, rect.width   rectText.get(index).width, rect.height));
                    textList.set(index, textList.get(index)   text);
                } else {
                    rectText.add(rect);
                    textList.add(text);
                    listY.add(rect.y);
                }

                Map map = new HashMap<>();
                map.put(text, rect);
                rows_text_rect.add(map);
            }
        }

        //step 4(最后执行的,只执行一次),遇到“ET”执行
        @Override
        public void endTextBlock() {
            // TODO Auto-generated method stub
        }

        //step 1(图片处理方法)
        @Override
        public void renderImage(ImageRenderInfo renderInfo) {

        }
    }


        // Usage example: parse every page of the PDF and print each text chunk with its rectangle.
        // NOTE(review): pdfPath and positionMap are not declared in this snippet; they are
        // presumably defined by the surrounding code - confirm before use.
        try {
            PdfReader reader = new PdfReader(pdfPath);
            try {
                // Content parser used to walk the page content streams.
                PdfReaderContentParser parser = new PdfReaderContentParser(reader);
                // Stamper writing a copy of the document to the output file.
                PdfStamper stamper = new PdfStamper(reader, new FileOutputStream("E:/pdftest/test2.pdf"));
                try {
                    // Overlay canvas of page 1 (not used further in this snippet).
                    PdfContentByte page = stamper.getOverContent(1);
                    for (int i = 1; i <= reader.getNumberOfPages(); i++) {
                        // A fresh listener per page, so results are not mixed between pages.
                        TestRenderListener listener = new TestRenderListener();
                        // Parse page i; the listener collects the text and its rectangles.
                        parser.processContent(i, listener);
                        // Per-chunk results. The merged per-row data is available in
                        // listener.rectText, listener.textList and listener.listY.
                        List<Map<String, Rectangle2D.Float>> list_text = listener.rows_text_rect;
                        for (Map<String, Rectangle2D.Float> map : list_text) {
                            for (Map.Entry<String, Rectangle2D.Float> entry : map.entrySet()) {
                                System.out.println(entry.getKey() + "----" + entry.getValue());
                            }
                        }
                    }
                } finally {
                    // The output file is only finished once the stamper is closed.
                    stamper.close();
                }
            } finally {
                reader.close();
            }
            System.out.println(JSON.toJSON(positionMap));
        } catch (Exception ex) {
            ex.printStackTrace();
        }

你可能感兴趣的:(itextPDF操作PDF,获取PDF内容)