使用POI读写PowerPoint文件(兼容ppt与pptx版本)

阅读更多

调用示例:

 

 

// Extract all text from the presentation and print it as a single string;
// "," is passed as the shape separator and ";" as the slide separator.
File powerPointFile = new File("D:\\temp.ppt");
String extractedText = PowerPointFileUtil.extractTextFromPowerPointFile(powerPointFile , "," , ";");
System.out.println(extractedText);

 

 

工具类源码:

 

/**
 * BasePowerPointFileUtil.java
 * Copyright ® 2017 窦海宁
 * All rights reserved
 */

package org.aiyu.core.common.util.file.office;

import java.util.ArrayList;
import java.util.List;

import org.apache.poi.sl.usermodel.AutoShape;
import org.apache.poi.sl.usermodel.Shape;
import org.apache.poi.sl.usermodel.Slide;
import org.apache.poi.sl.usermodel.SlideShow;

/**
 * Base class for the PowerPoint file utilities.
 *
 * <p>Provides the shared traversal used to extract text from a PowerPoint document:
 * slide show, then slides, then shapes.
 *
 * @author 窦海宁, [email protected]
 * @since AiyuCommonCore-1.0
 * @version AiyuCommonCore-1.0
 */
public abstract class BasePowerPointFileUtil {

    /**
     * Reads every slide of the given slide show.
     *
     * @param slideShow the slide show to read; may be {@code null}
     *
     * @return one entry per slide, each entry being the list produced by
     *         {@link #readSlide(Slide)}; {@code null} when {@code slideShow} is {@code null}
     *
     * @modify 窦海宁, 2017-01-18
     */
    protected static List<List<Object>> readSlideShow(SlideShow<?, ?> slideShow) {

        if (slideShow == null) {
            return null;
        }

        List<List<Object>> slideList = new ArrayList<List<Object>>();
        for (Slide<?, ?> slide : slideShow.getSlides()) {
            slideList.add(BasePowerPointFileUtil.readSlide(slide));
        }
        return slideList;
    }

    /**
     * Reads every shape of the given slide.
     *
     * @param slide the slide to read; may be {@code null}
     *
     * @return one entry per shape, each entry being the value produced by
     *         {@link #readShape(Shape)} (so entries may be {@code null});
     *         {@code null} when {@code slide} is {@code null}
     *
     * @modify 窦海宁, 2017-01-18
     */
    protected static List<Object> readSlide(Slide<?, ?> slide) {

        if (slide == null) {
            return null;
        }

        List<Object> shapeList = new ArrayList<Object>();
        for (Shape<?, ?> shape : slide.getShapes()) {
            shapeList.add(BasePowerPointFileUtil.readShape(shape));
        }
        return shapeList;
    }

    /**
     * Reads the text of a single shape.
     *
     * <p>Only shapes that are an {@link AutoShape} are read; every other kind of shape
     * yields {@code null}.
     *
     * @param shape a shape taken from a slide; may be {@code null}
     *
     * @return the shape's text as a {@code String}, or {@code null} when the shape is
     *         {@code null}, is not an {@code AutoShape}, or its text could not be read
     *
     * @modify 窦海宁, 2017-01-18
     */
    protected static Object readShape(Shape<?, ?> shape) {

        String returnValue = null;
        // instanceof is false for null, so no separate null check is needed.
        if (shape instanceof AutoShape) {
            try {
                returnValue = ((AutoShape<?, ?>) shape).getText();
            } catch (Exception ex) {
                // Best effort: a single unreadable shape must not abort the whole extraction.
                ex.printStackTrace();
            }
        }
        return returnValue;
    }
}

PowerPoint2003版本工具类:
 
/**
 * PowerPoint2003FileUtil.java
 * Copyright ® 2010 窦海宁
 * All rights reserved
 */

package org.aiyu.core.common.util.file.office;

import java.io.File;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hslf.usermodel.HSLFSlideShow;
import org.apache.poi.hslf.usermodel.HSLFSlideShowImpl;
import org.apache.poi.sl.usermodel.SlideShow;

/**
 * 

PowerPoint2003版文件工具类 * *

通用的PowerPoint2003版文件工具类,可用于从PowerPoint文档中抽取文本信息 * * @author 窦海宁, [email protected] * @since AiyuCommonCore-1.0 * @version AiyuCommonCore-1.0 */ public abstract class PowerPoint2003FileUtil extends BasePowerPointFileUtil { /** *

从PowerPoint文档中提取文本信息 * * @param powerPointFile PowerPoint文件 * @param shapeSeparator Shape分隔符 * @param slideSeparator Slide分隔符 * * @return 提取后的文本信息 * * @modify 窦海宁, 2017-01-18 */ protected static String extractTextFromPowerPointFile(File powerPointFile , String shapeSeparator , String slideSeparator) { StringBuffer returnValue = new StringBuffer(); if (powerPointFile != null && slideSeparator != null && shapeSeparator != null) { if (powerPointFile.isFile()) { try { SlideShow slideShow = new HSLFSlideShow(new HSLFSlideShowImpl(powerPointFile.getCanonicalPath())); Iterator slideIterator = PowerPoint2003FileUtil.readSlideShow(slideShow).iterator(); //遍历Slide while (slideIterator.hasNext()) { Iterator shapeIterator = ((List) slideIterator.next()).iterator(); //遍历Shape while (shapeIterator.hasNext()) { Object shapeValue = shapeIterator.next(); if (shapeValue != null) { returnValue.append((String) shapeValue); if (shapeIterator.hasNext()) { returnValue.append(shapeSeparator); } } } if (slideIterator.hasNext()) { returnValue.append(slideSeparator); } } } catch (Exception ex) { ex.printStackTrace(); } } } return StringUtils.trimToNull(returnValue.toString()); } }

 
PowerPoint2007版本工具类:
 
/**
 * PowerPoint2007FileUtil.java
 * Copyright ® 2017 窦海宁
 * All rights reserved
 */

package org.aiyu.core.common.util.file.office;

import java.io.File;
import java.io.FileInputStream;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xslf.usermodel.XMLSlideShow;

/**
 * 

PowerPoint2007版文件工具类 * *

通用的PowerPoint2007版文件工具类,可用于从PowerPoint文档中抽取文本信息 * * @author 窦海宁, [email protected] * @since AiyuCommonCore-1.0 * @version AiyuCommonCore-1.0 */ public abstract class PowerPoint2007FileUtil extends BasePowerPointFileUtil { /** *

从PowerPoint文档中提取文本信息 * * @param powerPointFile PowerPoint文件 * @param shapeSeparator Shape分隔符 * @param slideSeparator Slide分隔符 * * @return 提取后的文本信息 * * @modify 窦海宁, 2017-01-18 */ protected static String extractTextFromPowerPointFile(File powerPointFile , String shapeSeparator , String slideSeparator) { StringBuffer returnValue = new StringBuffer(); if (powerPointFile != null && slideSeparator != null && shapeSeparator != null) { if (powerPointFile.isFile()) { try { XMLSlideShow slideShow = new XMLSlideShow(new FileInputStream(powerPointFile)); Iterator slideIterator = PowerPoint2007FileUtil.readSlideShow(slideShow).iterator(); //遍历Slide while (slideIterator.hasNext()) { Iterator shapeIterator = ((List) slideIterator.next()).iterator(); //遍历Shape while (shapeIterator.hasNext()) { Object shapeValue = shapeIterator.next(); if (shapeValue != null) { returnValue.append((String) shapeValue); if (shapeIterator.hasNext()) { returnValue.append(shapeSeparator); } } } if (slideIterator.hasNext()) { returnValue.append(slideSeparator); } } } catch (Exception ex) { ex.printStackTrace(); } } } return StringUtils.trimToNull(returnValue.toString()); } }

 
统一调用工具类:
 
/**
 * PowerPointFileUtil.java
 * Copyright ® 2017 窦海宁
 * All rights reserved
 */

package org.aiyu.core.common.util.file.office;

import java.io.File;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;

/**
 * 

PowerPoint文件工具类 * *

通用的PowerPoint文件工具类,可用于从PowerPoint文档中抽取文本信息 * * @author 窦海宁, [email protected] * @since AiyuCommonCore-1.0 * @version AiyuCommonCore-1.0 */ public abstract class PowerPointFileUtil extends BasePowerPointFileUtil { /** *

从PowerPoint文档中提取文本信息 * * @param powerPointFile PowerPoint文件 * @param shapeSeparator Shape分隔符 * @param slideSeparator Slide分隔符 * * @return 提取后的文本信息 * * @modify 窦海宁, 2017-02-06 */ public static String extractTextFromPowerPointFile(File powerPointFile , String shapeSeparator , String slideSeparator) { String resultText = null; if (powerPointFile != null && powerPointFile.exists()) { String extension = FilenameUtils.getExtension(powerPointFile.getName()); if (StringUtils.equalsIgnoreCase("ppt" , extension)) { //Office2003版文件处理 resultText = PowerPoint2003FileUtil.extractTextFromPowerPointFile(powerPointFile , shapeSeparator , slideSeparator); } else if (StringUtils.equalsIgnoreCase("pptx" , extension)) { //Office2007版文件处理 resultText = PowerPoint2003FileUtil.extractTextFromPowerPointFile(powerPointFile , shapeSeparator , slideSeparator); } else { //文件类型有误 } } return resultText; } }

 
统一调用工具类通过文件扩展名(ppt 与 pptx,不区分大小写)判断文件版本,暂时没有想到更好的办法;本工具类基于 POI 3.15 实现,无须在目标机器上安装 Office 软件即可读取文件中的文本内容。
  • PowerPointFileUtil.rar (3.6 KB)
  • 下载次数: 23

你可能感兴趣的:(java,office,powerpoint,poi)