PowerPoint文件读取工具类

调用示例:

 

 

File powerPointFile = new File("D:\\temp.ppt");

// Read all text content from the PowerPoint document and print it as a single string
System.out.println(PowerPointFileUtil.extractTextFromPowerPointFile(powerPointFile , "," , ";"));

 

 

工具类源码:

 

/**
 * PowerPointFileUtil.java
 * Copyright © 2010 窦海宁
 * All rights reserved
 */

package org.aiyu.core.common.util.file;

import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.commons.lang.StringUtils;
import org.apache.poi.hslf.HSLFSlideShow;
import org.apache.poi.hslf.model.AutoShape;
import org.apache.poi.hslf.model.Shape;
import org.apache.poi.hslf.model.Slide;
import org.apache.poi.hslf.usermodel.SlideShow;

/**
 * <p>PowerPoint file utility class.
 *
 * <p>General-purpose helper for extracting text from PowerPoint documents
 * through the Apache POI HSLF API.
 *
 * <p>All members are static. The class is kept {@code abstract} (rather than
 * {@code final} with a private constructor) so that existing subclasses, if
 * any, continue to compile.
 *
 * <p>Note from the original author: I am a developer of free open-source
 * tools; if you like this code and would like to support further work,
 * donations are appreciated at https://me.alipay.com/chong0660
 *
 * @author  窦海宁, [email protected]
 * @since   AiyuCommonCore-1.0
 * @version AiyuCommonCore-1.0
 */
public abstract class PowerPointFileUtil {

	/** Receives extraction failures that are deliberately not propagated to the caller. */
	private static final Logger LOGGER = Logger.getLogger(PowerPointFileUtil.class.getName());

	/**
	 * <p>Extracts the text of a PowerPoint document as a single string.
	 *
	 * <p>Texts of the shapes on one slide are joined with {@code shapeSeparator};
	 * shapes that yield no text are skipped and never produce a leading or
	 * trailing separator. Slides are joined with {@code slideSeparator}; a slide
	 * without any text still keeps its separator, so the position of a slide in
	 * the result stays stable.
	 *
	 * <p>This method is best-effort: if reading fails, the failure is logged and
	 * whatever text was collected so far is returned.
	 *
	 * @param  powerPointFile the PowerPoint file to read
	 * @param  shapeSeparator separator placed between the texts of two shapes
	 * @param  slideSeparator separator placed between two slides
	 *
	 * @return the extracted text with surrounding whitespace trimmed, or
	 *         {@code null} if any argument is {@code null}, the file is not a
	 *         regular file, or no text was extracted
	 *
	 * @modify 窦海宁, 2013-07-03
	 */
	public static String extractTextFromPowerPointFile(File powerPointFile , String shapeSeparator , String slideSeparator) {

		if (powerPointFile == null || shapeSeparator == null || slideSeparator == null) {
			return null;
		}
		if (!powerPointFile.isFile()) {
			return null;
		}

		StringBuilder text = new StringBuilder();
		try {
			SlideShow slideShow = new SlideShow(new HSLFSlideShow(powerPointFile.getCanonicalPath()));

			boolean firstSlide = true;
			for (List<String> shapeTexts : collectSlideTexts(slideShow)) {
				if (!firstSlide) {
					text.append(slideSeparator);
				}
				firstSlide = false;
				appendJoined(text, shapeTexts, shapeSeparator);
			}
		} catch (Exception ex) {
			// Boundary catch: POI may throw unchecked exceptions on damaged files,
			// and callers of this utility expect a null/partial result, not a crash.
			LOGGER.log(Level.WARNING, "Failed to extract text from PowerPoint file: " + powerPointFile, ex);
		}
		return StringUtils.trimToNull(text.toString());
	}

	/**
	 * <p>Reads the text of every slide in a slide show.
	 *
	 * @param  slideShow the slide show to read
	 *
	 * @return a list with one entry per slide, each entry being the list
	 *         returned by {@link #readSlide(Slide)} for that slide, or
	 *         {@code null} if {@code slideShow} is {@code null}
	 *
	 * @modify 窦海宁, 2008-08-07
	 */
	public static List readSlideShow(SlideShow slideShow) {

		// Raw return type is kept for source compatibility with existing callers.
		return slideShow == null ? null : collectSlideTexts(slideShow);
	}

	/**
	 * <p>Reads the text of every shape on a slide.
	 *
	 * @param  slide the slide to read
	 *
	 * @return a list with one entry per shape, each entry being the value
	 *         returned by {@link #readShape(Shape)} (possibly {@code null}),
	 *         or {@code null} if {@code slide} is {@code null}
	 *
	 * @modify 窦海宁, 2008-08-07
	 */
	public static List readSlide(Slide slide) {

		// Raw return type is kept for source compatibility with existing callers.
		return collectShapeTexts(slide);
	}

	/**
	 * <p>Reads the text of a single shape.
	 *
	 * <p>Only {@link AutoShape} instances are read; every other kind of shape
	 * yields {@code null}. A failure while reading the text is logged and also
	 * yields {@code null}.
	 *
	 * @param  shape a shape taken from a slide
	 *
	 * @return the text of the shape as a {@code String}, or {@code null} if
	 *         the shape is {@code null}, is not an {@link AutoShape}, or its
	 *         text could not be read
	 *
	 * @modify 窦海宁, 2010-01-07
	 */
	public static Object readShape(Shape shape) {

		return extractShapeText(shape);
	}

	/** Typed implementation behind {@link #readSlideShow(SlideShow)}; expects a non-null slide show. */
	private static List<List<String>> collectSlideTexts(SlideShow slideShow) {

		Slide[] slides = slideShow.getSlides();
		List<List<String>> slideTexts = new ArrayList<List<String>>(slides.length);
		for (Slide slide : slides) {
			slideTexts.add(collectShapeTexts(slide));
		}
		return slideTexts;
	}

	/** Typed implementation behind {@link #readSlide(Slide)}; returns null for a null slide. */
	private static List<String> collectShapeTexts(Slide slide) {

		if (slide == null) {
			return null;
		}
		Shape[] shapes = slide.getShapes();
		List<String> shapeTexts = new ArrayList<String>(shapes.length);
		for (Shape shape : shapes) {
			shapeTexts.add(extractShapeText(shape));
		}
		return shapeTexts;
	}

	/** Typed implementation behind {@link #readShape(Shape)}. */
	private static String extractShapeText(Shape shape) {

		if (!(shape instanceof AutoShape)) {
			// Also covers shape == null.
			return null;
		}
		try {
			return ((AutoShape) shape).getText();
		} catch (Exception ex) {
			// One unreadable shape must not abort extraction of the remaining shapes.
			LOGGER.log(Level.WARNING, "Failed to read text from shape", ex);
			return null;
		}
	}

	/** Appends the non-null entries of {@code values} to {@code target}, separated by {@code separator}. */
	private static void appendJoined(StringBuilder target , List<String> values , String separator) {

		if (values == null) {
			// A null slide contributes no text instead of aborting the whole extraction.
			return;
		}
		boolean wroteText = false;
		for (String value : values) {
			if (value == null) {
				continue;
			}
			// Separate only from text that was actually written, so skipped
			// shapes cannot leave a dangling separator behind.
			if (wroteText) {
				target.append(separator);
			}
			target.append(value);
			wroteText = true;
		}
	}

}

你可能感兴趣的:(文件读取)