poi读取ppt的例子,不只是读取ppt里面的文字,还要有文字的样式、布局、图片等。poi包从官网下载即可。
HSLF是POI读写PPT的API,例子见官方文档。
http://poi.apache.org/slideshow/quick-guide.html
文档格式
http://poi.apache.org/slideshow/ppt-file-format.html
操作Shape的API
http://poi.apache.org/slideshow/how-to-shapes.html
public class PPTReader {
/**
 * Extracts the text of {@code 2003.ppt} and prints it to standard output.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if the file cannot be opened or the text cannot be extracted
 */
public static void main(String[] args) throws Exception {
    InputStream is = new FileInputStream(new File("2003.ppt"));
    try {
        PowerPointExtractor extractor = new PowerPointExtractor(is);
        String ppString = extractor.getText();
        System.out.println(ppString);
    } finally {
        // Release the file handle even when extraction fails.
        is.close();
    }
}
/**
 * Writes every picture stored in {@code 2003.ppt} to its own file in the
 * working directory.
 *
 * <p>Files are named {@code pic_<index><extension>}, where the extension is
 * chosen from the picture type (JPEG, PNG, WMF, EMF or PICT). Pictures of any
 * other type are skipped. An {@link IOException} is caught and its stack trace
 * printed; it is not propagated to the caller.
 */
public void findpIC() {
    try {
        SlideShow ppt = new SlideShow(new HSLFSlideShow("2003.ppt"));
        // Collect all pictures contained in the presentation.
        PictureData[] pDatas = ppt.getPictureData();
        for (int i = 0; i < pDatas.length; i++) {
            PictureData pict = pDatas[i];
            // Raw bytes of this picture.
            byte[] data = pict.getData();
            // Picture type decides the file extension.
            int type = pict.getType();
            String ext;
            switch (type) {
                case Picture.JPEG: ext = ".jpg"; break;
                case Picture.PNG: ext = ".png"; break;
                case Picture.WMF: ext = ".wmf"; break;
                case Picture.EMF: ext = ".emf"; break;
                case Picture.PICT: ext = ".pict"; break;
                default: continue; // unsupported type: nothing is written
            }
            // Output file name: pic_<index><extension>
            FileOutputStream out = new FileOutputStream("pic_" + i + ext);
            try {
                out.write(data);
            } finally {
                // Close the file even if the write fails.
                out.close();
            }
        }
    } catch (IOException e) {
        // Reading the presentation or writing a picture file failed.
        e.printStackTrace();
    }
}
3/ 添加一个新的图片插入到新的幻灯片中,保存
/**
 * Adds {@code cat.jpg} as a picture on a new slide of {@code 2003.ppt},
 * exports the JPEG and PNG pictures found on the first slide, and saves the
 * presentation as {@code slideshow.ppt}.
 *
 * <p>Exported pictures are named {@code slide0_<shapeIndex>.jpg} or
 * {@code slide0_<shapeIndex>.png}. Picture shapes of other types are ignored.
 *
 * @throws IOException if the presentation or picture cannot be read, or an
 *         output file cannot be written
 */
public void addSlide() throws IOException {
    SlideShow ppt = new SlideShow(new HSLFSlideShow("2003.ppt"));
    // Register the image with the presentation.
    int idx = ppt.addPicture(new File("cat.jpg"), Picture.JPEG);
    // Set the size and position of the picture on the slide.
    Picture pict = new Picture(idx);
    pict.setAnchor(new Rectangle(100, 100, 300, 200));
    Slide slide = ppt.createSlide();
    slide.addShape(pict);
    // Export the pictures of the first slide (index 0), not of the slide created above.
    slide = ppt.getSlides()[0];
    Shape[] sh = slide.getShapes();
    for (int i = 0; i < sh.length; i++) {
        if (!(sh[i] instanceof Picture)) {
            continue;
        }
        PictureData pictData = ((Picture) sh[i]).getPictureData();
        byte[] data = pictData.getData();
        int type = pictData.getType();
        String ext;
        if (type == Picture.JPEG) {
            ext = ".jpg";
        } else if (type == Picture.PNG) {
            ext = ".png";
        } else {
            continue; // only JPEG and PNG are exported
        }
        FileOutputStream pictureOut = new FileOutputStream("slide0_" + i + ext);
        try {
            pictureOut.write(data);
        } finally {
            // Close the file even if the write fails.
            pictureOut.close();
        }
    }
    // Save the modified presentation to disk.
    FileOutputStream out = new FileOutputStream("slideshow.ppt");
    try {
        ppt.write(out);
    } finally {
        out.close();
    }
}
4/插入表格
/**
 * Adds a new slide containing a bordered two-column table to
 * {@code slideshow.ppt} and saves the result as {@code hslf-table.ppt}.
 *
 * <p>The table size is taken from the data array, so rows can be added or
 * removed without touching the table construction.
 *
 * @throws IOException if the presentation cannot be read or the output file
 *         cannot be written
 */
@Test
public void createTable() throws IOException {
    // Table contents; the first row is the header.
    String[][] data = {
        {"INPUT FILE", "NUMBER OF RECORDS"},
        {"Item File", "11,559"},
        {"Vendor File", "300"},
        {"Purchase History File", "10,000"},
        {"Total # of requisitions", "10,200,038"} };
    SlideShow ppt = new SlideShow(new HSLFSlideShow("slideshow.ppt"));
    Slide slide = ppt.createSlide();
    // Create a table sized to the data (5 rows x 2 columns here).
    Table table = new Table(data.length, data[0].length);
    for (int i = 0; i < data.length; i++) {
        for (int j = 0; j < data[i].length; j++) {
            TableCell cell = table.getCell(i, j);
            cell.setText(data[i][j]);
            RichTextRun rt = cell.getTextRun().getRichTextRuns()[0];
            rt.setFontName("Arial");
            rt.setFontSize(10);
            cell.setVerticalAlignment(TextBox.AnchorMiddle);
            cell.setHorizontalAlignment(TextBox.AlignCenter);
        }
    }
    // Apply a 1.0-wide black border to all cell edges.
    Line border = table.createBorder();
    border.setLineColor(Color.black);
    border.setLineWidth(1.0);
    table.setAllBorders(border);
    // Width of the first column.
    table.setColumnWidth(0, 300);
    // Width of the second column.
    table.setColumnWidth(1, 150);
    slide.addShape(table);
    table.moveTo(100, 100);
    // Save the presentation.
    FileOutputStream out = new FileOutputStream("hslf-table.ppt");
    try {
        ppt.write(out);
    } finally {
        // Close the file even if the write fails.
        out.close();
    }
}
5/ 去掉PPT中的形状图形,提取PPT中的声音
/**
 * Removes every shape from every existing slide of {@code bullets.ppt}.
 *
 * <p>A message is printed for each shape that was removed. The change is made
 * to the in-memory presentation only; this method does not write it back to
 * disk.
 *
 * @throws IOException if the presentation cannot be read
 */
public void removeShapes() throws IOException {
    SlideShow ppt = new SlideShow(new HSLFSlideShow("bullets.ppt"));
    // Walk the slides already in the file; a slide from createSlide() would be
    // new and would not contain the shapes that are meant to be removed.
    Slide[] slides = ppt.getSlides();
    for (int s = 0; s < slides.length; s++) {
        Slide slide = slides[s];
        Shape[] shape = slide.getShapes();
        for (int i = 0; i < shape.length; i++) {
            boolean ok = slide.removeShape(shape[i]);
            if (ok) {
                System.out.println("you are successful remove the shape");
            }
        }
    }
}
/**
 * Saves each sound embedded in {@code bullets.ppt} whose type is
 * {@code ".WAV"} to a file named after the sound.
 *
 * @throws IOException if the presentation cannot be read or a sound file
 *         cannot be written
 */
public void retrieveSound() throws IOException {
    SlideShow ppt;
    FileInputStream is = new FileInputStream("bullets.ppt");
    try {
        ppt = new SlideShow(is);
    } finally {
        // Close the input file even if parsing fails.
        is.close();
    }
    SoundData[] sound = ppt.getSoundData();
    for (int i = 0; i < sound.length; i++) {
        // Only sounds of type ".WAV" are saved; constant-first comparison
        // avoids a NullPointerException should the type be null.
        if (".WAV".equals(sound[i].getSoundType())) {
            // NOTE(review): the file name comes from the presentation itself;
            // confirm it cannot contain path separators before using this on
            // untrusted input.
            FileOutputStream out = new FileOutputStream(sound[i].getSoundName());
            try {
                out.write(sound[i].getData());
            } finally {
                // Close the file even if the write fails.
                out.close();
            }
        }
    }
}
自己画了主要类图,不全,个人理解
输入流由SlideShow读入,输出流使用文件流(FileOutputStream)写入,保存修改后关闭。
SlideShow指整个幻灯片,而Slide 指的是单张幻灯片。SlideMaster 是单张幻灯片的管理类。对于文本信息、超链接信息、声音修改使用TextBox、TextRun、SoundData等类对单张幻灯片进行修改。而背景修改需要使用SlideMaster类调用Fill类进行修改设置。