office文件内容读取

实现对office文件内容的读取
使用jdk版本为1.7
使用的jar包为poi-3.9(下载链接)

import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;

/**
 * Created by leo01 on 17-1-27.
 */
public class test {
    public static void main(String[] args) throws Exception
    {
        String targetDirectory = "/home/leo01/Desktop";
        //获取文件夹位置
        File file = new File(targetDirectory);
        //保存所有文件
        String test[];
        test = file.list();
        //获取所有文件的文件名
        for(int i = 0;i
        {
            String ss = "";
            File f = new File(test[i]);
            String filename = f.getName();
            //prefix保存了文件后缀
            String prefix = filename.substring(filename.lastIndexOf(".")+1);
            //fis2 为文件路径
            String fis2 = targetDirectory+File.separator+test[i];
            FileInputStream fis = new FileInputStream(fis2);
            //*.doc 文件读取
            if("doc".equals(prefix))
            {
                WordExtractor wordExtractor;
                try
                {
                    FileInputStream docfile = new FileInputStream(fis2);
                    wordExtractor = new WordExtractor(docfile);
                    String[] paragraph = wordExtractor.getParagraphText();
                    for(int j=0;j
            // *.xls 文件读取
            else if("xls".equals(prefix))
            {
                HSSFWorkbook hssfWorkbook = new HSSFWorkbook(fis);
                //获得第一个工作表Sheet
                HSSFSheet hssfSheet = hssfWorkbook.getSheetAt(0);
                //获得第一行ROW
                HSSFRow hssfRow = hssfSheet.getRow(0);
                //用StringBuffer 得到 Excel 表格第一行的内容并用都好分隔
                StringBuffer stringBuffer = new StringBuffer();
                for(int j=0;j
                {
                    stringBuffer.append(hssfRow.getCell(j));
                    int fc = hssfRow.getLastCellNum()-1;
                    if(j != fc)
                    {
                        stringBuffer.append(",");
                    }
                }
                System.out.println(stringBuffer);
            }
            //*.xlsx 文件读取
            /*else if("xlsx".equals(prefix))
            {
                XSSFWorkbook xssfWorkbook = new XSSFWorkbook(fis);
                //取得第一个工作表Sheet
                XSSFSheet xssfSheet = xssfWorkbook.getSheetAt(0);
                //取得第一行
                XSSFRow xssfRow = xssfSheet.getRow(0);
                //循环列cell
                StringBuffer stringBuffer = new StringBuffer();
                for(int j =0;j
            //*.txt 文件读取
            else if("txt".equals(prefix))
            {
                BufferedReader reader;
                try
                {
                    reader = new BufferedReader(new FileReader(fis2));
                    while(reader.ready())
                    {
                        ss += reader.readLine();
                    }
                    reader.close();
                    System.out.println(ss);
                }
                catch (Exception e)
                {
                    e.printStackTrace();
                }
            }
        }
    }
}

读取docx和xlsx文件时总是出现java.lang.NoClassDefFoundError异常,还不知道为什么会出现这个异常。
office文件内容读取_第1张图片

你可能感兴趣的:(新手java学习,java,office)