java在文本处理中的相关辅助工具类

1,java分词

package com.bobo.util;



import ICTCLAS.I3S.AC.ICTCLAS50;



/**
 * Thin wrapper around the ICTCLAS50 segmenter for splitting text into words.
 */
public class Cutwords {

    /** Charset used for every byte/String conversion handed to or received from ICTCLAS50. */
    private static final String ENCODING = "GB2312";

    /**
     * Runs the given text through ICTCLAS50 and returns the processed text.
     *
     * <p>A fresh {@code ICTCLAS50} instance is created and initialised with the
     * path {@code "."} on every call.
     * NOTE(review): confirm whether the segmenter needs an explicit release call
     * and whether re-initialising per call is intended.
     *
     * @param microblog the text to segment; {@code null} yields an empty string
     * @return the segmenter output decoded as GB2312, or an empty string when
     *         segmentation fails for any reason (the failure is reported on
     *         standard error instead of being silently discarded)
     */
    public static String Segment(String microblog) {
        if (microblog == null) {
            return "";
        }
        try {
            ICTCLAS50 segmenter = new ICTCLAS50();
            segmenter.ICTCLAS_Init(".".getBytes(ENCODING));

            // NOTE(review): the two trailing 0 arguments are passed through unchanged;
            // presumably they select the input encoding and POS tagging - confirm
            // against the ICTCLAS50 API.
            byte[] nativeBytes = segmenter.ICTCLAS_ParagraphProcess(
                    microblog.getBytes(ENCODING), 0, 0);
            return new String(nativeBytes, 0, nativeBytes.length, ENCODING);
        } catch (Exception ex) {
            // Best effort: callers still get "", but the cause is no longer lost.
            System.err.println("Cutwords.Segment failed: " + ex);
            return "";
        }
    }
}
Cutwords

2,java文件读写

package com.bobo.util;



import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.IOException;

import java.io.InputStreamReader;

import java.util.ArrayList;

import java.util.List;



public class FileUtil {



    public static ArrayList<String> FileList = new ArrayList<String>();



    /**

     * 列出某個目錄及其子目錄下所有的文件列表

     * 

     * @param filepath

     *            目錄路徑

     * @return 該路徑及其子路經下的所有文件名列表

     * @throws FileNotFoundException

     * @throws IOException

     */

    public static List<String> readDirs(String filepath)

            throws FileNotFoundException, IOException {

        try {

            File file = new File(filepath);

            if (!file.isDirectory()) {

                System.out.println("输入的不是目錄名称;");

                System.out.println("filepath:" + file.getAbsolutePath());

            } else {

                String[] flist = file.list();

                for (int i = 0; i < flist.length; i++) {

                    File newfile = new File(filepath + "/" + flist[i]);

                    if (!newfile.isDirectory()) {

                        FileList.add(newfile.getAbsolutePath());

                    } else if (newfile.isDirectory()) {

                        readDirs(filepath + "/" + flist[i]);

                    }

                }

            }

        } catch (FileNotFoundException e) {

            System.out.println(e.getMessage());

        }

        return FileList;

    }



    /**

     * 讀取文件內容,以字符串的方式返回

     * 

     * @param file

     *            需要讀取的文件名

     * @return 返回讀取的文件內容構成的字符串,行之間用\r\n進行分割

     * @throws FileNotFoundException

     * @throws IOException

     */

    public static String readFile(String file) throws FileNotFoundException,

            IOException {

        StringBuffer strSb = new StringBuffer(); // String is constant,

                                                    // StringBuffer can be

                                                    // changed.

        InputStreamReader inStrR = new InputStreamReader(new FileInputStream(

                file), "gbk"); // byte streams to character streams

        BufferedReader br = new BufferedReader(inStrR);

        String line = br.readLine();

        while (line != null) {

            strSb.append(line).append("\r\n");

            line = br.readLine();

        }



        return strSb.toString();

    }

    // 其他,一般读取文件的时候,利用bufferedReader方便,逐行写入文件的时候利用printStream比较方便



}
FileUtil

 

3,字符串工具类

package com.bobo.util;



import java.util.Stack;

import java.util.regex.Pattern;



/**
 * Null-tolerant helpers for common string checks.
 */
public class StringUtil {

    /** One or more ASCII digits; compiled once instead of on every call. */
    private static final Pattern DIGITS = Pattern.compile("[0-9]+");

    /** One or more ASCII letters of either case; compiled once instead of on every call. */
    private static final Pattern LETTERS = Pattern.compile("[a-zA-Z]+");

    /**
     * Finds the right-hand symbol that closes the first left-hand symbol,
     * taking nesting into account.
     *
     * <p>Symbols are compared against single characters of {@code str}, so only
     * one-character {@code cLeft}/{@code cRight} values can ever match. A right
     * symbol that appears while no left symbol is open is ignored.
     *
     * @param str    the string to search; {@code null} yields -1
     * @param cLeft  the opening symbol
     * @param cRight the closing symbol
     * @return index of the closing symbol matching the first opening symbol,
     *         or -1 when there is none
     */
    public static int findRightMatchChar(String str, String cLeft, String cRight) {
        if (str == null) {
            return -1;
        }
        // Only the nesting depth matters; the positions of open symbols are never needed.
        int depth = 0;
        for (int i = 0; i < str.length(); i++) {
            String current = str.substring(i, i + 1);
            if (current.equals(cLeft)) {
                depth++;
            }
            // Checked independently of the branch above so that identical
            // left/right symbols still close themselves at the same index.
            if (current.equals(cRight) && depth > 0) {
                depth--;
                if (depth == 0) {
                    return i;
                }
            }
        }
        return -1;
    }

    /**
     * Tells whether a string is {@code null}, empty or whitespace only.
     *
     * @param str the string to check
     * @return {@code true} if there is no non-whitespace content
     */
    public static boolean isNullOrEmpty(String str) {
        return str == null || str.trim().length() == 0;
    }

    /**
     * Null-safe equality; two {@code null}s are equal.
     *
     * @param str1 first string
     * @param str2 second string
     * @return {@code true} if both are {@code null} or have equal content
     */
    public static boolean equals(String str1, String str2) {
        return str1 == str2 || str1 != null && str1.equals(str2);
    }

    /**
     * Null-safe case-insensitive equality; two {@code null}s are equal,
     * consistent with {@link #equals(String, String)}.
     *
     * @param str1 first string
     * @param str2 second string
     * @return {@code true} if both are {@code null} or equal ignoring case
     */
    public static boolean equalsIgnoreCase(String str1, String str2) {
        return str1 == str2 || str1 != null && str1.equalsIgnoreCase(str2);
    }

    /**
     * Tells whether {@code str1} contains {@code str2}.
     *
     * @param str1 the string to search in
     * @param str2 the string to look for
     * @return {@code true} if {@code str1} contains {@code str2};
     *         {@code false} otherwise or when either argument is {@code null}
     */
    public static boolean contains(String str1, String str2) {
        return str1 != null && str2 != null && str1.contains(str2);
    }

    /**
     * Replaces {@code null} with the empty string.
     *
     * @param str the string to normalise
     * @return {@code ""} for {@code null}, otherwise {@code str} unchanged
     */
    public static String getString(String str) {
        return str == null ? "" : str;
    }

    /**
     * Tells whether the textual form of a value consists solely of ASCII digits.
     *
     * @param str the value to check (its {@code toString()} is examined)
     * @return {@code true} for one or more digits; {@code false} for
     *         {@code null}, the empty string or anything else
     */
    public static boolean isNumeric(Object str) {
        return str != null && DIGITS.matcher(str.toString()).matches();
    }

    /**
     * Tells whether the textual form of a value consists solely of English
     * letters, upper or lower case.
     *
     * @param str the value to check (its {@code toString()} is examined)
     * @return {@code true} for one or more letters a-z/A-Z; {@code false} for
     *         {@code null}, the empty string or anything else
     */
    public static boolean isEnglish(Object str) {
        return str != null && LETTERS.matcher(str.toString()).matches();
    }
}
StringUtil

 4,在java中运行shell命令的相关工具类

package com.bobo.util;



import java.io.BufferedReader;

import java.io.IOException;

import java.io.InputStreamReader;



/**
 * Runs an external command and collects its exit value, standard output and
 * standard error, giving up once {@link #DEFAULT_TIMEOUT} milliseconds pass.
 *
 * <p>The timeout and start time are kept in static fields, so concurrent
 * calls to {@link #exec(String)} share them.
 */
public class CommandHelper {

    /**
     * Timeout in milliseconds applied by {@link #exec(String)}.
     *
     * <p>Initialised to a usable value: with a timeout of 0 the elapsed time is
     * always {@code >= 0}, so every command would be reported as timed out
     * before it is even polled.
     */
    public static int DEFAULT_TIMEOUT = 10000;

    /** Pause in milliseconds between two checks of whether the process has exited. */
    public static final int DEFAULT_INTERVAL = 1000;

    /** Time (from {@code System.currentTimeMillis()}) at which the latest wait began. */
    public static long START;

    /**
     * Demo entry point: runs {@code wc -l *.*} and prints the result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        DEFAULT_TIMEOUT = 10000;
        try {
            System.out.println(exec("wc -l *.*").toString());
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Keep the interrupt visible to whoever owns this thread.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }

    /**
     * Executes a command and waits for it to finish or time out.
     *
     * <p>The command string is handed to {@code Runtime.exec(String)} as is.
     * The process is destroyed before returning, whether it finished, timed
     * out, or waiting failed.
     *
     * @param command the command line to run
     * @return the collected result; its exit value is
     *         {@code CommandResult.EXIT_VALUE_TIMEOUT} when the timeout elapsed
     * @throws IOException if the process cannot be started or its output cannot be read
     * @throws InterruptedException if the waiting thread is interrupted
     */
    public static CommandResult exec(String command) throws IOException,
            InterruptedException {
        Process process = Runtime.getRuntime().exec(command);
        try {
            return wait(process);
        } finally {
            process.destroy();
        }
    }

    /** Tells whether {@link #DEFAULT_TIMEOUT} milliseconds have passed since {@link #START}. */
    private static boolean isOverTime() {
        return System.currentTimeMillis() - START >= DEFAULT_TIMEOUT;
    }

    /**
     * Polls the process until it exits or the timeout elapses, then gathers its output.
     *
     * <p>Output is read only after the process has exited.
     * NOTE(review): a command that writes more than the pipe can buffer may
     * block and end up reported as a timeout - confirm whether large outputs
     * need to be supported.
     */
    private static CommandResult wait(Process process)
            throws InterruptedException, IOException {
        BufferedReader errorStreamReader = null;
        BufferedReader inputStreamReader = null;
        try {
            errorStreamReader = new BufferedReader(new InputStreamReader(
                    process.getErrorStream()));
            inputStreamReader = new BufferedReader(new InputStreamReader(
                    process.getInputStream()));

            START = System.currentTimeMillis();

            for (;;) {
                if (isOverTime()) {
                    CommandResult timedOut = new CommandResult();
                    timedOut.setExitValue(CommandResult.EXIT_VALUE_TIMEOUT);
                    timedOut.setOutput("Command process timeout");
                    return timedOut;
                }

                int exitValue;
                try {
                    exitValue = process.exitValue();
                } catch (IllegalThreadStateException e) {
                    // exitValue() throws while the process is still running.
                    Thread.sleep(DEFAULT_INTERVAL);
                    continue;
                }

                CommandResult result = new CommandResult();
                result.setExitValue(exitValue);
                // error/output stay null when the corresponding stream has nothing to read.
                if (errorStreamReader.ready()) {
                    result.setError(readAll(errorStreamReader));
                }
                if (inputStreamReader.ready()) {
                    result.setOutput(readAll(inputStreamReader));
                }
                return result;
            }
        } finally {
            closeQuietly(errorStreamReader);
            closeQuietly(inputStreamReader);
        }
    }

    /** Reads every remaining line and concatenates them without line separators. */
    private static String readAll(BufferedReader reader) throws IOException {
        StringBuilder buffer = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            buffer.append(line);
        }
        return buffer.toString();
    }

    /** Closes the reader if present; a failure to close is deliberately ignored. */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException ignored) {
            // Nothing useful can be done; the result has already been produced.
        }
    }
}
CommandHelper

 

package com.bobo.util;



/**
 * Outcome of running an external command: exit value, standard output and
 * standard error.
 */
public class CommandResult {

    /** Exit value recorded when the command did not finish within the timeout. */
    public static final int EXIT_VALUE_TIMEOUT = -1;

    private String output;

    int exitValue;

    private String error;

    /**
     * Stores the text captured from the command's standard output.
     *
     * @param output the captured output
     */
    void setOutput(String output) {
        this.output = output;
    }

    /**
     * @return the captured standard output, or {@code null} if none was set
     */
    public String getOutput() {
        return output;
    }

    /**
     * Stores the command's exit value.
     *
     * @param value the exit value, or {@link #EXIT_VALUE_TIMEOUT}
     */
    void setExitValue(int value) {
        exitValue = value;
    }

    /**
     * Public like the other getters so that callers outside this package can
     * inspect the exit code as well.
     *
     * @return the exit value, or {@link #EXIT_VALUE_TIMEOUT}
     */
    public int getExitValue() {
        return exitValue;
    }

    /**
     * @return the captured standard error, or {@code null} if none was set
     */
    public String getError() {
        return error;
    }

    /**
     * @param error the captured standard error
     */
    public void setError(String error) {
        this.error = error;
    }

    /**
     * @return {@code output:<output>;error:<error>;exitValue:<exitValue>}
     */
    @Override
    public String toString() {
        return "output:" + this.output + ";error:" + this.error + ";exitValue:"
                + this.exitValue;
    }
}
CommandResult

 5,过滤某个目录下以特定后缀结尾的文件

package com.bobo.myinterface;



import java.io.File;

import java.io.FileFilter;



/**
 * {@link FileFilter} that lets through every directory, plus any other entry
 * whose absolute path ends with a given suffix.
 */
public class MyFileFilter implements FileFilter {

    /** Suffix an entry's absolute path must end with to be accepted. */
    private String suffix;

    /**
     * @param suffix the path suffix to accept, e.g. a file extension
     */
    public MyFileFilter(String suffix) {
        this.suffix = suffix;
    }

    /**
     * @param arg0 the entry to test
     * @return {@code true} for a directory or for a path ending with the suffix
     */
    @Override
    public boolean accept(File arg0) {
        // Directories always pass, which lets a recursive walk descend into them.
        if (arg0.isDirectory()) {
            return true;
        }
        String absolutePath = arg0.getAbsolutePath();
        return absolutePath.endsWith(suffix);
    }
}
文件过滤器

在FileUtil中添加showAllFiles方法

    /**
     * Recursively collects the non-directory entries under a directory that
     * pass the filter, printing each absolute path as it is found.
     *
     * <p>Only entries accepted by {@code filter} are visited, so the filter
     * must accept directories for the walk to descend into them. A directory
     * whose content cannot be listed is skipped.
     *
     * @param dir      the directory to scan
     * @param filter   decides which entries are listed
     * @param fileList receives every accepted non-directory entry
     */
    public static void showAllFiles(File dir,FileFilter filter,ArrayList<File> fileList) {
        File[] entries = dir.listFiles(filter);
        if (entries == null) {
            // listFiles() returns null when dir is not a directory or cannot be read.
            return;
        }
        for (int i = 0; i < entries.length; i++) {
            File entry = entries[i];
            if (entry.isDirectory()) {
                showAllFiles(entry, filter, fileList);
            } else {
                System.out.println(entry.getAbsolutePath());
                fileList.add(entry);
            }
        }
    }
showAllFiles方法

最终调用

    // Usage example: start from the directory named by Constants.DataDir.
    File dataDir = new File(Constants.DataDir);

        // Collect all annotated data files, i.e. those whose path ends with ".dealed"

        ArrayList<File> fileList = new ArrayList<File>();

        FileUtil.showAllFiles(dataDir, new MyFileFilter(".dealed"), fileList);

        // Print how many matching files were found.
        System.out.println(fileList.size());

        
列举特定后缀文件的调用方法

 

你可能感兴趣的:(java)