1,java分词
package com.bobo.util;

import ICTCLAS.I3S.AC.ICTCLAS50;

/**
 * Word-segmentation helper backed by the ICTCLAS50 native library.
 */
public class Cutwords {

    /** Charset used for every byte/String conversion exchanged with the native library. */
    private static final String ENCODING = "GB2312";

    /**
     * Segments the given text with ICTCLAS50.
     *
     * <p>The segmenter is initialised with {@code "."} as its path argument, fed the
     * text as GB2312 bytes, and released again before the method returns.
     *
     * @param microblog the text to segment; {@code null} yields an empty string
     * @return the segmented text decoded from GB2312, or an empty string when the
     *         input is {@code null}, initialisation fails, or any exception occurs
     */
    public static String Segment(String microblog) {
        if (microblog == null) {
            return "";
        }
        ICTCLAS50 segmenter = null;
        boolean initialized = false;
        try {
            segmenter = new ICTCLAS50();
            initialized = segmenter.ICTCLAS_Init(".".getBytes(ENCODING));
            if (!initialized) {
                System.err.println("ICTCLAS50 initialization failed");
                return "";
            }
            // NOTE(review): the two zero arguments are presumably the encoding hint and
            // the POS-tagging switch - confirm against the ICTCLAS50 API reference.
            byte[] nativeBytes = segmenter.ICTCLAS_ParagraphProcess(
                    microblog.getBytes(ENCODING), 0, 0);
            return new String(nativeBytes, 0, nativeBytes.length, ENCODING);
        } catch (Exception ex) {
            // Keep the historical "empty string on failure" contract, but no longer
            // hide the reason for the failure.
            ex.printStackTrace();
            return "";
        } finally {
            if (initialized) {
                // Release the native resources acquired by ICTCLAS_Init.
                segmenter.ICTCLAS_Exit();
            }
        }
    }
}
2,java文件读写
package com.bobo.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

/**
 * File-system helpers: recursive directory listing and whole-file reading.
 */
public class FileUtil {

    /**
     * Running record of every path found by any {@link #readDirs(String)} call.
     * It is still appended to so that existing code reading this field keeps
     * working; new code should use the list returned by {@code readDirs} instead.
     */
    public static ArrayList<String> FileList = new ArrayList<String>();

    /**
     * Lists all files in a directory and, recursively, in its sub-directories.
     *
     * <p>If {@code filepath} is not a directory, a message is printed to standard
     * output and an empty list is returned.
     *
     * @param filepath path of the directory to scan
     * @return absolute paths of the files found by this call only
     * @throws FileNotFoundException declared for backward compatibility
     * @throws IOException declared for backward compatibility
     */
    public static List<String> readDirs(String filepath)
            throws FileNotFoundException, IOException {
        List<String> found = new ArrayList<String>();
        File root = new File(filepath);
        if (!root.isDirectory()) {
            System.out.println("输入的不是目錄名称;");
            System.out.println("filepath:" + root.getAbsolutePath());
            return found;
        }
        collectFiles(root, found);
        // Mirror the result into the legacy shared list.
        FileList.addAll(found);
        return found;
    }

    /** Appends the absolute path of every regular file below {@code dir} to {@code found}. */
    private static void collectFiles(File dir, List<String> found) {
        String[] names = dir.list();
        if (names == null) {
            // File.list() returns null when the directory cannot be read.
            return;
        }
        for (String name : names) {
            File child = new File(dir, name);
            if (child.isDirectory()) {
                collectFiles(child, found);
            } else {
                found.add(child.getAbsolutePath());
            }
        }
    }

    /**
     * Reads a whole file, decoded as GBK, and returns it as one string.
     *
     * @param file name of the file to read
     * @return the file content; every line is terminated with {@code \r\n}
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     */
    public static String readFile(String file)
            throws FileNotFoundException, IOException {
        StringBuilder content = new StringBuilder();
        FileInputStream in = new FileInputStream(file);
        try {
            // Byte stream -> character stream -> line-oriented reader.
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, "gbk"));
            String line = reader.readLine();
            while (line != null) {
                content.append(line).append("\r\n");
                line = reader.readLine();
            }
        } finally {
            // Always release the file handle, even when reading fails.
            in.close();
        }
        return content.toString();
    }

    // Note: BufferedReader is convenient for reading files; PrintStream is
    // convenient for writing a file line by line.
}
3,字符串工具类
package com.bobo.util;

import java.util.Stack;
import java.util.regex.Pattern;

/**
 * Null-tolerant string helpers.
 */
public class StringUtil {

    /** One or more ASCII digits; compiled once instead of on every call. */
    private static final Pattern DIGITS = Pattern.compile("[0-9]+");

    /** One or more ASCII letters; compiled once instead of on every call. */
    private static final Pattern LETTERS = Pattern.compile("[a-zA-Z]+");

    /**
     * Finds the right-hand symbol that closes the first left-hand symbol,
     * taking nesting into account.
     *
     * <p>Only single-character symbols can match, because the text is compared
     * one character at a time. Right-hand symbols that appear before any
     * left-hand symbol are ignored.
     *
     * @param str    the text to search; {@code null} yields -1
     * @param cLeft  the left (opening) symbol
     * @param cRight the right (closing) symbol
     * @return index of the right symbol matching the first left symbol, or -1
     *         if there is none
     */
    public static int findRightMatchChar(String str, String cLeft, String cRight) {
        if (str == null) {
            return -1;
        }
        Stack<Integer> open = new Stack<Integer>();
        boolean pushAtLeastOnce = false;
        for (int i = 0; i < str.length(); i++) {
            String current = str.substring(i, i + 1);
            if (current.equals(cLeft)) {
                open.push(i);
                pushAtLeastOnce = true;
            }
            // Guarded pop: an unmatched right symbol must not crash the search.
            if (current.equals(cRight) && !open.isEmpty()) {
                open.pop();
            }
            if (pushAtLeastOnce && open.isEmpty()) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Tells whether a string is {@code null}, empty, or whitespace only.
     *
     * @param str the string to test
     * @return {@code true} if {@code str} is null or blank after trimming
     */
    public static boolean isNullOrEmpty(String str) {
        return str == null || str.trim().length() == 0;
    }

    /**
     * Null-safe equality test.
     *
     * @param str1 first string, may be null
     * @param str2 second string, may be null
     * @return {@code true} if both are null or both have the same content
     */
    public static boolean equals(String str1, String str2) {
        return str1 == str2 || (str1 != null && str1.equals(str2));
    }

    /**
     * Null-safe, case-insensitive equality test.
     *
     * @param str1 first string, may be null
     * @param str2 second string, may be null
     * @return {@code true} if both are null or both are equal ignoring case
     */
    public static boolean equalsIgnoreCase(String str1, String str2) {
        if (str1 == null) {
            // Agree with equals(String, String): two nulls are equal.
            return str2 == null;
        }
        return str1.equalsIgnoreCase(str2);
    }

    /**
     * Tells whether {@code str1} contains {@code str2}.
     *
     * @param str1 the source string, may be null
     * @param str2 the string to look for, may be null
     * @return {@code true} if both are non-null and {@code str1} contains
     *         {@code str2}; {@code false} otherwise
     */
    public static boolean contains(String str1, String str2) {
        return str1 != null && str2 != null && str1.contains(str2);
    }

    /**
     * Replaces {@code null} with the empty string.
     *
     * @param str the string to check
     * @return {@code ""} if {@code str} is null, otherwise {@code str} itself
     */
    public static String getString(String str) {
        return str == null ? "" : str;
    }

    /**
     * Tells whether the text form of a value consists solely of ASCII digits.
     *
     * @param str the value to test, converted with {@code toString()}
     * @return {@code true} for one or more digits; {@code false} for null,
     *         empty, or anything else
     */
    public static boolean isNumeric(Object str) {
        return str != null && DIGITS.matcher(str.toString()).matches();
    }

    /**
     * Tells whether the text form of a value consists solely of ASCII letters.
     *
     * @param str the value to test, converted with {@code toString()}
     * @return {@code true} for one or more letters of either case;
     *         {@code false} for null, empty, or anything else
     */
    public static boolean isEnglish(Object str) {
        return str != null && LETTERS.matcher(str.toString()).matches();
    }
}
4,在java中运行shell命令的相关工具类
package com.bobo.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Runs an external command, polls until it exits or a timeout elapses, and
 * returns what it wrote to its output and error streams.
 */
public class CommandHelper {

    /**
     * Maximum time, in milliseconds, to wait for a command. Callers may
     * overwrite it. It now starts at 10000 instead of 0, because a zero timeout
     * makes every command report a timeout immediately.
     */
    public static int DEFAULT_TIMEOUT = 10000;

    /** Pause, in milliseconds, between two checks of whether the process has exited. */
    public static final int DEFAULT_INTERVAL = 1000;

    /** Time, in milliseconds since the epoch, at which the most recent wait began. */
    public static long START;

    /**
     * Demo entry point: runs {@code wc -l *.*} and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        DEFAULT_TIMEOUT = 10000;
        try {
            System.out.println(CommandHelper.exec("wc -l *.*").toString());
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can see it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }

    /**
     * Executes a command and waits for it to finish or time out.
     *
     * @param command the command line handed to {@link Runtime#exec(String)}
     * @return the exit value plus captured output and error text, or a result
     *         carrying {@link CommandResult#EXIT_VALUE_TIMEOUT} on timeout
     * @throws IOException if the process cannot be started or its streams read
     * @throws InterruptedException if the waiting thread is interrupted
     */
    public static CommandResult exec(String command) throws IOException,
            InterruptedException {
        Process process = Runtime.getRuntime().exec(command);
        try {
            return wait(process);
        } finally {
            // Destroy the process on every path, including timeout and exceptions.
            process.destroy();
        }
    }

    /** Tells whether {@link #DEFAULT_TIMEOUT} milliseconds have passed since {@link #START}. */
    private static boolean isOverTime() {
        return System.currentTimeMillis() - START >= DEFAULT_TIMEOUT;
    }

    /**
     * Polls the process until it exits or the timeout elapses, then builds the result.
     *
     * <p>NOTE(review): the streams are only read after the process has exited, so a
     * command producing a lot of output may stall until the timeout - confirm
     * whether the streams should be drained while waiting.
     */
    private static CommandResult wait(Process process)
            throws InterruptedException, IOException {
        BufferedReader errorStreamReader = null;
        BufferedReader inputStreamReader = null;
        try {
            errorStreamReader = new BufferedReader(new InputStreamReader(
                    process.getErrorStream()));
            inputStreamReader = new BufferedReader(new InputStreamReader(
                    process.getInputStream()));

            // Timeout control starts here.
            START = System.currentTimeMillis();
            boolean isFinished = false;
            for (;;) {
                // The timeout is checked first, so it wins over a process that
                // finished during the last sleep.
                if (isOverTime()) {
                    CommandResult result = new CommandResult();
                    result.setExitValue(CommandResult.EXIT_VALUE_TIMEOUT);
                    result.setOutput("Command process timeout");
                    return result;
                }

                if (isFinished) {
                    CommandResult result = new CommandResult();
                    result.setExitValue(process.waitFor());
                    String errorText = readIfReady(errorStreamReader);
                    if (errorText != null) {
                        result.setError(errorText);
                    }
                    String outputText = readIfReady(inputStreamReader);
                    if (outputText != null) {
                        result.setOutput(outputText);
                    }
                    return result;
                }

                try {
                    // exitValue() throws while the process is still running.
                    process.exitValue();
                    isFinished = true;
                } catch (IllegalThreadStateException stillRunning) {
                    Thread.sleep(DEFAULT_INTERVAL);
                }
            }
        } finally {
            closeQuietly(errorStreamReader);
            closeQuietly(inputStreamReader);
        }
    }

    /**
     * Reads every remaining line from the reader, joined without separators.
     * Returns {@code null} when the reader has nothing ready to read.
     */
    private static String readIfReady(BufferedReader reader) throws IOException {
        if (!reader.ready()) {
            return null;
        }
        StringBuilder buffer = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            buffer.append(line);
        }
        return buffer.toString();
    }

    /** Closes the reader if it was opened; a failure to close is deliberately ignored. */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails.
        }
    }
}
package com.bobo.util; public class CommandResult { public static final int EXIT_VALUE_TIMEOUT = -1; private String output; void setOutput(String error) { output = error; } public String getOutput() { return output; } int exitValue; void setExitValue(int value) { exitValue = value; } int getExitValue() { return exitValue; } private String error; /** * @return the error */ public String getError() { return error; } /** * @param error * the error to set */ public void setError(String error) { this.error = error; } @Override public String toString() { return "output:" + this.output + ";error:" + this.error + ";exitValue:" + this.exitValue; } }
5,过滤某个目录下以特定后缀结尾的文件
package com.bobo.myinterface;

import java.io.File;
import java.io.FileFilter;

/**
 * File filter that accepts every directory, plus any file whose absolute path
 * ends with a given suffix.
 */
public class MyFileFilter implements FileFilter {

    private String suffix;

    /**
     * @param suffix the ending an accepted file's absolute path must have
     */
    public MyFileFilter(String suffix) {
        this.suffix = suffix;
    }

    /**
     * @param arg0 the file or directory to test
     * @return {@code true} for a directory or for a path ending with the suffix
     */
    @Override
    public boolean accept(File arg0) {
        // Directories always pass, whatever their name.
        if (arg0.isDirectory()) {
            return true;
        }
        return arg0.getAbsolutePath().endsWith(this.suffix);
    }
}
在FileUtil中添加showAllFiles方法
/**
 * Recursively collects the files under a directory that pass a filter,
 * printing the absolute path of each one as it is found.
 *
 * <p>The filter is applied to sub-directories as well, so it must accept
 * directories for the search to descend into them.
 *
 * @param dir      the directory to scan; nothing happens if it is not a
 *                 readable directory
 * @param filter   the filter applied to every entry of each directory
 * @param fileList the list that receives the accepted files
 */
public static void showAllFiles(File dir, FileFilter filter, ArrayList<File> fileList) {
    File[] entries = dir.listFiles(filter);
    if (entries == null) {
        // listFiles returns null for a non-directory or an unreadable directory.
        return;
    }
    for (File entry : entries) {
        if (entry.isDirectory()) {
            showAllFiles(entry, filter, fileList);
        } else {
            System.out.println(entry.getAbsolutePath());
            fileList.add(entry);
        }
    }
}
最终调用
File dataDir = new File(Constants.DataDir); // 得到所有标注过的数据 ArrayList<File> fileList = new ArrayList<File>(); FileUtil.showAllFiles(dataDir, new MyFileFilter(".dealed"), fileList); System.out.println(fileList.size());