当使用 StringTokenizer 拆分字符串不能达到预期结果时(它会把分隔符参数中的每个字符都当作独立的分隔符,而不是把整个字符串当作一个分隔符),就需要我们自定义实现符合需求的功能了。
package com.kevin.test;

import java.util.StringTokenizer;

import com.kevin.tokenizer.StringToken;

/**
 * Compares the JDK {@link StringTokenizer} with the custom {@link StringToken}
 * when the delimiter is a multi-character string.
 * <p>
 * {@code StringTokenizer} treats every character of its delimiter argument as
 * an independent delimiter, so it cannot split on the two-character sequence
 * "中国" as a whole. The sample text used here is:
 * </p>
 * <p>
 * 中国人民银行,我来自中国,ABC国中DEF中国GHIJ国家KLM中央NOPQRSTUVWXYZ
 * </p>
 *
 * @author kevin
 */
public class MyTokenTest {

    /** Number of nanoseconds in one millisecond, used to report timings in ms. */
    private static final long NANOS_PER_MILLI = 1000000L;

    /**
     * Prints the tab-joined tokens and the time (in milliseconds) needed for
     * one million tokenization passes, first for the JDK tokenizer and then for
     * the custom one.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String str = "中国人民银行,我来自中国,ABC国中DEF中国GHIJ国家KLM中央NOPQRSTUVWXYZ";
        String delims = "中国";
        int count = 1000000;
        System.out.println(useJDK(str, delims) + "->|use time:" + useJDKTimes(str, delims, count));
        System.out.println(myStringToken(str, delims) + "->|use time:" + myStringTokenTimes(str, delims, count));
    }

    /**
     * Tokenizes {@code str} with {@link StringTokenizer}.
     *
     * @param str    text to split
     * @param delims delimiter characters handed to {@code StringTokenizer}
     * @return every token followed by a tab character, concatenated in order
     */
    public static String useJDK(String str, String delims) {
        StringTokenizer tokenizer = new StringTokenizer(str, delims);
        StringBuilder builder = new StringBuilder(str.length());
        while (tokenizer.hasMoreTokens()) {
            // Chained appends avoid building a temporary String per token.
            builder.append(tokenizer.nextToken()).append('\t');
        }
        return builder.toString();
    }

    /**
     * Times {@code count} full tokenization passes using {@link StringTokenizer}.
     *
     * @param str    text to split
     * @param delims delimiter characters handed to {@code StringTokenizer}
     * @param count  number of passes; values below one perform no work
     * @return elapsed time in milliseconds
     */
    public static long useJDKTimes(String str, String delims, int count) {
        // nanoTime is monotonic; currentTimeMillis may jump if the clock is adjusted.
        long start = System.nanoTime();
        for (int i = 0; i < count; i++) {
            StringTokenizer tokenizer = new StringTokenizer(str, delims);
            StringBuilder builder = new StringBuilder(str.length());
            while (tokenizer.hasMoreTokens()) {
                builder.append(tokenizer.nextToken());
            }
            builder.toString();
        }
        return (System.nanoTime() - start) / NANOS_PER_MILLI;
    }

    /**
     * Tokenizes {@code str} with the custom {@link StringToken}.
     *
     * @param str    text to split
     * @param delims delimiter string handed to {@code StringToken}
     * @return every token followed by a tab character, concatenated in order
     */
    public static String myStringToken(String str, String delims) {
        StringToken token = new StringToken(str, delims);
        StringBuilder builder = new StringBuilder(str.length());
        while (token.hasMoreTokens()) {
            builder.append(token.nextToken()).append('\t');
        }
        return builder.toString();
    }

    /**
     * Times {@code count} full tokenization passes using {@link StringToken}.
     *
     * @param str    text to split
     * @param delims delimiter string handed to {@code StringToken}
     * @param count  number of passes; values below one perform no work
     * @return elapsed time in milliseconds
     */
    public static long myStringTokenTimes(String str, String delims, int count) {
        long start = System.nanoTime();
        for (int i = 0; i < count; i++) {
            StringToken token = new StringToken(str, delims);
            StringBuilder builder = new StringBuilder(str.length());
            while (token.hasMoreTokens()) {
                builder.append(token.nextToken());
            }
            builder.toString();
        }
        return (System.nanoTime() - start) / NANOS_PER_MILLI;
    }
}
运行结果:
#使用JDK自带的工具类测试:
人民银行,我来自 ,ABC DEF GHIJ家KLM央NOPQRSTUVWXYZ->|use time:765
#使用自定义StringToken测试:
人民银行,我来自 ,ABC国中DEF GHIJ国家KLM中央NOPQRSTUVWXYZ->|use time:654
package com.kevin.tokenizer; /** * @author kevin */ public class StringToken { private char[] srcChars; private char[] delims; public StringToken(String src, String delim) { srcChars = src.toCharArray(); delims = delim.toCharArray(); } private int pos = 0; private Status status = Status.UNSTART; private int countIndex = 0; private int endIndex = 0; public boolean hasMoreTokens() { if (status == Status.COMPLETE) { return false; } countIndex = 0; for (int s = pos; s < srcChars.length; s++) { if (status == Status.UNSTART) { if (srcChars[s] == delims[countIndex]) { countIndex++; } else { countIndex = 0; if (srcChars[s] == delims[countIndex]) { countIndex++; } } if (delims.length == countIndex) { endIndex = s + 1; status = Status.START; /* 主要用户处理最开始的字符就是匹配字符问题 */ if (endIndex != delims.length) { return true; } else { pos = endIndex; return hasMoreTokens(); } } } else { if (srcChars[s] == delims[countIndex]) { if (delims.length == countIndex + 1) { endIndex = s + 1; if (endIndex - pos != delims.length) { return true; } else { pos = endIndex; return hasMoreTokens(); } } } else { status = Status.UNSTART; // countIndex = 0; } } } if (srcChars.length > pos) { status = Status.COMPLETE; return true; } return false; } public String nextToken() { if (status == Status.COMPLETE) { try { int length = srcChars.length - pos; if (length == 0) { return ""; } char[] res = new char[length]; System.arraycopy(srcChars, pos, res, 0, res.length); return toString(res); } finally { pos = endIndex; } } else { try { int length = endIndex - pos - delims.length; if (length < 1) { return ""; } char[] res = new char[length]; System.arraycopy(srcChars, pos, res, 0, res.length); return toString(res); } finally { pos = endIndex; status = Status.UNSTART; } } } public String toString(char[] char_array) { if (char_array.length == 0) { return ""; } StringBuilder builder = new StringBuilder(); for (int i = 0; i < char_array.length; i++) { builder.append(char_array[i]); } return builder.toString(); } public 
enum Status { UNSTART, START, COMPLETE } }