自定义StringToken PK java.util.StringTokenizer

StringTokenizer 会把分隔符参数中的每一个字符都当作独立的分隔符,而不是把整个字符串作为一个分隔符;因此当需要按多字符的字符串(例如"中国")拆分时,它得不到预期的结果,这时就需要我们自定义实现符合需求的功能了。

package com.kevin.test;

import java.util.StringTokenizer;

import com.kevin.tokenizer.StringToken;

/**
 * Compares {@link StringTokenizer} with the custom {@link StringToken} when
 * the delimiter is a multi-character string.
 * <p>
 * The sample text below is split on the delimiter "中国":
 * </p>
 * <p>
 * 中国人民银行,我来自中国,ABC国中DEF中国GHIJ国家KLM中央NOPQRSTUVWXYZ
 * </p>
 * <p>
 * For each tokenizer the program prints the tab-joined tokens followed by the
 * elapsed milliseconds of a repeated tokenizing loop.
 * </p>
 * 
 * @author kevin
 * 
 */
public class MyTokenTest {
	/**
	 * Runs both tokenizers over the sample text and prints one result line
	 * per tokenizer.
	 * 
	 * @param args unused
	 */
	public static void main(String[] args) {
		final String str = "中国人民银行,我来自中国,ABC国中DEF中国GHIJ国家KLM中央NOPQRSTUVWXYZ";
		final String delims = "中国";
		final int count = 1000000;

		String jdkTokens = useJDK(str, delims);
		long jdkMillis = useJDKTimes(str, delims, count);
		System.out.println(jdkTokens + "->|use time:" + jdkMillis);

		String customTokens = myStringToken(str, delims);
		long customMillis = myStringTokenTimes(str, delims, count);
		System.out.println(customTokens + "->|use time:" + customMillis);
	}

	/**
	 * Tokenizes {@code str} with {@link StringTokenizer} and joins the tokens,
	 * appending a tab after each one.
	 * 
	 * @param str    text to split
	 * @param delims delimiter argument handed to {@link StringTokenizer}
	 * @return every token followed by a tab character
	 */
	public static String useJDK(String str, String delims) {
		StringBuilder joined = new StringBuilder(str.length());
		StringTokenizer jdkTokenizer = new StringTokenizer(str, delims);
		while (jdkTokenizer.hasMoreTokens()) {
			joined.append(jdkTokenizer.nextToken()).append("\t");
		}
		return joined.toString();
	}

	/**
	 * Measures how long {@code count} full {@link StringTokenizer} passes over
	 * {@code str} take.
	 * 
	 * @param str    text to split
	 * @param delims delimiter argument handed to {@link StringTokenizer}
	 * @param count  number of passes; values below 1 run no pass
	 * @return elapsed wall-clock time in milliseconds
	 */
	public static long useJDKTimes(String str, String delims, int count) {
		final long begin = System.currentTimeMillis();
		for (int remaining = count; remaining > 0; remaining--) {
			StringBuilder joined = new StringBuilder(str.length());
			StringTokenizer jdkTokenizer = new StringTokenizer(str, delims);
			while (jdkTokenizer.hasMoreTokens()) {
				joined.append(jdkTokenizer.nextToken());
			}
			// The string is built and discarded so each pass does the same work.
			joined.toString();
		}
		return System.currentTimeMillis() - begin;
	}

	/**
	 * Tokenizes {@code str} with {@link StringToken} and joins the tokens,
	 * appending a tab after each one.
	 * 
	 * @param str    text to split
	 * @param delims delimiter string handed to {@link StringToken}
	 * @return every token followed by a tab character
	 */
	public static String myStringToken(String str, String delims) {
		StringBuilder joined = new StringBuilder(str.length());
		StringToken customTokenizer = new StringToken(str, delims);
		while (customTokenizer.hasMoreTokens()) {
			joined.append(customTokenizer.nextToken()).append("\t");
		}
		return joined.toString();
	}

	/**
	 * Measures how long {@code count} full {@link StringToken} passes over
	 * {@code str} take.
	 * 
	 * @param str    text to split
	 * @param delims delimiter string handed to {@link StringToken}
	 * @param count  number of passes; values below 1 run no pass
	 * @return elapsed wall-clock time in milliseconds
	 */
	public static long myStringTokenTimes(String str, String delims, int count) {
		final long begin = System.currentTimeMillis();
		for (int remaining = count; remaining > 0; remaining--) {
			StringBuilder joined = new StringBuilder(str.length());
			StringToken customTokenizer = new StringToken(str, delims);
			while (customTokenizer.hasMoreTokens()) {
				joined.append(customTokenizer.nextToken());
			}
			// The string is built and discarded so each pass does the same work.
			joined.toString();
		}
		return System.currentTimeMillis() - begin;
	}
}
运行结果:

#使用JDK自带的工具类测试:

人民银行,我来自 ,ABC DEF GHIJ家KLM央NOPQRSTUVWXYZ->|use time:765

#使用自定义StringToken测试:

人民银行,我来自 ,ABC国中DEF GHIJ国家KLM中央NOPQRSTUVWXYZ->|use time:654


package com.kevin.tokenizer;

/**
 * Splits a source string into tokens separated by a delimiter <em>string</em>.
 * <p>
 * The whole delimiter must match for a split to occur; its individual
 * characters are not treated as separate delimiters. Empty tokens (produced by
 * a leading, trailing or repeated delimiter) are never returned. An empty
 * delimiter never matches, so a non-empty source is returned as one token.
 * </p>
 * <p>
 * Instances are stateful and are meant to be driven with the usual
 * {@code while (hasMoreTokens()) nextToken()} loop.
 * </p>
 * 
 * @author kevin
 */
public class StringToken {
	/** Text being tokenized. */
	private final String source;
	/** Delimiter that separates tokens; may be empty. */
	private final String delimiter;

	/**
	 * Creates a tokenizer over {@code src} that splits on {@code delim}.
	 * 
	 * @param src   text to split
	 * @param delim delimiter string, matched as a whole
	 * @throws NullPointerException if either argument is {@code null}
	 */
	public StringToken(String src, String delim) {
		if (src == null) {
			throw new NullPointerException("src");
		}
		if (delim == null) {
			throw new NullPointerException("delim");
		}
		source = src;
		delimiter = delim;
	}

	/** Index in {@code source} where the next scan, or the pending token, starts. */
	private int pos = 0;
	/** Scan state; see {@link Status}. */
	private Status status = Status.UNSTART;

	/** Exclusive end index of the pending token; valid only while status is START. */
	private int endIndex = 0;

	/**
	 * Reports whether another non-empty token is available. Calling this
	 * method repeatedly without an intervening {@link #nextToken()} returns
	 * the same answer.
	 * 
	 * @return {@code true} if {@link #nextToken()} will return a token
	 */
	public boolean hasMoreTokens() {
		if (status == Status.START) {
			// A token was already located and has not been consumed yet.
			return true;
		}
		if (status == Status.COMPLETE) {
			return false;
		}
		final int delimLength = delimiter.length();
		if (delimLength > 0) {
			// Skip every delimiter sitting at the scan position so that
			// leading and repeated delimiters never yield empty tokens.
			while (source.startsWith(delimiter, pos)) {
				pos += delimLength;
			}
		}
		if (pos >= source.length()) {
			status = Status.COMPLETE;
			return false;
		}
		// An empty delimiter never matches: the remainder is a single token.
		final int next = delimLength > 0 ? source.indexOf(delimiter, pos) : -1;
		endIndex = next >= 0 ? next : source.length();
		status = Status.START;
		return true;
	}

	/**
	 * Returns the next token and advances past it.
	 * 
	 * @return the next non-empty token, or the empty string when no token is
	 *         left
	 */
	public String nextToken() {
		if (!hasMoreTokens()) {
			return "";
		}
		final String token = source.substring(pos, endIndex);
		// Leave pos on the delimiter (or at the end); the next scan skips it.
		pos = endIndex;
		status = Status.UNSTART;
		return token;
	}

	/**
	 * Converts a character array to a string.
	 * 
	 * @param char_array characters to convert
	 * @return a string with the same characters, or the empty string for an
	 *         empty array
	 */
	public String toString(char[] char_array) {
		if (char_array.length == 0) {
			return "";
		}
		return new String(char_array);
	}

	/**
	 * Scan state of a {@link StringToken}: {@code UNSTART} means the next
	 * token has not been located, {@code START} means a token has been located
	 * and is waiting to be consumed, and {@code COMPLETE} means the input is
	 * exhausted.
	 */
	public enum Status {
		UNSTART, START, COMPLETE
	}

}


你可能感兴趣的:(DFA)