[draft]字符串匹配 Z_algorithm

Z algorithm 和其他几个字符串匹配算法的思路其实类似,主要利用了 pattern 自身的重复信息:z[i] 表示从位置 i 开始的子串与整个字符串的最长公共前缀的长度。

具体思想在这里,这个slides写得非常好懂。

还有个demo也很好。

我实现了一下代码(暂时还没有测试过,不确定是否 bug-free):

/**
 * Computes the Z array of a string (Z algorithm).
 *
 * <p>For a string {@code s} of length {@code n}, {@code z[i]} is the length of the
 * longest substring starting at {@code i} that is also a prefix of {@code s}.
 * By the convention used here, {@code z[0] == n}.
 *
 * <p>Example for {@code "aabcaabxaaaz"}:
 * <pre>
 *   text:  a  a  b  c  a  a  b  x  a  a  a  z
 *   index: 0  1  2  3  4  5  6  7  8  9  10 11
 *   z:     12 1  0  0  3  1  0  0  2  2  1  0
 * </pre>
 *
 * <p>Instances are mutable: each call to {@link #populateZ(String)} replaces the
 * previously computed array.
 */
public class ZAlgo {

    /** Z values of the most recently processed string; empty until populateZ is called. */
    private int[] z;

    /** Characters of the most recently processed string. */
    private char[] text;

    /** Creates an instance whose Z array is empty until {@link #populateZ(String)} is called. */
    public ZAlgo() {
        // Empty arrays instead of null so print_array/getZ are safe before populateZ.
        z = new int[0];
        text = new char[0];
    }

    /**
     * Naive prefix match: counts how many characters starting at {@code index}
     * match the characters starting at position 0 of {@code text}.
     *
     * @param index start position of the candidate substring (expected to be >= 1)
     * @return length of the common prefix between text[index..] and text[0..]
     */
    private int basicPrefixMatch(int index) {
        int matched = 0;
        while (index < text.length && text[index] == text[matched]) {
            ++matched;
            ++index;
        }
        return matched;
    }

    /**
     * Computes the Z array for {@code str} and stores it in this instance.
     *
     * <p>The loop maintains {@code [l, r]} (both inclusive), the right-most segment
     * found so far that matches a prefix of the text. Positions inside that segment
     * reuse previously computed values; only characters beyond {@code r} are
     * compared explicitly.
     *
     * @param str the string to analyse; may be empty
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public void populateZ(String str) {
        java.util.Objects.requireNonNull(str, "str");
        text = str.toCharArray();
        z = new int[text.length];
        if (text.length == 0) {
            // Nothing to compute; previously z[0] was written unconditionally and
            // threw ArrayIndexOutOfBoundsException for the empty string.
            return;
        }
        z[0] = text.length;
        int l = 0;
        int r = 0;
        for (int index = 1; index < text.length; ++index) {
            if (index > r) {
                // Outside any known match segment: compare from scratch.
                z[index] = basicPrefixMatch(index);
                if (z[index] > 0) {
                    l = index;
                    r = index + z[index] - 1;
                }
            } else {
                // Inside [l, r]: text[index..r] equals text[index - l..r - l].
                int remaining = r - index + 1;
                if (z[index - l] < remaining) {
                    // The mirrored match ends strictly inside the segment; copy it.
                    z[index] = z[index - l];
                } else {
                    // The match reaches r; extend it by comparing characters of the
                    // text itself (not Z values) past the segment's right edge.
                    int i = r + 1;
                    int start = remaining;
                    while (i < text.length && text[i] == text[start]) {
                        ++i;
                        ++start;
                    }
                    l = index;
                    r = i - 1;
                    z[index] = r - l + 1;
                }
            }
        }
    }

    /**
     * Returns a copy of the Z array computed by the last call to
     * {@link #populateZ(String)}.
     *
     * @return a fresh array; empty if nothing has been computed yet
     */
    public int[] getZ() {
        return z.clone();
    }

    /**
     * Prints the Z array to standard output on one line, prefixed by a tag
     * character, e.g. {@code z:12, 1, 0, }.
     *
     * @param c tag printed before the values
     */
    public void print_array(char c) {
        System.out.printf("%c:", c);
        for (int i = 0; i < z.length; ++i) {
            System.out.printf("%d, ", z[i]);
        }
        System.out.println();
    }

    /**
     * Demo entry point: prints the Z array of a sample string.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ZAlgo zalgo = new ZAlgo();
        String input = "aabcaabxaaaz";
        zalgo.populateZ(input);
        zalgo.print_array('z');
    }

}



你可能感兴趣的:(StringMatching)