Z algorithm 和其他几个字符串匹配算法其实很类似,主要是利用了 pattern 自身的重复信息。
具体思想在这里,这个slides写得非常好懂。
还有个demo也很好。
我自己实现了一下代码(暂时还没有测试过是否 bug-free)。
/**
 * Computes the Z-array of a string.
 *
 * <p>For a text {@code t}, {@code z[i]} is the length of the longest substring
 * starting at position {@code i} that is also a prefix of {@code t}. By the
 * convention used here, {@code z[0]} is the length of the whole text.
 *
 * <p>Worked example for {@code "aabcaabxaaaz"}:
 * <pre>
 *   index: 0  1  2  3  4  5  6  7  8  9  10 11
 *   text:  a  a  b  c  a  a  b  x  a  a  a  z
 *   z:     12 1  0  0  3  1  0  0  2  2  1  0
 * </pre>
 * Position 9 is the tricky one: the value copied from the mirrored position
 * reaches the right edge of the current Z-box, so the match has to be extended
 * by comparing characters of the text (yielding 2, not 1).
 */
public class ZAlgo {

    /** Z-array of the most recently processed text; empty until {@link #populateZ} is called. */
    private int[] z;

    /** Characters of the most recently processed text. */
    private char[] text;

    /** Creates an instance with an empty text and an empty Z-array. */
    public ZAlgo() {
        z = new int[0];
        text = new char[0];
    }

    /**
     * Naively counts how many characters starting at {@code index} match the
     * prefix of the text.
     *
     * @param index start position of the candidate match
     * @return number of consecutive matching characters
     */
    private int basicPrefixMatch(int index) {
        int i = 0;
        while (index < text.length && text[index] == text[i]) {
            ++i;
            ++index;
        }
        return i;
    }

    /**
     * Computes and stores the Z-array of {@code str}.
     *
     * <p>{@code [l, r]} is the rightmost segment found so far that matches a
     * prefix of the text (the "Z-box"). Positions inside the box reuse the value
     * already computed for their mirrored position {@code index - l}; characters
     * are only compared explicitly beyond {@code r}.
     *
     * @param str the text to analyse; an empty string yields an empty Z-array
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public void populateZ(String str) {
        if (str == null) {
            throw new NullPointerException("str must not be null");
        }
        text = str.toCharArray();
        z = new int[text.length];
        if (text.length == 0) {
            // Nothing to compute; writing z[0] would be out of bounds.
            return;
        }

        int l = 0;
        int r = 0;
        z[0] = text.length;
        for (int index = 1; index < text.length; ++index) {
            if (index > r) {
                // Outside every known Z-box: match from scratch.
                z[index] = basicPrefixMatch(index);
                if (z[index] > 0) {
                    l = index;
                    r = index + z[index] - 1;
                }
            } else if (z[index - l] < r - index + 1) {
                // The mirrored match ends strictly inside the box, so it carries over unchanged.
                z[index] = z[index - l];
            } else {
                // The mirrored match reaches the box edge: everything up to r is known
                // to match, so continue comparing from r + 1.
                // Note: compare characters of the text here, not values of the Z-array.
                int i = r + 1;
                int start = r - index + 1;
                while (i < text.length && text[i] == text[start]) {
                    ++i;
                    ++start;
                }
                l = index;
                r = i - 1;
                z[index] = r - l + 1;
            }
        }
    }

    /**
     * Returns a copy of the Z-array computed by the last call to {@link #populateZ}.
     *
     * @return a fresh array; empty if no text has been processed yet
     */
    public int[] getZ() {
        return z.clone();
    }

    /**
     * Prints the Z-array to standard output on one line, prefixed by a label,
     * in the form {@code c:v0, v1, ..., }.
     *
     * @param c label character printed before the values
     */
    public void print_array(char c) {
        System.out.printf("%c:", c);
        for (int i = 0; i < z.length; ++i) {
            System.out.printf("%d, ", z[i]);
        }
        System.out.println();
    }

    /**
     * Demo entry point: prints the Z-array of {@code "aabcaabxaaaz"}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ZAlgo zalgo = new ZAlgo();
        String input = "aabcaabxaaaz";
        zalgo.populateZ(input);
        zalgo.print_array('z');
    }
}