Implement strStr().
Returns a pointer to the first occurrence of needle in haystack, or null if needle is not part of haystack.
经典的字符串查找问题了。这个问题其实有四种比较经典的算法,这里介绍2种半。因为这里的Boyer-Moore是简易版,全功率版应该是简易版加上KMP法的,那个实在是复杂,面试是不大可能写出来的,除非记熟了。
还有一种是Rabin-Karp fingerprint search,这个是利用hash函数产生所谓的fingerprint,然后对比子串的hash值和原串同样字符长的hash值是否一致,这个算法的关键是如何处理hash值。
下面的三种方法测试LeetCode的运行时间都差不多,大概是因为测试数据不大吧。
1 BF暴力法:
char *strStr(char *haystack, char *needle) { if (!haystack || !needle) return nullptr; int n = strlen(haystack); int m = strlen(needle); for (int i = 0; i <= n - m; i++) { int j = 0; for (; j < m; j++) { if (needle[j] != haystack[j+i]) break; } if (j == m) return haystack + i; } return nullptr; }
3 简易版Boyer Moore法
void bmSkipTable(char *ch, vector<int> &skipTable) { if (!ch) return; skipTable.resize(26); int m = strlen(ch); for (int i = 0; i < m; i++) { skipTable[ch[i] - 'a'] = i; } } char *bmStrStr(char *haystack, char *needle) { if(!haystack || !needle) return haystack; int n = strlen(haystack); int m = strlen(needle); vector<int> skipTable; bmSkipTable(needle, skipTable); int skip = 0; for (int i = 0; i <= n-m; i+=skip) { skip = 0; for (int j = m-1; j >= 0; j--) { if (haystack[i+j] != needle[j]) { skip = j - skipTable[haystack[i+j] - 'a']; if (skip < 1) skip = 1; break; } } if (skip == 0) return haystack + i; } return nullptr; }
2 更新KMP法
//2014-2-20 update char *strStr(char *hay, char *ne) { int len = strlen(hay); int n = strlen(ne); vector<int> tbl = genTbl(ne, n); for (int i = 0, j = 0; i <= len - n; )//改成i<len那么会空串的时候出错 { for ( ; j < len && j < n && hay[i+j] == ne[j]; j++); if (j == n) return hay+i; if (j > 0) { i += j - tbl[j-1] - 1; j = tbl[j-1] + 1; } else i++; } return nullptr; } //修正bug vector<int> genTbl(char *ne, int n) { vector<int> tbl(n, -1); for (int i = 1, j = 0; i < n;) { if (ne[i] == ne[j]) { tbl[i] = j; ++j; ++i; } else if (j > 0) j = tbl[j-1]+1; else i++; } return tbl; }
//2014-2-21 update Boyer-Moore Algorithm char *strStr(char *hay, char *ne) { int n = strlen(ne); if (n == 0) return hay; int len = strlen(hay); vector<int> tbl(256, -1); for (int i = 0; i < n; i++) { tbl[ne[i]] = i; } for (int i = n-1, j = n-1; i < len; ) { for ( ; j >= 0 && hay[i] == ne[j]; j--, i--); if (j < 0) return hay + i + 1; if (tbl[hay[i]] < j) i += n - tbl[hay[i]] - 1; else i += n - j; j = n-1; } return nullptr; }