参考博客:点击打开链接
字符串 hash 典例。这里用的是 BKDRHash 法,也是最常用、冲突较少的一种。原理:把字符串看作一个 base 进制的数,从而把字符串和数值对应起来。这里用 base=31(一般用质数),
先从后向前扫一遍,处理出每个位置到结尾构成的后缀的 hash 值(倒过来算的),然后长度为 l 的子串的 hash 值就好算了:hash[i] - hash[i+l]*nbase[l]。
之后只枚举前 l 个起点,每个起点每次向后滚动一块(去掉窗口最左边的一块,加入右边新的一块),这里用了 map 对窗口内各块的 hash 值判重。复杂度约为 max(L*M, L*(S.SIZE/L)) 次 map 操作,两者都不超过 S.SIZE,可以过。若对每个起点都把整个窗口的 M 块重新扫一遍,姿势不够优,过不了,极限情况如 m=50000、l=1,复杂度 O(S.SIZE*M),会超时。
关键一:求 hash 值的时候字符要取 s[i]-'a'+1(从 1 开始编号),否则 'a' 对应 0,像 100 和 10 这种只差若干个 0 的串 hash 值会一样。
一开始担心这样相减 hash 值会因为溢出而出现负值。其实不然:其一,unsigned long long 的运算溢出时自动对 2^64 取模;
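其二:hash 是从后向前递推的:hash[i] = hash[i+1]*base + s[i]-'a'+1(每步自动对 2^64 取模)。把它展开 l 步得到 hash[i] = H(i,l) + hash[i+l]*nbase[l](模 2^64),其中 H(i,l) 是子串 s[i..i+l-1] 的 hash 值,由于 s[i]-'a'+1 非负,H(i,l) 也非负。
所以 hash[i] - hash[i+l]*nbase[l] 在模 2^64 意义下恰好等于 H(i,l):即使存储的 hash[i] 比 hash[i+l]*nbase[l] 小,无符号减法回绕之后得到的仍然是正确的子串 hash 值。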
WA:
// Rejected attempt (the text labels this program "WA" = wrong answer).
// The counting rule below is kept exactly as the author wrote it; only the
// memory-safety and integer-truncation defects have been repaired, so the
// program is still NOT a correct solution.
#include <iostream>
#include <cstdio>
#include <cstring>
using namespace std;

const int maxn = 100050;  // capacity of every table below
const int seed = 31;      // base of the polynomial hash

int l, m;
unsigned long long base[maxn], h[maxn], seq[maxn], ans[maxn];
char str[maxn];

// Hash of the substring str[left .. left+len-1], derived from the suffix
// hashes in h[]: h[left] - h[left+len] * seed^len, evaluated modulo 2^64.
// Returns the full 64-bit value; an `int` return type would silently drop
// the upper 32 bits and make unequal substrings compare equal far more often.
// Requires 0 <= left, 0 <= len and left + len <= strlen(str).
unsigned long long calc(int left, int len) {
    return h[left] - h[left + len] * base[len];
}

int main() {
    // base[i] = seed^i (mod 2^64). Valid indices are 0 .. maxn-1, so the loop
    // must stop before maxn.
    base[0] = 1;
    for (int i = 1; i < maxn; i++) base[i] = base[i - 1] * seed;

    // Require both integers to parse; comparing against EOF alone would keep
    // looping on malformed input.
    while (scanf("%d%d", &m, &l) == 2) {
        // The field width is maxn-1 so the string plus its terminator always
        // fits in str[]; keep it in sync with maxn. %s skips the newline left
        // behind by the previous scanf, so no getchar() is needed.
        if (scanf("%100049s", str) != 1) break;
        int len = static_cast<int>(strlen(str));

        // Non-positive parameters would index base[] and h[] out of bounds.
        if (m < 1 || l < 1) {
            printf("0\n");
            continue;
        }

        // h[i] = hash of the suffix str[i..], built right to left. The +1
        // keeps 'a' from mapping to 0.
        h[len] = 0;
        for (int i = len - 1; i >= 0; i--)
            h[i] = h[i + 1] * seed + str[i] - 'a' + 1;

        // Only the first len entries are read afterwards.
        memset(seq, 0, len * sizeof(seq[0]));
        memset(ans, 0, len * sizeof(ans[0]));
        for (int i = 0; i < m && i < len; i++) ans[i] = 1;

        // seq[i] is the hash of the length-m substring starting at i; ans[i]
        // extends the value from m positions earlier when the two substrings
        // differ.
        // NOTE(review): the last start position, len - m, is never visited;
        // left as in the original attempt.
        for (int i = 0; i < len - m; i++) {
            seq[i] = calc(i, m);
            if (i < m) continue;
            if (seq[i] != seq[i - m]) ans[i] = ans[i - m] + 1;
        }

        // 64-bit accumulator: the total can exceed the range of int.
        long long sum = 0;
        for (int i = 0; i < len; i++)
            if (ans[i] >= static_cast<unsigned long long>(l))
                sum += static_cast<long long>(ans[i]) - l + 1;
        printf("%lld\n", sum);
    }
    return 0;
}
// For every test case "m l" followed by a string s, prints the number of
// windows of length m*l in s whose m consecutive pieces of length l all have
// different hash values.
#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <map>
#include <string>
#include <vector>

typedef unsigned long long ull;
using namespace std;

const ull base = 31;  // base of the polynomial hash

// Hash of the piece s[pos .. pos+len-1], obtained from the suffix hashes.
// suffix[pos] = piece + suffix[pos+len] * base^len (mod 2^64), so the
// unsigned subtraction yields the piece hash even when it wraps around.
static ull piece_hash(const vector<ull>& suffix, ull pow_len, size_t pos, size_t len) {
    return suffix[pos] - suffix[pos + len] * pow_len;
}

// Counts the start positions p for which s[p .. p+m*l-1], cut into m
// consecutive pieces of length l, consists of m pieces with pairwise distinct
// hashes. Returns 0 when m or l is not positive or when m*l exceeds s.size().
//
// The table is deliberately not called `hash`: with `using namespace std;`
// that name is ambiguous with std::hash on C++11 and later compilers.
static int count_distinct_block_windows(const string& s, int m, int l) {
    if (m <= 0 || l <= 0) return 0;
    const size_t n = s.size();
    const size_t pieces = static_cast<size_t>(m);
    const size_t len = static_cast<size_t>(l);
    // Compare by division so that m*l is never formed when it could overflow.
    if (pieces > n / len) return 0;
    const size_t window = pieces * len;

    // suffix[i] = hash of s[i..], built right to left. The +1 keeps 'a' from
    // mapping to 0, which would make pieces differing only in 'a's collide.
    vector<ull> suffix(n + 1, 0);
    for (size_t i = n; i-- > 0;)
        suffix[i] = suffix[i + 1] * base + s[i] - 'a' + 1;

    ull pow_len = 1;  // base^l (mod 2^64)
    for (size_t k = 0; k < len; ++k) pow_len *= base;

    int total = 0;
    map<ull, int> seen;  // piece hash -> occurrences inside the current window

    // Windows whose starts are congruent modulo l share piece boundaries, so
    // each residue class is swept once, moving the window one piece at a time.
    for (size_t start = 0; start < len && start + window <= n; ++start) {
        seen.clear();
        for (size_t p = start; p < start + window; p += len)
            ++seen[piece_hash(suffix, pow_len, p, len)];
        if (seen.size() == pieces) ++total;

        for (size_t p = start + window; p + len <= n; p += len) {
            // Drop the leftmost piece of the previous window; it is always
            // present in the map because it was inserted earlier.
            map<ull, int>::iterator gone =
                seen.find(piece_hash(suffix, pow_len, p - window, len));
            if (--gone->second == 0) seen.erase(gone);

            // Append the piece that has just entered on the right.
            ++seen[piece_hash(suffix, pow_len, p, len)];
            if (seen.size() == pieces) ++total;
        }
    }
    return total;
}

int main() {
    int m, l;
    string s;
    // Stop on end of input or on the first line that does not parse, instead
    // of treating a partial read as a valid test case.
    while (scanf("%d%d", &m, &l) == 2 && (cin >> s)) {
        printf("%d\n", count_distinct_block_windows(s, m, l));
    }
    return 0;
}