先看 KMP 模板题:下面用单 hash 做字符串匹配,next 数组仍用 KMP 的 initNext 求出。
如果出题人不刻意卡 hash,只取一次模(单 hash)一般问题不大。不过当不同字符串的数量达到模数的平方根量级以上时,出现碰撞的可能性就会明显变大(生日悖论)。
我选的模数在 1e8 左右,平方根约为 1e4,所以 10000 组数据(指 10000 个不同的字符串)刚好够用。
实在不行就用双 hash:对每个串用两个不同的模数各算一个 hash 值(hash1、hash2),只有两个串的 hash1 和 hash2 都分别相等时,才判定它们相同。
具体做法就是把第一遍 hash 的代码复制一份,换成另一个模数(注意每处用的是哪个模数,别写混),代价是大约慢一倍。
模数不要取大家都在用的常见质数(容易被针对性地卡掉);可以自己打一张质数表,挑几个常用的背下来备用。
#include
#include
#include
#include
#include
// File-wide declarations for the single-hash matching program.
using namespace std;
// Debug helper: prints `name=value` to cerr. The expansion already ends in ';'.
#define debug(x) cerr << #x << "=" << x << endl;
// Capacity of every fixed-size buffer below: 1e6 entries plus 10 spare.
const int MAXN = 1000000 + 10;
// Modulus applied to the polynomial hash in main().
const int MOD3 = 100000123;
// Of the names on the next two lines, only next[] (written by initNext(), read by
// KMP() and main()) is referenced by the visible code of this program; the rest
// appear unused here.
// NOTE(review): together with `using namespace std;` an unqualified `next` may be
// ambiguous with std::next when building as C++11 or later - confirm the build standard.
int l=1,r=0,k,n,mians,sum[MAXN],t[MAXN],dif[MAXN],h,next[MAXN];
int g,ans[MAXN],tot;
// a is the text and b the pattern; both are read from stdin in main().
string a,b;
// Working copies of a and b for the hashing loops, which index them starting at 1.
char c[MAXN],d[MAXN];
// Alias for unsigned long long; not used by the visible code.
typedef unsigned long long ll;
/**
 * Builds the KMP failure table of `p` into the global array `next`.
 *
 * After the call, next[i] is the 0-based index of the last character of the
 * longest proper prefix of p[0..i] that is also a suffix of p[0..i], or -1 when
 * no such prefix exists. The whole array is reset to -1 first, so next[0] == -1.
 *
 * @param p pattern to preprocess (taken by value).
 */
void initNext(string p) {
    const int patLen = p.length();
    // Every byte becomes 0xFF, which makes each int element equal to -1.
    memset(next, -1, sizeof(next));
    int border = -1;  // end index of the border of p[0..pos-1]
    for (int pos = 1; pos < patLen; ++pos) {
        // Fall back to shorter borders until one can be extended by p[pos].
        for (; border != -1 && p[pos] != p[border + 1]; border = next[border]) {
        }
        if (p[pos] == p[border + 1]) {
            ++border;
        }
        next[pos] = border;
    }
}
/**
 * Counts the occurrences (overlapping ones included) of pattern `p` in text `t`.
 *
 * Precondition: the global table `next` must already describe `p`, i.e.
 * initNext(p) has to be called before this function; it is not rebuilt here.
 *
 * @param p pattern to search for.
 * @param t text to search in.
 * @return number of positions in `t` at which a full copy of `p` ends;
 *         0 when `p` is empty.
 */
int KMP(string p, string t) {
    const int m = p.length(), n = t.length();
    // With an empty pattern the "full match" test below (j == m - 1) would hold
    // for j == -1 and read next[-1], which is outside the array.
    if (m == 0) {
        return 0;
    }
    int res = 0;
    // j is the index of the last pattern character matched so far (-1 = none).
    for (int i = 0, j = -1; i < n; i++) {
        while (j != -1 && t[i] != p[j + 1]) j = next[j];
        if (t[i] == p[j + 1]) j++;
        if (j == m - 1) {
            res++;
            // Continue from the longest border so overlapping matches are found.
            j = next[j];
        }
    }
    return res;
}
long long prefix[MAXN];
int main() {
cin >> a >> b;
int ls1 = a.length(), ls2 = b.length();
for(int i=0; i1] = a[i];
}
for(int i=0; i1] = b[i];
}
int p = 131;
for(int i=1; i<=ls1; i++) {
long long temp = prefix[i-1] * p + c[i];
temp %= MOD3;
prefix[i] = temp;
}
long long temp = 0;
for(int i=1; i<=ls2; i++) {
temp = (temp * p + d[i]) % MOD3;
}
int s2jud = temp;
long long pn = 1;//这个pn一定要开long long 做这种题一定注意多开long long
for(int i=1; i<=ls2; i++) {
pn = pn * p % MOD3;//有乘就要小心爆int 大不了有乘法的算式都开成long long 然后乘pn是要补上一些没乘上的东西
}
for(int i=ls2; i<=ls1; i++) {
int judd = (prefix[i] - prefix[i-ls2]*pn%MOD3 + MOD3) % MOD3;//减法取模是( a%p - b%p ) % p 注意这个%p是要取正余数
if(judd == s2jud) {
printf("%d\n", i-ls2+1);
}
}
initNext(b);
for(int i=0; icout << next[i]+1 << " ";
return 0;
}
双hash(慢一倍)
#include
#include
#include
#include
#include
// File-wide declarations for the double-hash matching program.
using namespace std;
// Debug helper: prints `name=value` to cerr. The expansion already ends in ';'.
#define debug(x) cerr << #x << "=" << x << endl;
// Capacity of every fixed-size buffer below: 1e6 entries plus 10 spare.
const int MAXN = 1000000 + 10;
// The two moduli used by main(): MOD3 for the first hash, MOD7 for the second.
const int MOD3 = 100000037;
const int MOD7 = 100000073;
// Of the names on the next line, only next[] (written by initNext(), read by
// KMP() and main()) is referenced by the visible code of this program.
// NOTE(review): together with `using namespace std;` an unqualified `next` may be
// ambiguous with std::next when building as C++11 or later - confirm the build standard.
int l=1,r=0,k,n,mians,sum[MAXN],t[MAXN],dif[MAXN],h,next[MAXN];
// ls1 / ls2 are the lengths of a / b and p is the hash base; all three are set in
// main() and read by hash1(), hash2() and s2hash(). g, ans and tot appear unused here.
int g,ans[MAXN],tot,ls1,ls2,p;
// a is the text and b the pattern; both are read from stdin in main().
string a,b;
// Working copies of a and b for the hashing functions, which index them starting at 1.
char c[MAXN],d[MAXN];
/**
 * Builds the KMP failure table of `p` into the global array `next`.
 *
 * After the call, next[i] is the 0-based index of the last character of the
 * longest proper prefix of p[0..i] that is also a suffix of p[0..i], or -1 when
 * no such prefix exists. The whole array is reset to -1 first, so next[0] == -1.
 *
 * @param p pattern to preprocess (taken by value).
 */
void initNext(string p) {
    const int patLen = p.length();
    // Every byte becomes 0xFF, which makes each int element equal to -1.
    memset(next, -1, sizeof(next));
    int border = -1;  // end index of the border of p[0..pos-1]
    for (int pos = 1; pos < patLen; ++pos) {
        // Fall back to shorter borders until one can be extended by p[pos].
        for (; border != -1 && p[pos] != p[border + 1]; border = next[border]) {
        }
        if (p[pos] == p[border + 1]) {
            ++border;
        }
        next[pos] = border;
    }
}
/**
 * Counts the occurrences (overlapping ones included) of pattern `p` in text `t`.
 *
 * Precondition: the global table `next` must already describe `p`, i.e.
 * initNext(p) has to be called before this function; it is not rebuilt here.
 *
 * @param p pattern to search for.
 * @param t text to search in.
 * @return number of positions in `t` at which a full copy of `p` ends;
 *         0 when `p` is empty.
 */
int KMP(string p, string t) {
    const int m = p.length(), n = t.length();
    // With an empty pattern the "full match" test below (j == m - 1) would hold
    // for j == -1 and read next[-1], which is outside the array.
    if (m == 0) {
        return 0;
    }
    int res = 0;
    // j is the index of the last pattern character matched so far (-1 = none).
    for (int i = 0, j = -1; i < n; i++) {
        while (j != -1 && t[i] != p[j + 1]) j = next[j];
        if (t[i] == p[j + 1]) j++;
        if (j == m - 1) {
            res++;
            // Continue from the longest border so overlapping matches are found.
            j = next[j];
        }
    }
    return res;
}
// Prefix-hash tables of the text: prefix1 is filled by hash1(), prefix2 by hash2().
// Index 0 of each table is never written and keeps its zero initial value.
long long prefix1[MAXN];
long long prefix2[MAXN];

/**
 * Fills prefix1[1..ls1] with the running polynomial hash of c[1..ls1], using the
 * global base `p` and reducing modulo `MOD` after every character.
 *
 * c[i] is added as a plain char, so on platforms where char is signed a byte
 * >= 0x80 contributes a negative value.
 *
 * @param MOD modulus for this table.
 */
void hash1(int MOD) {
    for (int idx = 1; idx <= ls1; ++idx) {
        prefix1[idx] = (prefix1[idx - 1] * p + c[idx]) % MOD;
    }
}
/**
 * Fills prefix2[1..ls1] with the running polynomial hash of c[1..ls1], using the
 * global base `p` and reducing modulo `MOD` after every character.
 *
 * Same recurrence as hash1(), but written into the second table so a different
 * modulus can be used. c[i] is added as a plain char (negative for bytes >= 0x80
 * where char is signed).
 *
 * @param MOD modulus for this table.
 */
void hash2(int MOD) {
    long long running = prefix2[0];
    for (int idx = 1; idx <= ls1; ++idx) {
        running = (running * p + c[idx]) % MOD;
        prefix2[idx] = running;
    }
}
/**
 * Computes the polynomial hash of the whole pattern d[1..ls2] with the global
 * base `p`, reduced modulo `MOD` after every character.
 *
 * d[i] is added as a plain char (negative for bytes >= 0x80 where char is signed).
 *
 * @param MOD modulus to reduce by.
 * @return the hash, narrowed from long long to int.
 */
int s2hash(int MOD) {
    long long acc = 0;
    int pos = 1;
    while (pos <= ls2) {
        acc = acc * p + d[pos];
        acc %= MOD;
        ++pos;
    }
    return static_cast<int>(acc);
}
int main() {
cin >> a >> b;
ls1 = a.length(), ls2 = b.length();
for(int i=0; i1] = a[i];
}
for(int i=0; i1] = b[i];
}
p = 131;
hash1(MOD3);
hash2(MOD7);
int s2jud11 = s2hash(MOD3);
int s2jud22 = s2hash(MOD7);
long long pn = 1;//这个pn一定要开long long 做这种题一定注意多开long long
long long pn2 = 1;
for(int i=1; i<=ls2; i++) {
pn = pn * p % MOD3;//有乘就要小心爆int 大不了有乘法的算式都开成long long 然后乘pn是要补上一些没乘上的东西
pn2 = pn2 * p % MOD7;
}
for(int i=ls2; i<=ls1; i++) {
int judd1 = (prefix1[i] - prefix1[i-ls2]*pn%MOD3 + MOD3) % MOD3;//减法取模是( a%p - b%p ) % p 注意这个%p是要取正余数
int judd2 = (prefix2[i] - prefix2[i-ls2]*pn2%MOD7 + MOD7) % MOD7;
if(judd1 == s2jud11 && judd2 == s2jud22) {
printf("%d\n", i-ls2+1);
}
}
initNext(b);
for(int i=0; icout << next[i]+1 << " ";
return 0;
}
然后是判断一个字符串是否出现过(统计不同字符串的个数)。
如果模数太大、以 hash 值为下标的标记数组开不下,可以把所有 hash 值存进数组,排序后扫一遍,通过比较相邻两个值是否相同来判重。
#include
#include
#include
#include
#include
// File-wide declarations for the distinct-string counting program.
using namespace std;
// Debug helper: prints `name=value` to cerr. The expansion already ends in ';'.
#define debug(x) cerr << #x << "=" << x << endl;
// Capacity of the input buffers below: 1e6 entries plus 10 spare.
const int MAXN = 1000000 + 10;
// Hash modulus; also the number of slots in vis[].
const int MOD3 = 100000123;
// n is the number of strings read by main(); the other names on this line appear
// unused in the visible code of this program.
int l=1,r=0,k,n,mians,sum[MAXN],t[MAXN],dif[MAXN],h,next[MAXN];
// ans counts the distinct hash values seen by main().
int g,ans;
// vis[x] != 0 once some input string has hashed to x.
// NOTE(review): this is MOD3 ints, roughly 400 MB assuming a 4-byte int - confirm
// that the memory limit allows it.
int vis[MOD3];
// Base of the polynomial hash.
int p = 131;
// a receives each input string in main(); d is not referenced by the visible code.
char a[MAXN],d[MAXN];
int hash(char *a) {
long long temp = 0;
int len = strlen(a);
for(int i=0; ireturn temp;
}
/**
 * Reads n, then n whitespace-delimited strings, and prints how many distinct
 * hash values were seen. Two different strings with the same hash are counted
 * once.
 */
int main() {
    scanf("%d", &n);
    for (int idx = 0; idx < n; ++idx) {
        scanf("%s", a);
        const int slot = hash(a);
        // Count a hash value only the first time it shows up.
        if (!vis[slot]) {
            vis[slot] = 1;
            ++ans;
        }
    }
    printf("%d", ans);
    return 0;
}