hdu4821 字符串hash

参考博客:点击打开链接


字符串 hash 典例。这里用的是 BKDRHash 法,也是最常用、冲突最少的一种。原理:把字符串和数值对应。这里用 base=31(一般用质数)。

先扫一遍,处理出每个位置到结尾构成的串的 hash 值(倒过来算的),然后长度为 l 的子串的 hash 值就好算了。

之后枚举前 l 个起点,每个起点每次向后滚动 l 个字符,复杂度 max(L*M, L*(S.SIZE/M)) 可以过,这里用 map 判重。若对每个起点都重新扫描 m 段,姿势不优越过不了,极限情况:m=50000,l=1,复杂度 O(s.size()*m) 会超时。

关键一:求 hash 值的时候要 +1(即 s[i]-'a'+1),否则 'a' 对应 0,像 100 和 10 这种只差若干个 0 的串 hash 值会一样。

开始担心这样相减 hash 值会因为溢出而出现负值。其实不然:其一,unsigned long long 的运算自动对 2^64 取模,不存在负值。

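其二:因为每次从后向前递推:hash[i] = hash[i+1]*base + s[i]-'a'+1;(每步自动对 2^64 取模),展开 l 步后有 hash[i] ≡ H(i,l) + hash[i+l]*nbase[l] (mod 2^64),其中 H(i,l) 表示子串 s[i..i+l-1] 的 hash 值。

所以 hash[i] - hash[i+l]*nbase[l] 在无符号运算下(模 2^64)恰好等于 H(i,l);即使数值上 hash[i] < hash[i+l]*nbase[l],回绕之后结果依然正确。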


   WA:

#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;

const int maxn = 100050;   // capacity of every table declared below
const int seed = 31;       // multiplier of the polynomial hash
int l,m;
// base[k] is filled by main with seed^k and h[i] with the hash of the suffix
// str[i..]; seq and ans are per-position work arrays that main fills in.
// All hash arithmetic is unsigned 64-bit, i.e. implicitly modulo 2^64.
unsigned long long base[maxn],h[maxn],seq[maxn],ans[maxn];
char str[maxn];

/*
 * Hash of the substring of length `len` that starts at index `left`.
 *
 * Relies on h[i] = h[i+1]*seed + value(str[i]), so removing the contribution
 * of the suffix that starts at left+len (scaled by seed^len) leaves exactly
 * the hash of the len characters in between. The subtraction wraps modulo
 * 2^64, which is the intended behaviour.
 *
 * The result is returned at full 64-bit width; returning it as `int` would
 * discard the upper half of the hash and make unrelated substrings collide
 * far more often.
 *
 * Preconditions (not checked): 0 <= left, 0 <= len, left + len < maxn, and
 * base[len], h[left], h[left+len] have already been filled in.
 */
unsigned long long calc(int left,int len)
{
	return h[left] - h[left + len]*base[len];
}

/*
 * Driver of the attempt that the surrounding text labels "WA".
 *
 * For every test case it reads two integers (m, l) and a lowercase string,
 * builds suffix hashes h[], stores the hash of each length-m block in seq[],
 * and accumulates a per-position chain length in ans[] before printing a sum.
 *
 * NOTE(review): only memory-safety and input-handling defects are fixed here;
 * the counting logic is intentionally left as written. Points that look
 * suspicious and should be confirmed against the problem statement:
 *   - a block is compared only with the block m positions earlier, so blocks
 *     further apart in a chain are never compared with each other;
 *   - the block starting at len-m is never hashed (loop stops at len-m-1);
 *   - when two neighbouring blocks are equal, ans[i] stays 0.
 */
int main()
{
	// base[k] = seed^k with unsigned wrap-around. The table has maxn entries,
	// so the loop must stop at maxn-1 (the original bound wrote base[maxn]).
	base[0] = 1;
	for(int i = 1; i < maxn; i++)
		base[i] = base[i-1]*seed;
	// Require both integers: comparing against EOF alone never terminates when
	// scanf reports a matching failure (return value 0) on malformed input.
	while(scanf("%d%d",&m,&l) == 2)
	{
		// %s skips leading whitespace by itself, so no getchar() is needed.
		if(scanf("%s",str) != 1)
			break;
		int len = strlen(str);
		// Suffix hashes, built right to left; the +1 keeps 'a' from mapping to 0.
		h[len] = 0;
		for(int i = len-1; i >= 0; i--)
			h[i] = h[i+1]*seed + str[i] - 'a' + 1;
		memset(seq,0,sizeof(seq));
		memset(ans,0,sizeof(ans));
		// Only ans[0..len-1] is ever read, so cap the seeding at len; this also
		// keeps the writes inside the array when m is larger than the string.
		for(int i = 0; i < m && i < len; i++) ans[i] = 1;
		for(int i = 0; i < len - m; i++)
		{
			// Store the hash directly so it is not narrowed through an int.
			seq[i] = calc(i,m);
			if(i < m) continue;
			if(seq[i] != seq[i-m])
				ans[i] = ans[i-m] + 1;
		}
		int sum = 0;
		for(int i = 0; i < len; i++)
			if(ans[i] >= l)
				sum += ans[i] - l + 1;
		printf("%d\n",sum);
	}
	return 0;
}


AC:

#include<iostream>
#include<cstdio>
#include<cstring>
#include<algorithm>
#include<map>
#include<string>
typedef unsigned long long ull;
using namespace std;
const int maxn= 100050;
const ull base =31;
// nbase[k] = base^k and sufHash[i] = hash of the suffix s[i..], both computed
// in unsigned 64-bit arithmetic (implicitly modulo 2^64).
// The suffix table is deliberately not called `hash`: with
// `using namespace std;` in effect an unqualified `hash` is ambiguous with
// std::hash in C++11 and later, and the program fails to compile.
ull nbase[maxn],sufHash[maxn];
int m,l;
// Multiset of the block hashes inside the current window: hash -> multiplicity.
map<ull, int> mp;

// Hash of the length-l block that starts at index pos.
// Requires pos + l to be a valid index into sufHash.
static ull blockHash(int pos)
{
	return sufHash[pos] - sufHash[pos+l]*nbase[l];
}

/*
 * For every test case reads m, l and a lowercase string s, then prints how
 * many substrings of length m*l split into m consecutive length-l blocks
 * whose hashes are all different.
 *
 * Each start offset i in [0, l) is handled separately: the first window is
 * hashed block by block, then the window slides right by one block at a time,
 * evicting the oldest block hash and inserting the newest one.
 *
 * Assumes the string is shorter than maxn characters (not checked).
 */
int main()
{
	nbase[0] = 1;
	for (int i = 1; i < maxn; i++)
		nbase[i] = nbase[i-1]*base;
	// Stop on EOF and on malformed input alike.
	while (scanf("%d%d",&m,&l) == 2)
	{
		string s;
		if (!(cin >> s))
			break;
		const int slen = static_cast<int>(s.size());
		sufHash[slen] = 0;
		for (int i = slen-1; i >= 0; i--)
			sufHash[i] = sufHash[i+1]*base+s[i]-'a'+1;   // key point 1: +1 so that 'a' maps to 1, not 0
		int ans = 0;
		// Computed in 64 bits: m*l may not fit in an int.
		const long long span = static_cast<long long>(m) * l;
		// Non-positive m or l, or a window longer than the string, yields 0.
		if (m >= 1 && l >= 1 && span <= slen)
		{
			const int window = static_cast<int>(span);   // safe: span <= slen
			const size_t need = static_cast<size_t>(m);
			for (int i = 0; i < l && i + window <= slen; i++)
			{
				// First window for this offset.
				mp.clear();
				for (int j = i; j < i + window; j += l)
					mp[blockHash(j)]++;
				if (mp.size() == need)
					ans++;
				// Slide by one block; j is the start of the block being added.
				for (int j = i + window; j + l <= slen; j += l)
				{
					// The evicted block was inserted earlier, so find() succeeds.
					map<ull, int>::iterator oldest = mp.find(blockHash(j - window));
					if (--oldest->second == 0)
						mp.erase(oldest);
					mp[blockHash(j)]++;
					if (mp.size() == need)
						ans++;
				}
			}
		}
		printf("%d\n", ans);
	}
	return 0;
}



你可能感兴趣的:(数据结构)