昨晚看了一篇关于暴雪三重hash算法的文章,有意实践一下。
以前做字符串hash的时候想到过用另一个hash_key去区别同位置的字符串(用strcmp来比较两个字符串是否相等真的好慢),但是因为理论上不可能有hash_key可以唯一地让一个string区别于另一个string,所以一直认为这么做不可行。一直用开地址链表的方法做hash.
看完暴雪三重hash这篇文章之后,感觉只要选择两个(或者更多)好的hash函数,就可以在很低很低的错误概率下(如果程序要求绝对精确,则此法失效),通过两个key值来区别不同的string.
poj3297 open source
题目大意:
一个公告板,上面可以写开源项目的标题(大写字母),然后想参与的人可以写上自己的名字(小写字母),最后统计每个项目参与的人数。
如果有的人在不同的项目下面都写了名,则这个人名不计数。
题目数据,题目结果数据,题目官方c++程序
poj3297 open source
我的程序:
/* * ===================================================================================== * * Filename: 3297.c * * Description: * * Version: 1.0 * Created: 2012年03月13日 18时08分33秒 * Revision: none * Compiler: gcc * * Author: MaZheng (blog.csdn.net/mazheng1989), [email protected] * Company: Dalian University Of Technology * * ===================================================================================== */ #include<stdio.h> #include<string.h> #include<stdlib.h> #define LEN 20 /* */ #define HASH_TABLE_LEN 9999 /* */ #define PROJECT_NUM 100 /* */ //please declare parameters here. struct Project{ char name[LEN]; int num; }projects[PROJECT_NUM]; struct HashNode{ //we use key_A and key_B to diff the strings unsigned int key_A; //key_A unsigned int key_B; //key_B int project_ID; //which Project this string first emerged in //if this node is NULL,Project_ID is -1 }HashTable[HASH_TABLE_LEN]; int project_index; //please declare functions here. void init_hashtable() { int i=0; for(i=0;i<HASH_TABLE_LEN;i++) { HashTable[i].project_ID=-1; } } // RS Hash Function unsigned int RSHash( char * str) { unsigned int b = 378551 ; unsigned int a = 63689 ; unsigned int hash = 0 ; while ( * str) { hash = hash * a + ( * str ++ ); a *= b; } return (hash & 0x7FFFFFFF ); } // JS Hash Function unsigned int JSHash( char * str) { unsigned int hash = 1315423911 ; while ( * str) { hash ^= ((hash << 5 ) + ( * str ++ ) + (hash >> 2 )); } return (hash & 0x7FFFFFFF ); } // ELF Hash Function unsigned int ELFHash( char * str) { unsigned int hash = 0 ; unsigned int x = 0 ; while ( * str) { hash = (hash << 4 ) + ( * str ++ ); if ((x = hash & 0xF0000000L ) != 0 ) { hash ^= (x >> 24 ); hash &= ~ x; } } return (hash & 0x7FFFFFFF ); } void count_student(char str[20],int project_ID) { unsigned int key=RSHash(str)%HASH_TABLE_LEN; unsigned int key_A=JSHash(str); unsigned int key_B=ELFHash(str); while(HashTable[key].project_ID!=-1) { if(key_A==HashTable[key].key_A&&key_B==HashTable[key].key_B) { 
if(HashTable[key].project_ID==project_ID) { return; } else if(HashTable[key].project_ID==-2) { return; } else { projects[HashTable[key].project_ID].num--; HashTable[key].project_ID=-2; } return; } key=(key+1)%HASH_TABLE_LEN; } HashTable[key].key_A=key_A; HashTable[key].key_B=key_B; HashTable[key].project_ID=project_ID; projects[project_ID].num++; } int compare(const void *a,const void *b) { struct Project *p1=(struct Project *)a; struct Project *p2=(struct Project *)b; if(p1->num!=p2->num) return p2->num-p1->num; return strcmp(p1->name,p2->name); } void output() { qsort(projects,project_index+1,sizeof(struct Project),compare); int i; for(i=0;i<=project_index;i++) { int str_len=strlen(projects[i].name); projects[i].name[str_len-1]='\0'; printf("%s %d\n",projects[i].name,projects[i].num); } } int main() { if(freopen("input.txt","r",stdin)==NULL) perror("Can not open the input file!"); //input your ... char input[LEN]; while(fgets(input,LEN,stdin)&&input[0]!='0') { init_hashtable(); strcpy(projects[0].name,input); // printf("project name:%s\n",input); projects[0].num=0; project_index=0; while(fgets(input,LEN,stdin)&&input[0]!='1') { if(input[0]>='A'&&input[0]<='Z')//project name { project_index++; strcpy(projects[project_index].name,input); // printf("project name:%s\n",input); projects[project_index].num=0; } else { count_student(input,project_index); // printf("stduent name:%s\n",input); } } output(); } return 0; }这个程序刷到了头一页上(本人很少程序能到第一页),很高兴。
不要说你改进了那个算法,把效率提高了几倍!好的算法是可以几十倍、几百倍地提高效率的! ---杨老师。