数据结构-散列

散列(hashing):是一种以常数平均时间执行插入、删除和查找的技术。

散列表(hash table):是包含有关键字的具有固定大小的数组。表的大小记作table-size。每个关键字被映射到从0到table-size-1这个范围中的某个数,并且被放到适当的单元中。这个映射叫作散列函数(hash function)。

散列函数:当关键字是整数时,通常的做法是直接返回 Key mod TableSize;一个好的方法是保证表的大小为素数,这样关键字的分布会更均匀。

一个好的散列函数:

/*
 * Map an integer key to a bucket index in the range [0, tableSize).
 *
 * x         - key to hash; may be negative.
 * tableSize - number of buckets; must be positive.
 *
 * Returns the non-negative remainder of x modulo tableSize.
 */
int Hash(int x, int tableSize)
{
	int index = x % tableSize;

	/* % keeps the sign of the dividend, so a negative key would
	 * otherwise produce a negative (out-of-bounds) array index. */
	if (index < 0)
		index += tableSize;
	return index;
}

冲突:当一个元素被插入时,如果它散列到的单元中已经存在另一个元素(即两个关键字散列到同一个值),则产生一个冲突。

冲突消除的方法:最简单的两种方法为:分离链表法和开放定址法

  1. 分离链表法:将散列到同一个值的所有元素保存到一个表中。表的类型声明如下:
    /* Separate-chaining hash table: type declarations and interface. */

    /* Forward declarations so the pointer aliases below can be named
     * before the structs themselves are defined. */
    struct hashTbl;
    struct listNode;

    typedef struct hashTbl* hashTable;	/* handle to a whole table */
    typedef struct listNode* position;	/* pointer to one chain node */
    typedef position list;		/* a chain, referred to by a node pointer */

    /* One node of a singly linked chain. */
    struct listNode
    {
    	int element;	/* stored key */
    	position next;	/* following node, or NULL at the end of the chain */
    };

    /* The table itself: an array of tableSize chains. */
    struct hashTbl
    {
    	int tableSize;
    	list* theLists;
    };

    /* Table construction and operations. */
    hashTable initializeTable(int tableSize);
    position find(int key, hashTable h);
    void insert(int key, hashTable h);

    /* Helpers: bucket selection and prime-sized tables. */
    int Hash(int x, int tableSize);
    int nextPrime(int x);
    int isPrime(int x);

    表的初始化:
    /*
     * Create an empty separate-chaining hash table.
     *
     * tableSize - requested number of buckets; the table actually gets
     *             nextPrime(tableSize) buckets.
     *
     * Each bucket is given a header node whose next pointer is NULL; the
     * stored elements hang off that header.
     *
     * Returns the new table, or NULL (after printing "Out of space!") if
     * any allocation fails. Memory obtained before the failure is freed.
     */
    hashTable initializeTable(int tableSize)
    {
    	hashTable h;
    	int i;

    	h = (struct hashTbl *)malloc(sizeof(struct hashTbl));
    	if (h == NULL)
    	{
    		cout << "Out of space!" << endl;
    		return NULL;	/* h must not be dereferenced below */
    	}

    	h->tableSize = nextPrime(tableSize);
    	h->theLists = (list *)malloc(sizeof(list) * h->tableSize);
    	if (h->theLists == NULL)
    	{
    		cout << "Out of space!" << endl;
    		free(h);
    		return NULL;
    	}

    	for (i = 0; i < h->tableSize; i++)
    	{
    		h->theLists[i] = (struct listNode *)malloc(sizeof(struct listNode));
    		if (h->theLists[i] == NULL)
    		{
    			cout << "Out of space!" << endl;
    			/* Roll back the headers created so far; a table with a
    			 * NULL bucket would crash on the first lookup. */
    			while (i > 0)
    				free(h->theLists[--i]);
    			free(h->theLists);
    			free(h);
    			return NULL;
    		}
    		h->theLists[i]->next = NULL;
    	}
    	return h;
    }

    Find例程:
    /*
     * Look up key in a separate-chaining table.
     *
     * The bucket is chosen with Hash(key, h->tableSize); the search starts
     * at the node after the bucket's header node.
     *
     * Returns the node whose element equals key, or NULL if the chain
     * contains no such node.
     */
    position find(int key, hashTable h)
    {
    	list bucket = h->theLists[Hash(key, h->tableSize)];
    	position cur;

    	for (cur = bucket->next; cur != NULL; cur = cur->next)
    	{
    		if (cur->element == key)
    			break;
    	}
    	return cur;
    }
    

    Insert例程:
    void insert(int key, hashTable h)
    {
    	position pos, newCell;
    	list l;
    	pos = find(key, h);
    	if (pos==NULL)
    	{
    		newCell = (listNode*)malloc(sizeof(struct listNode));
    		if (newCell==NULL)
    		{
    			cout << "Out of space!" << endl;
    		}
    		else
    		{
    			l = h->theLists[Hash(key, h->tableSize)];
    			newCell->next = l->next;
    			newCell->element = key;
    			l->next = newCell;
    		}
    	}
    }

    nextPrime函数的实现:求大于等于当前值的最小素数。
    /* Return the smallest value >= x for which isPrime() reports true. */
    int nextPrime(int x)
    {
    	int candidate;

    	/* Step upward from x until a prime is reached. */
    	for (candidate = x; !isPrime(candidate); candidate++)
    		;
    	return candidate;
    }
    
    
    /*
     * Report whether x is a prime number.
     *
     * Returns 1 if x is prime and 0 otherwise. Values below 2 (0, 1 and
     * every negative number) are not prime.
     */
    int isPrime(int x)
    {
    	int i;

    	if (x < 2)
    		return 0;

    	/* i <= x / i is the test i * i <= x, written so that it needs
    	 * neither floating point nor a product that could overflow. */
    	for (i = 2; i <= x / i; i++)
    	{
    		if (x % i == 0)
    			return 0;
    	}
    	return 1;
    }

                                                                                                                                                                                                   
  2. 开放定址法:冲突发生后,依次尝试单元 hᵢ(x) = (Hash(x) + F(i)) mod TableSize,其中 i = 0, 1, 2, …。
     线性探测法:F(i) = i;
     平方探测法:F(i) = i²;
     双散列:F(i) = i·hash₂(x),即选择第二个散列函数。
     以平方探测法为例。散列表结构声明:
    /* Open-addressing hash table: type declarations and interface. */

    struct hashTbl;
    typedef struct hashTbl *hashTable;	/* handle to a whole table */
    typedef int position;			/* index into the cell array */

    /* State tag stored in every cell. */
    enum kindOfEntry
    {
    	legitiamte,	/* cell holds an inserted element (spelling kept: other code uses this name) */
    	empty,		/* cell has never held an element */
    	deleted		/* not assigned by any routine shown here */
    };

    /* One slot of the table: a key plus its state tag. */
    struct hashEntry
    {
    	int element;
    	enum kindOfEntry info;
    };
    typedef struct hashEntry cell;

    /* The table itself: an array of tableSize cells. */
    struct hashTbl
    {
    	int tableSize;
    	cell* theCells;
    };

    /* Table construction and operations. */
    hashTable initializeTable(int tableSize);
    position find(int key, hashTable h);
    void insert(int key, hashTable h);

    /* Helpers: slot selection and prime-sized tables. */
    int Hash(int key,int tableSize);
    int isPrime(int x);
    int nextPrime(int x);

    初始化例程:
    /*
     * Create an empty open-addressing hash table.
     *
     * tableSize - requested number of cells; the table actually gets
     *             nextPrime(tableSize) cells.
     *
     * Every cell starts out marked empty.
     *
     * Returns the new table, or NULL (after printing "Out of space!") if
     * an allocation fails. Memory obtained before the failure is freed.
     */
    hashTable initializeTable(int tableSize)
    {
    	hashTable h;
    	int i;

    	h = (struct hashTbl *)malloc(sizeof(struct hashTbl));
    	if (h == NULL)
    	{
    		cout << "Out of space!" << endl;
    		return NULL;	/* h must not be dereferenced below */
    	}

    	h->tableSize = nextPrime(tableSize);
    	h->theCells = (cell *)malloc(sizeof(cell) * h->tableSize);
    	/* Test the freshly allocated array, not the size field. */
    	if (h->theCells == NULL)
    	{
    		cout << "Out of space!" << endl;
    		free(h);
    		return NULL;
    	}

    	for (i = 0; i < h->tableSize; i++)
    		h->theCells[i].info = empty;
    	return h;
    }
    

    Find例程:
    /*
     * Locate the cell for key using quadratic probing.
     *
     * Probing starts at Hash(key, h->tableSize) and stops at the first
     * cell that is either marked empty or already stores key.
     *
     * Returns the index of that cell; the caller checks the cell's info
     * field to tell a hit from a free slot.
     */
    position find(int key, hashTable h)
    {
    	position idx = Hash(key, h->tableSize);
    	int probes = 0;

    	for (;;)
    	{
    		if (h->theCells[idx].info == empty)
    			break;
    		if (h->theCells[idx].element == key)
    			break;

    		/* Successive squares differ by odd numbers: i*i - (i-1)*(i-1)
    		 * is 2*i - 1, so adding that step moves to the next probe. */
    		probes += 1;
    		idx += 2 * probes - 1;
    		if (idx >= h->tableSize)
    			idx -= h->tableSize;	/* wrap around the end of the array */
    	}
    	return idx;
    }

    Insert例程:
    void insert(int key, hashTable h)
    {
    	position pos;
    	pos = find(key, h);
    	if (h->theCells[pos].info!=legitiamte)
    	{
    		h->theCells[pos].info = legitiamte;
    		h->theCells[pos].element = key;
    	}
    }

    nextPrime函数的实现:
    /* Return the smallest value >= x for which isPrime() reports true. */
    int nextPrime(int x)
    {
    	for (;;)
    	{
    		if (isPrime(x))
    			return x;
    		++x;	/* not prime: try the next integer */
    	}
    }
    
    
    /*
     * Report whether x is a prime number.
     *
     * Returns 1 if x is prime and 0 otherwise. Values below 2 (0, 1 and
     * every negative number) are not prime.
     */
    int isPrime(int x)
    {
    	int i;

    	if (x < 2)
    		return 0;

    	/* i <= x / i is the test i * i <= x, written so that it needs
    	 * neither floating point nor a product that could overflow. */
    	for (i = 2; i <= x / i; i++)
    	{
    		if (x % i == 0)
    			return 0;
    	}
    	return 1;
    }

    最后,对于使用平方探测法的开放定址散列法,若表中元素填得太满,那么操作的运行时间可能过长,且Insert操作可能失败。此时,解决办法为再散列(rehashing):建另外一个大约两倍大的表,扫描原始散列表,计算每一个元素的新散列值并将其插入新表中。

你可能感兴趣的:(数据结构)