多核计算与程序设计 - 06 基本算法和数据结构 之三 哈希表与哈希链表

一,哈希表

哈希表:hash(杂乱信息的意思)  的音译,用来把一些杂乱无章的信息根据其关键字的特点映射到一个连续的空间,操作简单,用途广泛,例如:电子词典。

这里用到的映射方法称为 索引方法。对应的实现函数称为哈希函数。将映射后的值称为索引


1) 哈希表的索引方法

整除取余法

对于整数关键字而言,这是很常用的一种方法,能够很快地计算出索引值,但有时会产生较多相同的索引(即冲突),使算法退化。

最差的时候会变成顺序查找。

// Division-remainder hash: maps an integer key to a bucket index.
//
// nKey         - key to hash; may be negative.
// uBucketCount - number of buckets; expected to be greater than zero.
//
// Returns an index in [0, uBucketCount). The built-in % operator keeps the
// sign of the dividend, so a negative key would otherwise produce a negative
// (out-of-range) index; such a remainder is shifted back into range.
// Returns 0 when uBucketCount is not positive, which avoids dividing by zero.
int hashInt1(int nKey, int uBucketCount)
{
    if (uBucketCount <= 0)
    {
        return 0;
    }

    int nIndex = nKey % uBucketCount;
    if (nIndex < 0)
    {
        nIndex += uBucketCount;
    }
    return nIndex;
}

// Mask-based hash, a cheaper alternative to taking a remainder.
// Meant for bucket counts that are powers of two (1024, 2048, ...): the
// caller passes uMask = bucket count - 1, whose low bits are all 1, and the
// key is ANDed with that mask.
int hashInt2(int nKey, int uMask)
{
    const int nIndex = uMask & nKey;
    return nIndex;
}


折叠法

关键字位数很长时,可以将关键字分割成位数相同的几部分,把每部分转换成计算机可以识别的整数,再将这些整数相加得到一个新的整数,最后对这个新的整数用上面的取余法进行运算,得到hash值。


// Folding hash for C strings.
//
// The string is split into groups of up to five characters. Inside a group
// every step multiplies the running value by 9 (v + (v << 3)) and adds the
// next character. The group values are summed and the sum is reduced modulo
// the bucket count. The per-group conversion is arbitrary and can be replaced
// by one that suits the keys better.
//
// strKey       - NUL-terminated key; a null pointer is treated like "".
// uBucketCount - number of buckets; expected to be greater than zero.
//
// Returns an index in [0, uBucketCount), or 0 when uBucketCount is not
// positive.
int hashString(const char* strKey, int uBucketCount)
{
    if (strKey == NULL || uBucketCount <= 0)
    {
        return 0;
    }

    // Unsigned arithmetic: wrap-around is well defined and the final
    // remainder can never be negative.
    unsigned int uTotal = 0;
    unsigned int uGroup = 0;
    int nInGroup = 0;

    for (const char* p = strKey; *p != '\0'; ++p)
    {
        // A group is complete after five characters: bank it and start anew.
        if (nInGroup == 5)
        {
            uTotal += uGroup;
            uGroup = 0;
            nInGroup = 0;
        }

        uGroup += uGroup << 3;
        // Convert through unsigned char so bytes >= 0x80 are not sign-extended.
        uGroup += static_cast<unsigned char>(*p);
        ++nInGroup;
    }

    uTotal += uGroup;
    return static_cast<int>(uTotal % static_cast<unsigned int>(uBucketCount));
}


平方取中法

将关键字进行平方运算后,再取中间几位作为索引。


// Mid-square hash: squares the key and returns decimal digits 2..4 of the
// square (the units digit is digit 0), i.e. a value in [0, 999].
// Example: 2345 * 2345 = 5499025 -> 54990 -> 990.
//
// The square is computed in 64 bits so it cannot overflow for any int key.
// The digits are taken with (square / 100) % 1000; a modulus derived from
// square / 100000 would be zero for every square below 100000.
int hashSqrMid234(int nKey)
{
    const long long llSquare = static_cast<long long>(nKey) * nKey;
    return static_cast<int>((llSquare / 100) % 1000);
}


随机函数法

用随机函数产生hash值。



2) 哈希表的冲突解决方法

不同的关键字,通过hash算法可能得到相同的hash值,这就产生了冲突。


解决冲突的方法:

链表存储法

将同一索引的关键词放在一个链表中,将哈希表的索引指向链表的表头。


索引探测法

索引探测法的基本思想是发现索引有冲突后,在索引的位置向后查找一个空的索引位置,将数据存放在此索引的位置。

索引探测法公式描述:


      H(i)   =  ( Hash ( key )  +  P(i) )  %  uBucketCount


线性探测法:

当发现索引处已经存储数据时,从索引位置向后按顺序查找一个空索引的位置,将数据存储在这个空位置上。

P(i) = i(i 为第 i 次探测,即每次冲突后向后移动一个位置)


二次探测法 和伪随机探测法:

P(i) = i * i 时 为二次探测法

P(i) = 随机序列时 为伪随机探测法


实例代码:

hash_table.h

#include <iostream>

using namespace std;

#ifndef __HASH_TABLE_H__
#define __HASH_TABLE_H__

// Node of a singly linked chain; one chain hangs off each hash bucket.
struct _SINGLE_NODE
{
    int           nData;   // value stored in this node
    _SINGLE_NODE *pNext;   // next node in the chain, NULL at the end
};
typedef struct _SINGLE_NODE SINGLE_NODE;


// Hash table of int values that resolves collisions by chaining: every bucket
// holds the head of a singly linked list of SINGLE_NODE.
class MyHashTable
{
public:
    // Allocates the bucket array; the table starts empty.
    MyHashTable();
    // Releases the nodes owned by the table.
    ~MyHashTable();

    // Adds nData to the bucket it hashes to; returns the node count afterwards.
    int  InsertHashTable(int nData);
    // Looks nData up; returns the stored value, or -1 when it is not found.
    int  FindHash(int nData);
    // Removes a node from the bucket nData hashes to.
    void DeleteHash(int nData);

private:
    // Not copyable: the table owns malloc'ed memory, and a member-wise copy
    // would make two objects free the same nodes. Declared but intentionally
    // never defined.
    MyHashTable(const MyHashTable&);
    MyHashTable& operator=(const MyHashTable&);

    SINGLE_NODE ** m_pBucket;        // array of m_nBucketCount chain heads
    int            m_nBucketCount;   // number of buckets
    int            m_nNodeCount;     // number of stored nodes
    int            m_nCurrNodeID;    // set to 0 by the constructor
    SINGLE_NODE  * m_pCurrentNode;   // set to NULL by the constructor

    // Maps a key to its bucket index.
    int calcHash(int nKey);
};

#endif


hash_table.cpp

#include "hash_table.h"

// Division-remainder hash: maps an integer key to a bucket index.
//
// nKey         - key to hash; may be negative.
// uBucketCount - number of buckets; expected to be greater than zero.
//
// Returns an index in [0, uBucketCount). The built-in % operator keeps the
// sign of the dividend, so a negative key would otherwise produce a negative
// (out-of-range) index; such a remainder is shifted back into range.
// Returns 0 when uBucketCount is not positive, which avoids dividing by zero.
int hashInt1(int nKey, int uBucketCount)
{
    if (uBucketCount <= 0)
    {
        return 0;
    }

    int nIndex = nKey % uBucketCount;
    if (nIndex < 0)
    {
        nIndex += uBucketCount;
    }
    return nIndex;
}

// Mask-based hash, a cheaper alternative to taking a remainder.
// Meant for bucket counts that are powers of two (1024, 2048, ...): the
// caller passes uMask = bucket count - 1, whose low bits are all 1, and the
// key is ANDed with that mask.
int hashInt2(int nKey, int uMask)
{
    const int nIndex = uMask & nKey;
    return nIndex;
}


// Folding hash for C strings.
//
// The string is split into groups of up to five characters. Inside a group
// every step multiplies the running value by 9 (v + (v << 3)) and adds the
// next character. The group values are summed and the sum is reduced modulo
// the bucket count. The per-group conversion is arbitrary and can be replaced
// by one that suits the keys better.
//
// strKey       - NUL-terminated key; a null pointer is treated like "".
// uBucketCount - number of buckets; expected to be greater than zero.
//
// Returns an index in [0, uBucketCount), or 0 when uBucketCount is not
// positive.
int hashString(const char* strKey, int uBucketCount)
{
    if (strKey == NULL || uBucketCount <= 0)
    {
        return 0;
    }

    // Unsigned arithmetic: wrap-around is well defined and the final
    // remainder can never be negative.
    unsigned int uTotal = 0;
    unsigned int uGroup = 0;
    int nInGroup = 0;

    for (const char* p = strKey; *p != '\0'; ++p)
    {
        // A group is complete after five characters: bank it and start anew.
        if (nInGroup == 5)
        {
            uTotal += uGroup;
            uGroup = 0;
            nInGroup = 0;
        }

        uGroup += uGroup << 3;
        // Convert through unsigned char so bytes >= 0x80 are not sign-extended.
        uGroup += static_cast<unsigned char>(*p);
        ++nInGroup;
    }

    uTotal += uGroup;
    return static_cast<int>(uTotal % static_cast<unsigned int>(uBucketCount));
}

// Mid-square hash: squares the key and returns decimal digits 2..4 of the
// square (the units digit is digit 0), i.e. a value in [0, 999].
// Example: 2345 * 2345 = 5499025 -> 54990 -> 990.
//
// The square is computed in 64 bits so it cannot overflow for any int key.
// The digits are taken with (square / 100) % 1000; a modulus derived from
// square / 100000 would be zero for every square below 100000.
int hashSqrMid234(int nKey)
{
    const long long llSquare = static_cast<long long>(nKey) * nKey;
    return static_cast<int>((llSquare / 100) % 1000);
}

// Creates an empty table with 1024 buckets, each bucket starting out as NULL.
MyHashTable::MyHashTable()
{
    m_nBucketCount = 1024;
    m_nNodeCount = 0;
    m_nCurrNodeID = 0;
    m_pCurrentNode = NULL;

    // calloc hands back zero-filled storage (the job malloc + memset did) and
    // its result is checked before anything touches the array.
    m_pBucket = (SINGLE_NODE**)calloc(m_nBucketCount, sizeof(SINGLE_NODE*));
    if (m_pBucket == NULL)
    {
        // Every other member function indexes m_pBucket, so the object has
        // no usable state without it: stop with a clear message.
        std::cerr << "MyHashTable: cannot allocate bucket array" << std::endl;
        abort();
    }
}

// Frees every node of every bucket chain and then the bucket array itself.
MyHashTable::~MyHashTable()
{
    if (m_pBucket == NULL)
    {
        return;
    }

    for (int i = 0; i < m_nBucketCount; i++)
    {
        SINGLE_NODE *pNode = m_pBucket[i];
        while (pNode != NULL)
        {
            // Read the link first: the node must not be touched once freed.
            SINGLE_NODE *pFollowing = pNode->pNext;
            free(pNode);
            pNode = pFollowing;
        }
        m_pBucket[i] = NULL;
    }

    // The array was allocated by the constructor and is owned by this object.
    free(m_pBucket);
    m_pBucket = NULL;
}

// Maps a key to a bucket index in [0, m_nBucketCount).
// hashInt1 is remainder based, and a remainder can be negative for a negative
// key; such a value is moved back into range so it is always a valid index
// into m_pBucket.
int MyHashTable::calcHash(int nKey)
{
    int nIndex = hashInt1(nKey, m_nBucketCount);
    if (nIndex < 0)
    {
        nIndex += m_nBucketCount;
    }
    return nIndex;
}


// Inserts nData at the head of the chain of the bucket it hashes to and logs
// the bucket index and value to cout. Duplicate values are not rejected.
//
// Returns the number of nodes in the table after the insertion, or -1 when
// the node could not be allocated (the table is left unchanged).
int MyHashTable::InsertHashTable(int nData)
{
    SINGLE_NODE *pNewNode = (SINGLE_NODE*)malloc(sizeof(SINGLE_NODE));
    if (pNewNode == NULL)
    {
        return -1;
    }

    const int nHashValue = calcHash(nData);

    cout << "Insert Hash Key: " << nHashValue  << "  Value=" << nData << endl;

    // Link in front of the current chain head.
    pNewNode->nData = nData;
    pNewNode->pNext = m_pBucket[nHashValue];
    m_pBucket[nHashValue] = pNewNode;

    m_nNodeCount += 1;
    return m_nNodeCount;
}

int MyHashTable::FindHash(int nData)
{
    SINGLE_NODE *pNode;
	int nHashValue;

    nHashValue = calcHash(nData);
	pNode = m_pBucket[nHashValue];

    while(pNode != NULL)
	{
	    if (pNode->nData == nData)
		{
			cout << "FindHash find it Key= " <<nHashValue << " Data=" << pNode->nData << endl;
		    return pNode->nData;
		}
		pNode = pNode->pNext;
	}

	return -1;
}

void MyHashTable::DeleteHash(int nData)
{
    SINGLE_NODE *pNode;
	int nHashValue;

    nHashValue = calcHash(nData);
	pNode = m_pBucket[nHashValue];

    if (pNode != NULL)
	{
	    m_pBucket[nHashValue] = pNode->pNext;
		free(pNode);
		pNode = NULL;
	}
}

main.cpp  测试程序

#include "hash_table.h"

// Demo driver: inserts the sample values, looks each of them up, deletes the
// fourth one (index 3) and looks all of them up again.
int main()
{
    int testArr[10] = {123, 4332,223,8664,2239,89777,54455,32,23,64322};
    // Derived from the array so the loops cannot drift from its size.
    const int nCount = sizeof(testArr) / sizeof(testArr[0]);
    int i = 0;

    MyHashTable hashTable;

    for (i = 0; i < nCount; i++)
    {
        hashTable.InsertHashTable(testArr[i]);
    }

    cout << endl;
    cout << "Finding the hash kes and values" << endl;
    cout << endl;
    for (i = 0; i < nCount; i++)
    {
        if (hashTable.FindHash(testArr[i]) != -1)
        {
            cout << "========================" << endl;
        }
    }

    hashTable.DeleteHash(testArr[3]);

    cout << endl;
    cout << "Test the 3 the key is deleted" << endl;
    cout << endl;
    for (i = 0; i < nCount; i++)
    {
        if (hashTable.FindHash(testArr[i]) != -1)
        {
            cout << "========================" << endl;
        }
    }

    // Wait for input so a console window stays open until the user reacts.
    cin >> i;

    return 0;
}

测试结果:

Insert Hash Key: 123  Value=123
Insert Hash Key: 236  Value=4332
Insert Hash Key: 223  Value=223
Insert Hash Key: 472  Value=8664
Insert Hash Key: 191  Value=2239
Insert Hash Key: 689  Value=89777
Insert Hash Key: 183  Value=54455
Insert Hash Key: 32  Value=32
Insert Hash Key: 23  Value=23
Insert Hash Key: 834  Value=64322


Finding the hash kes and values


FindHash find it Key= 123 Data=123
========================
FindHash find it Key= 236 Data=4332
========================
FindHash find it Key= 223 Data=223
========================
FindHash find it Key= 472 Data=8664
========================
FindHash find it Key= 191 Data=2239
========================
FindHash find it Key= 689 Data=89777
========================
FindHash find it Key= 183 Data=54455
========================
FindHash find it Key= 32 Data=32
========================
FindHash find it Key= 23 Data=23
========================
FindHash find it Key= 834 Data=64322
========================


Test the 3 the key is deleted


FindHash find it Key= 123 Data=123
========================
FindHash find it Key= 236 Data=4332
========================
FindHash find it Key= 223 Data=223
========================
FindHash find it Key= 191 Data=2239
========================
FindHash find it Key= 689 Data=89777
========================
FindHash find it Key= 183 Data=54455
========================
FindHash find it Key= 32 Data=32
========================
FindHash find it Key= 23 Data=23
========================
FindHash find it Key= 834 Data=64322
========================


二,哈希链表

哈希表可以实现快速的精确查找,但无法实现排序操作,链表可以实现有序的输出,但在查找时时间较长,为了结合哈希表和链表的优点这里实现了一个哈希链表。

哈希链表的数据结构是结合了链表和哈希表的复合数据结构。


实例代码:


hash_link_table.h

#include <iostream>

using namespace std;

#ifndef __HASH_LINK_TABLE_H__
#define __HASH_LINK_TABLE_H__



// Node that is a member of two lists at once: a singly linked hash-bucket
// chain (pHashNext) and a doubly linked list (pPrev / pNext).
struct _SINGLE_NODE
{
    int           nData;       // value stored in this node
    _SINGLE_NODE *pHashNext;   // next node in the same hash bucket
    _SINGLE_NODE *pPrev;       // previous node in the doubly linked list
    _SINGLE_NODE *pNext;       // next node in the doubly linked list
};
typedef struct _SINGLE_NODE SINGLE_NODE;


// Hash table of int values combined with a doubly linked list: every node is
// reachable through its hash bucket and is also a member of the list that
// starts at m_pLinkHead and ends at m_pLinkTail.
class MyHashTable
{
public:
    // Allocates the bucket array; the table and the list start empty.
    MyHashTable();
    // Releases the nodes owned by the table.
    ~MyHashTable();

    // Adds nData to its bucket and to the linked list; returns the node
    // count afterwards.
    int  InsertHashTable(int nData);
    // Looks nData up; returns the stored value, or -1 when it is not found.
    int  FindHash(int nData);
    // Removes a node from the bucket nData hashes to.
    void DeleteHash(int nData);
    // Prints the values of the linked list from head to tail.
    void ShowSortLink();

private:
    // Not copyable: the table owns malloc'ed memory, and a member-wise copy
    // would make two objects free the same nodes. Declared but intentionally
    // never defined.
    MyHashTable(const MyHashTable&);
    MyHashTable& operator=(const MyHashTable&);

    SINGLE_NODE ** m_pBucket;        // array of m_nBucketCount chain heads
    int            m_nBucketCount;   // number of buckets
    int            m_nNodeCount;     // number of stored nodes
    int            m_nCurrNodeID;    // set to 0 by the constructor
    SINGLE_NODE  * m_pCurrentNode;   // set to NULL by the constructor
    SINGLE_NODE  * m_pLinkHead;      // first node of the linked list
    SINGLE_NODE  * m_pLinkTail;      // last node of the linked list

    // Maps a key to its bucket index.
    int calcHash(int nKey);
};

#endif

hash_link_table.cpp

#include "hash_link_table.h"

// Division-remainder hash: maps an integer key to a bucket index.
//
// nKey         - key to hash; may be negative.
// uBucketCount - number of buckets; expected to be greater than zero.
//
// Returns an index in [0, uBucketCount). The built-in % operator keeps the
// sign of the dividend, so a negative key would otherwise produce a negative
// (out-of-range) index; such a remainder is shifted back into range.
// Returns 0 when uBucketCount is not positive, which avoids dividing by zero.
int hashInt1(int nKey, int uBucketCount)
{
    if (uBucketCount <= 0)
    {
        return 0;
    }

    int nIndex = nKey % uBucketCount;
    if (nIndex < 0)
    {
        nIndex += uBucketCount;
    }
    return nIndex;
}

// Mask-based hash, a cheaper alternative to taking a remainder.
// Meant for bucket counts that are powers of two (1024, 2048, ...): the
// caller passes uMask = bucket count - 1, whose low bits are all 1, and the
// key is ANDed with that mask.
int hashInt2(int nKey, int uMask)
{
    const int nIndex = uMask & nKey;
    return nIndex;
}


// Folding hash for C strings.
//
// The string is split into groups of up to five characters. Inside a group
// every step multiplies the running value by 9 (v + (v << 3)) and adds the
// next character. The group values are summed and the sum is reduced modulo
// the bucket count. The per-group conversion is arbitrary and can be replaced
// by one that suits the keys better.
//
// strKey       - NUL-terminated key; a null pointer is treated like "".
// uBucketCount - number of buckets; expected to be greater than zero.
//
// Returns an index in [0, uBucketCount), or 0 when uBucketCount is not
// positive.
int hashString(const char* strKey, int uBucketCount)
{
    if (strKey == NULL || uBucketCount <= 0)
    {
        return 0;
    }

    // Unsigned arithmetic: wrap-around is well defined and the final
    // remainder can never be negative.
    unsigned int uTotal = 0;
    unsigned int uGroup = 0;
    int nInGroup = 0;

    for (const char* p = strKey; *p != '\0'; ++p)
    {
        // A group is complete after five characters: bank it and start anew.
        if (nInGroup == 5)
        {
            uTotal += uGroup;
            uGroup = 0;
            nInGroup = 0;
        }

        uGroup += uGroup << 3;
        // Convert through unsigned char so bytes >= 0x80 are not sign-extended.
        uGroup += static_cast<unsigned char>(*p);
        ++nInGroup;
    }

    uTotal += uGroup;
    return static_cast<int>(uTotal % static_cast<unsigned int>(uBucketCount));
}

// Mid-square hash: squares the key and returns decimal digits 2..4 of the
// square (the units digit is digit 0), i.e. a value in [0, 999].
// Example: 2345 * 2345 = 5499025 -> 54990 -> 990.
//
// The square is computed in 64 bits so it cannot overflow for any int key.
// The digits are taken with (square / 100) % 1000; a modulus derived from
// square / 100000 would be zero for every square below 100000.
int hashSqrMid234(int nKey)
{
    const long long llSquare = static_cast<long long>(nKey) * nKey;
    return static_cast<int>((llSquare / 100) % 1000);
}

// Creates an empty table with 1024 buckets (each starting out as NULL) and
// an empty linked list.
MyHashTable::MyHashTable()
{
    m_nBucketCount = 1024;
    m_nNodeCount = 0;
    m_nCurrNodeID = 0;
    m_pCurrentNode = NULL;
    m_pLinkHead = NULL;
    m_pLinkTail = NULL;

    // calloc hands back zero-filled storage (the job malloc + memset did) and
    // its result is checked before anything touches the array.
    m_pBucket = (SINGLE_NODE**)calloc(m_nBucketCount, sizeof(SINGLE_NODE*));
    if (m_pBucket == NULL)
    {
        // Every other member function indexes m_pBucket, so the object has
        // no usable state without it: stop with a clear message.
        std::cerr << "MyHashTable: cannot allocate bucket array" << std::endl;
        abort();
    }
}

// Frees every node and then the bucket array.
//
// Nodes are visited through their bucket chains, which are linked by
// pHashNext. pNext belongs to the separate doubly linked list, and following
// it from a bucket would free nodes that other buckets still point to.
MyHashTable::~MyHashTable()
{
    if (m_pBucket != NULL)
    {
        for (int i = 0; i < m_nBucketCount; i++)
        {
            SINGLE_NODE *pNode = m_pBucket[i];
            while (pNode != NULL)
            {
                // Read the link first: the node must not be touched once freed.
                SINGLE_NODE *pFollowing = pNode->pHashNext;
                free(pNode);
                pNode = pFollowing;
            }
            m_pBucket[i] = NULL;
        }

        // The array was allocated by the constructor and is owned by this object.
        free(m_pBucket);
        m_pBucket = NULL;
    }

    // All nodes are gone, so the list pointers would dangle.
    m_pLinkHead = NULL;
    m_pLinkTail = NULL;
}

// Maps a key to a bucket index in [0, m_nBucketCount).
// hashInt1 is remainder based, and a remainder can be negative for a negative
// key; such a value is moved back into range so it is always a valid index
// into m_pBucket.
int MyHashTable::calcHash(int nKey)
{
    int nIndex = hashInt1(nKey, m_nBucketCount);
    if (nIndex < 0)
    {
        nIndex += m_nBucketCount;
    }
    return nIndex;
}


int MyHashTable::InsertHashTable(int nData)
{
    SINGLE_NODE *pNode = NULL;
    SINGLE_NODE *pNewNode;
    int nHashValue;

    pNewNode = (SINGLE_NODE*)malloc(sizeof(SINGLE_NODE));

    nHashValue = calcHash(nData);
    pNode = m_pBucket[nHashValue];

    cout << "Insert Hash Key: " << nHashValue  << "  Value=" << nData << endl;

    pNewNode->nData = nData;
    pNewNode->pHashNext = pNode;

    m_pBucket[nHashValue] = pNewNode;
    m_nNodeCount += 1;


    // add to link
    pNewNode->pNext = NULL;
    pNewNode->pPrev = NULL;
    if (m_pLinkHead == NULL)
    {
        m_pLinkHead = pNewNode;
        m_pLinkTail = pNewNode;
    }
    else
    {
        pNode = m_pLinkHead;
        while ((pNode->pNext != NULL) && (pNewNode->nData > pNode->nData))
        {
            pNode = pNode->pNext;
        }

        if ((pNode->pNext == NULL) && (pNewNode->nData >  pNode->nData))
        {
            pNewNode->pPrev = pNode;
            pNode->pNext = pNewNode;
            m_pLinkTail = pNewNode;
        }
        else
        {
            pNewNode->pPrev = pNode->pPrev;
            if (pNode != m_pLinkHead)
            {
                pNode->pPrev->pNext = pNewNode;
            }
            else
            {
                m_pLinkHead = pNewNode;
            }

            pNewNode->pNext = pNode;
            pNode->pPrev = pNewNode;
        }
    }

    return m_nNodeCount;
}

void MyHashTable::ShowSortLink()
{
    SINGLE_NODE *pNode = m_pLinkHead;
    cout << endl;
    cout << endl;
    cout << "Show all the node ==========================" << endl;
    while(pNode != NULL)
    {
        cout << "key= " << pNode->nData << "   ";
        pNode = pNode->pNext;
    }
    cout << endl;
    cout << endl;
}

// Searches the chain of the bucket nData hashes to.
// On a hit the bucket index and value are logged to cout and the stored
// value is returned; -1 is returned when no node holds nData.
//
// The chain is followed through pHashNext. pNext links the separate doubly
// linked list and would lead out of the bucket.
int MyHashTable::FindHash(int nData)
{
    const int nHashValue = calcHash(nData);

    for (SINGLE_NODE *pNode = m_pBucket[nHashValue]; pNode != NULL; pNode = pNode->pHashNext)
    {
        if (pNode->nData == nData)
        {
            cout << "FindHash find it Key= " <<nHashValue << " Data=" << pNode->nData << endl;
            return pNode->nData;
        }
    }

    return -1;
}

void MyHashTable::DeleteHash(int nData)
{
    SINGLE_NODE *pNode;
    int nHashValue;

    nHashValue = calcHash(nData);
    pNode = m_pBucket[nHashValue];

    if (pNode != NULL)
    {
        m_pBucket[nHashValue] = pNode->pNext;
        pNode->pPrev->pNext = pNode->pNext;
        pNode->pNext->pPrev = pNode->pPrev;

        free(pNode);
        pNode = NULL;
    }
}

main.cpp

#include "hash_link_table.h"

// Demo driver: inserts the sample values, looks each of them up, prints the
// linked list, deletes the fourth value (index 3), then repeats the lookups
// and the list printout.
int main()
{
    int testArr[10] = {123, 4332,223,8664,2239,89777,54455,32,23,64322};
    // Derived from the array so the loops cannot drift from its size.
    const int nCount = sizeof(testArr) / sizeof(testArr[0]);
    int i = 0;

    MyHashTable hashTable;

    for (i = 0; i < nCount; i++)
    {
        hashTable.InsertHashTable(testArr[i]);
    }

    cout << endl;
    cout << "Finding the hash kes and values" << endl;
    cout << endl;
    for (i = 0; i < nCount; i++)
    {
        if (hashTable.FindHash(testArr[i]) != -1)
        {
            cout << "========================" << endl;
        }
    }

    hashTable.ShowSortLink();

    hashTable.DeleteHash(testArr[3]);

    cout << endl;
    cout << "Test the 3 the key is deleted" << endl;
    cout << endl;
    for (i = 0; i < nCount; i++)
    {
        if (hashTable.FindHash(testArr[i]) != -1)
        {
            cout << "========================" << endl;
        }
    }

    hashTable.ShowSortLink();

    // Wait for input so a console window stays open until the user reacts.
    cin >> i;

    return 0;
}


测试结果:

Insert Hash Key: 123  Value=123
Insert Hash Key: 236  Value=4332
Insert Hash Key: 223  Value=223
Insert Hash Key: 472  Value=8664
Insert Hash Key: 191  Value=2239
Insert Hash Key: 689  Value=89777
Insert Hash Key: 183  Value=54455
Insert Hash Key: 32  Value=32
Insert Hash Key: 23  Value=23
Insert Hash Key: 834  Value=64322


Finding the hash kes and values


FindHash find it Key= 123 Data=123
========================
FindHash find it Key= 236 Data=4332
========================
FindHash find it Key= 223 Data=223
========================
FindHash find it Key= 472 Data=8664  test deleting
========================
FindHash find it Key= 191 Data=2239
========================
FindHash find it Key= 689 Data=89777
========================
FindHash find it Key= 183 Data=54455
========================
FindHash find it Key= 32 Data=32
========================
FindHash find it Key= 23 Data=23
========================
FindHash find it Key= 834 Data=64322
========================




Show all the node ==========================
key= 23   key= 32   key= 123   key= 223   key= 2239   key= 4332   key= 8664   key= 54455   key= 64322   key= 89777




Test the 3 the key is deleted


FindHash find it Key= 123 Data=123
========================
FindHash find it Key= 236 Data=4332
========================
FindHash find it Key= 223 Data=223
========================
FindHash find it Key= 191 Data=2239
========================
FindHash find it Key= 689 Data=89777
========================
FindHash find it Key= 183 Data=54455
========================
FindHash find it Key= 32 Data=32
========================
FindHash find it Key= 23 Data=23
========================
FindHash find it Key= 834 Data=64322
========================




Show all the node ==========================
key= 23   key= 32   key= 123   key= 223   key= 2239   key= 4332   key= 54455   key= 64322   key= 89777


你可能感兴趣的:(多核计算与程序设计 - 06 基本算法和数据结构 之三 哈希表与哈希链表)