在计算机科学中,并查集是一种树型的数据结构,其保持着用于处理一些不相交集合(Disjoint Sets)的合并及查询问题。有一个联合-查找算法(union-find algorithm)定义了两个操作用于此数据结构:
Find:确定元素属于哪一个子集。它可以被用来确定两个元素是否属于同一子集;
Union:将两个子集合并成同一个集合;
实现并查集的关键是实现union-find algorithm, 本文根据常用的四种算法,实现了这个类,具体算法实现请参看维基百科;
制造测试数据集,测试几种方法之间性能的指标;
#ifndef _DISJOINT_SET_H_
#define _DISJOINT_SET_H_
#include
#include
#include
#include
#include
#include "windows.h"
enum DISJOINTWAY
{
COMMON_WAY,
COMPREE_WAY,
WEIGHT_WAY,
WEIGHT_COMPRESS_WAY
};
/*
* encapsulate the class of disjoint set
*
*/
#define MAXDISJOINTSET 0xffffff
class DisjointSet
{
public:
DisjointSet( int maxSize = MAXDISJOINTSET ):m_item(0), m_size(maxSize)
{
m_item = new int[maxSize];
for( int i = 0; i < m_size; i++ )
{
m_item[i] = i;
}
m_path = new int[maxSize];
memset( m_path, 1, sizeof(int)*maxSize );
}
~DisjointSet()
{
Clear();
}
/*
* find interface
*
*/
int Find( DISJOINTWAY way, int input )
{
assert( input < m_size );
switch( way )
{
case COMMON_WAY:
return ImplFindFirst( input );
case COMPREE_WAY:
return ImplFindSecond( input );
case WEIGHT_WAY:
return ImplFindWeight( input );
case WEIGHT_COMPRESS_WAY:
return ImplFindWeightCompree( input );
default:
return -1;
}
}
/*
* make union
*
*/
void Union( DISJOINTWAY way, int first, int second )
{
assert( first < m_size && second < m_size );
switch( way )
{
case COMMON_WAY:
ImplUnionFirst( first, second );
break;
case COMPREE_WAY:
ImplUnionSecond( first, second );
break;
case WEIGHT_WAY:
ImplUnionWeighted( first, second );
break;
case WEIGHT_COMPRESS_WAY:
ImplUnionCompree( first, second );
break;
default:
break;
}
}
/*
*
*
*/
void Clear()
{
delete [] m_item;
m_item = 0;
delete [] m_path;
m_path = 0;
m_size = 0;
}
protected:
int ImplFindFirst( int input )
{
assert( input < m_size );
return m_item[input];
}
int ImplFindSecond( int input )
{
int i = input;
for( ; i != m_item[i]; i = m_item[i] );
return i;
}
int ImplFindWeight( int input )
{
int i = input;
for( ; i != m_item[i]; i = m_item[i] );
return i;
}
int ImplFindWeightCompree( int input )
{
int i = input;
for( ; i != m_item[i]; i = m_item[i] )
m_item[i] = m_item[m_item[i]];
return i;
}
/*
*
*
*/
void ImplUnionFirst( int first, int second )
{
int x = m_item[first];
int y = m_item[second];
if( x != y )
{
m_item[first] = y;
}
for( int i = 0; i < m_size; i++ )
{
if( x == m_item[i] )
m_item[i] = y;
}
}
/*
*
*
*/
void ImplUnionSecond( int& first, int& second )
{
if( first != second )
{
m_item[first] = second;
}
}
/*
*
*
*/
void ImplUnionWeighted( int first, int second )
{
if( first != second )
{
if( m_path[first] < m_path[second] )
{
m_item[first] = second;
m_path[second] += m_path[first];
}
else
{
m_item[second] = first;
m_path[first] += m_path[second];
}
}
}
/*
*
*
*/
void ImplUnionCompree( int first, int second )
{
if( first != second )
{
if( m_path[first] < m_path[second] )
{
m_item[first] = second;
m_path[second] += m_path[first];
}
else
{
m_item[second] = first;
m_path[first] += m_path[second];
}
}
}
protected:
int* m_item;
int m_size;
int* m_path;
};
void TestDisjointSetSimple()
{
DisjointSet djoint;
int i = djoint.Find( COMMON_WAY, 1 );
int j = djoint.Find( COMMON_WAY, 3 );
if( i != j )
djoint.Union( COMMON_WAY, 1, 3 );
i = djoint.Find( COMMON_WAY, 2 );
j = djoint.Find( COMMON_WAY, 5 );
if( i != j )
djoint.Union( COMMON_WAY, i, j );
i = djoint.Find( COMMON_WAY, 2 );
j = djoint.Find( COMMON_WAY, 6 );
if( i != j )
djoint.Union( COMMON_WAY, i, j );
i = djoint.Find( COMMON_WAY, 6 );
j = djoint.Find( COMMON_WAY, 7 );
if( i != j )
djoint.Union( COMMON_WAY, i, j );
assert( djoint.Find( COMMON_WAY, 2 ) == djoint.Find( COMMON_WAY, 7 ) );
i = djoint.Find( COMMON_WAY, 1 );
j = djoint.Find( COMMON_WAY, 7 );
if( i != j )
djoint.Union( COMMON_WAY, i, j );
assert( djoint.Find( COMMON_WAY, 3 ) == djoint.Find( COMMON_WAY, 7 ) );
}
void TestDisjointSetComplex( DISJOINTWAY way, const char* str )
{
unsigned long start = GetTickCount();
DisjointSet djoint;
const int len = 1000000;
const int base = 60000;
int halfLen = len / 2;
srand( time(NULL) );
for( int i = 0; i < len; i++ )
{
int first = rand() % base;
int second = rand() % base;
if( i > halfLen )
{
first += base;
second += base;
}
if( first != second )
{
first = djoint.Find( way, first );
second = djoint.Find( way, second );
if( first != second )
djoint.Union( way, first, second );
assert( djoint.Find( way, first ) == djoint.Find( way, second ) );
}
}
unsigned long interval = GetTickCount() - start;
printf(" %s way consume time is %d \n", str, interval );
}
void TestSuiteDisjointSet()
{
TestDisjointSetSimple();
const char* str[] = {"common", "compress", "weight", "weight compress"};
for( int i = WEIGHT_COMPRESS_WAY; i >= 0; i--)
{
TestDisjointSetComplex((DISJOINTWAY)i, str[i] );
}
}
#endif
compile and run in visual studio 2005
下面图片是几种方法运行时间之比较,最直白方法的时间到现在还没输出,所以就没有显示: