在计算机科学中,并查集是一种树型的数据结构,用于处理一些不相交集合(Disjoint Sets)的合并及查询问题。联合-查找算法(union-find algorithm)为此数据结构定义了两个操作:
Find:确定元素属于哪一个子集。它能够被用来确定两个元素是否属于同一子集;
Union:将两个子集合并成同一个集合;
实现并查集的关键是实现 union-find algorithm。本文依据常用的四种算法实现了这个类,详细的算法实现请参看维基百科。
构造测试数据集,测试并比较几种方法的性能指标。
#ifndef _DISJOINT_SET_H_
#define _DISJOINT_SET_H_

#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <time.h>
#include <math.h>
#ifdef _WIN32
#include "windows.h"
#endif

/*
 * Strategy selector for DisjointSet::Find / DisjointSet::Union.
 *
 *   COMMON_WAY          - quick-find: every element stores its set label
 *                         directly; Union relabels by scanning all elements.
 *   COMPREE_WAY         - quick-union: elements store a parent index and
 *                         Find walks up to the root (no balancing).
 *   WEIGHT_WAY          - quick-union that attaches the smaller tree under
 *                         the larger one.
 *   WEIGHT_COMPRESS_WAY - weighted quick-union whose Find also shortens the
 *                         walked path (each visited node is re-pointed to
 *                         its grandparent).
 *
 * COMMON_WAY interprets the stored value as a label, the other three as a
 * parent link, so one instance should be driven with a single strategy.
 */
enum DISJOINTWAY
{
    COMMON_WAY,
    COMPREE_WAY,
    WEIGHT_WAY,
    WEIGHT_COMPRESS_WAY
};

/* Default number of elements managed by a DisjointSet. */
#define MAXDISJOINTSET 0xffffff

/*
 * Disjoint-set (union-find) structure over the integers [0, maxSize).
 *
 * Every element starts in its own singleton set. The object owns two
 * heap arrays; copying performs a deep copy so that copies are independent
 * and each releases only its own storage.
 */
class DisjointSet
{
public:
    /*
     * Creates maxSize singleton sets. A negative maxSize is treated as 0.
     */
    DisjointSet( int maxSize = MAXDISJOINTSET )
        : m_item(0), m_size(maxSize > 0 ? maxSize : 0), m_path(0)
    {
        Allocate();
        for( int i = 0; i < m_size; i++ )
        {
            m_item[i] = i;
            // Each tree starts with exactly one node. (A byte-wise memset
            // cannot express this: it would store 0x01010101 per int.)
            m_path[i] = 1;
        }
    }

    /* Deep copy: the new object gets its own arrays. */
    DisjointSet( const DisjointSet& other )
        : m_item(0), m_size(other.m_size), m_path(0)
    {
        Allocate();
        for( int i = 0; i < m_size; i++ )
        {
            m_item[i] = other.m_item[i];
            m_path[i] = other.m_path[i];
        }
    }

    /* Copy-and-swap assignment; safe for self-assignment. */
    DisjointSet& operator=( const DisjointSet& other )
    {
        if( this != &other )
        {
            DisjointSet copy( other );
            Swap( copy );
        }
        return *this;
    }

    ~DisjointSet()
    {
        Clear();
    }

    /*
     * Returns the representative of the set containing input, using the
     * given strategy. Returns -1 for an unknown strategy or (when asserts
     * are compiled out) for an index outside [0, size).
     */
    int Find( DISJOINTWAY way, int input )
    {
        assert( InRange( input ) );
        if( !InRange( input ) )
            return -1;

        switch( way )
        {
        case COMMON_WAY:
            return ImplFindFirst( input );
        case COMPREE_WAY:
            return ImplFindSecond( input );
        case WEIGHT_WAY:
            return ImplFindWeight( input );
        case WEIGHT_COMPRESS_WAY:
            return ImplFindWeightCompree( input );
        default:
            return -1;
        }
    }

    /*
     * Merges the sets containing first and second using the given
     * strategy. The arguments may be any members of the two sets, not
     * only their representatives. Out-of-range indices are ignored when
     * asserts are compiled out.
     */
    void Union( DISJOINTWAY way, int first, int second )
    {
        assert( InRange( first ) && InRange( second ) );
        if( !InRange( first ) || !InRange( second ) )
            return;

        switch( way )
        {
        case COMMON_WAY:
            ImplUnionFirst( first, second );
            break;
        case COMPREE_WAY:
            ImplUnionSecond( first, second );
            break;
        case WEIGHT_WAY:
            ImplUnionWeighted( first, second );
            break;
        case WEIGHT_COMPRESS_WAY:
            ImplUnionCompree( first, second );
            break;
        default:
            break;
        }
    }

    /*
     * Releases both arrays and resets the size to 0. After this call no
     * index is valid any more.
     */
    void Clear()
    {
        delete [] m_item;
        m_item = 0;
        delete [] m_path;
        m_path = 0;
        m_size = 0;
    }

protected:
    /* Quick-find lookup: the stored value is the set label itself. */
    int ImplFindFirst( int input )
    {
        assert( InRange( input ) );
        return m_item[input];
    }

    /* Quick-union lookup: follow parent links up to the root. */
    int ImplFindSecond( int input )
    {
        return FindRoot( input );
    }

    /* Weighted quick-union lookup: same root walk as ImplFindSecond. */
    int ImplFindWeight( int input )
    {
        return FindRoot( input );
    }

    /*
     * Root walk that re-points every visited node to its grandparent,
     * shortening the path for later lookups.
     */
    int ImplFindWeightCompree( int input )
    {
        int i = input;
        for( ; i != m_item[i]; i = m_item[i] )
            m_item[i] = m_item[m_item[i]];
        return i;
    }

    /*
     * Quick-find union: relabel every element carrying first's label with
     * second's label. Costs one pass over all m_size elements.
     */
    void ImplUnionFirst( int first, int second )
    {
        const int from = m_item[first];
        const int to = m_item[second];
        if( from == to )
            return; // already in the same set, nothing to relabel

        for( int i = 0; i < m_size; i++ )
        {
            if( m_item[i] == from )
                m_item[i] = to;
        }
    }

    /*
     * Quick-union: hang the root of first's tree under the root of
     * second's tree. Roots are resolved here so that passing a non-root
     * element cannot detach it from its current tree.
     */
    void ImplUnionSecond( int first, int second )
    {
        const int rootFirst = ImplFindSecond( first );
        const int rootSecond = ImplFindSecond( second );
        if( rootFirst != rootSecond )
        {
            m_item[rootFirst] = rootSecond;
        }
    }

    /* Weighted quick-union: link the two roots by tree size. */
    void ImplUnionWeighted( int first, int second )
    {
        LinkBySize( ImplFindWeight( first ), ImplFindWeight( second ) );
    }

    /*
     * Weighted quick-union using the path-shortening lookup to resolve
     * the two roots.
     */
    void ImplUnionCompree( int first, int second )
    {
        LinkBySize( ImplFindWeightCompree( first ), ImplFindWeightCompree( second ) );
    }

protected:
    int* m_item;  // label (COMMON_WAY) or parent index (other strategies)
    int  m_size;  // number of elements; valid indices are [0, m_size)
    int* m_path;  // number of nodes in the tree rooted at each index

private:
    /* True when index addresses an existing element. */
    bool InRange( int index ) const
    {
        return index >= 0 && index < m_size;
    }

    /* Follows parent links from input until a self-parented node is hit. */
    int FindRoot( int input ) const
    {
        int i = input;
        while( i != m_item[i] )
            i = m_item[i];
        return i;
    }

    /*
     * Attaches the smaller of two root trees under the larger one and
     * accumulates the size on the surviving root. On a tie the second
     * root is attached under the first.
     */
    void LinkBySize( int rootFirst, int rootSecond )
    {
        if( rootFirst == rootSecond )
            return;

        if( m_path[rootFirst] < m_path[rootSecond] )
        {
            m_item[rootFirst] = rootSecond;
            m_path[rootSecond] += m_path[rootFirst];
        }
        else
        {
            m_item[rootSecond] = rootFirst;
            m_path[rootFirst] += m_path[rootSecond];
        }
    }

    /*
     * Allocates both arrays for m_size elements. If the second allocation
     * throws, the first one is released before the exception propagates.
     */
    void Allocate()
    {
        m_item = new int[m_size];
        try
        {
            m_path = new int[m_size];
        }
        catch( ... )
        {
            delete [] m_item;
            m_item = 0;
            throw;
        }
    }

    /* Exchanges the storage of two instances. */
    void Swap( DisjointSet& other )
    {
        int* item = m_item;  m_item = other.m_item;  other.m_item = item;
        int* path = m_path;  m_path = other.m_path;  other.m_path = path;
        int  size = m_size;  m_size = other.m_size;  other.m_size = size;
    }
};

/*
 * Millisecond tick counter used by the benchmark. Uses GetTickCount on
 * Windows and the C clock() function elsewhere.
 */
inline unsigned long DisjointSetTickMs()
{
#ifdef _WIN32
    return GetTickCount();
#else
    return (unsigned long)( (double)clock() * 1000.0 / CLOCKS_PER_SEC );
#endif
}

/*
 * Small quick-find scenario: merges {1,3}, {2,5,6,7} and finally both
 * groups, asserting the expected memberships along the way.
 */
inline void TestDisjointSetSimple()
{
    // Only elements 0..7 are touched; a right-sized set keeps the
    // quick-find relabel pass short.
    DisjointSet djoint( 8 );

    int i = djoint.Find( COMMON_WAY, 1 );
    int j = djoint.Find( COMMON_WAY, 3 );
    if( i != j )
        djoint.Union( COMMON_WAY, 1, 3 );

    i = djoint.Find( COMMON_WAY, 2 );
    j = djoint.Find( COMMON_WAY, 5 );
    if( i != j )
        djoint.Union( COMMON_WAY, i, j );

    i = djoint.Find( COMMON_WAY, 2 );
    j = djoint.Find( COMMON_WAY, 6 );
    if( i != j )
        djoint.Union( COMMON_WAY, i, j );

    i = djoint.Find( COMMON_WAY, 6 );
    j = djoint.Find( COMMON_WAY, 7 );
    if( i != j )
        djoint.Union( COMMON_WAY, i, j );

    assert( djoint.Find( COMMON_WAY, 2 ) == djoint.Find( COMMON_WAY, 7 ) );

    i = djoint.Find( COMMON_WAY, 1 );
    j = djoint.Find( COMMON_WAY, 7 );
    if( i != j )
        djoint.Union( COMMON_WAY, i, j );

    assert( djoint.Find( COMMON_WAY, 3 ) == djoint.Find( COMMON_WAY, 7 ) );
}

/*
 * Benchmark: performs one million random find/union rounds with the given
 * strategy and prints the elapsed time in milliseconds. The first half of
 * the rounds draws pairs from [0, base), the second half from
 * [base, 2*base). str is the strategy name used in the printed line.
 */
inline void TestDisjointSetComplex( DISJOINTWAY way, const char* str )
{
    const int len = 1000000;
    const int base = 60000;
    const int halfLen = len / 2;

    const unsigned long start = DisjointSetTickMs();

    // Indices never exceed 2*base - 1, so size the set accordingly.
    DisjointSet djoint( 2 * base );

    srand( (unsigned int)time( NULL ) );
    for( int i = 0; i < len; i++ )
    {
        int first = rand() % base;
        int second = rand() % base;
        if( i > halfLen )
        {
            first += base;
            second += base;
        }

        if( first != second )
        {
            first = djoint.Find( way, first );
            second = djoint.Find( way, second );
            if( first != second )
                djoint.Union( way, first, second );

            assert( djoint.Find( way, first ) == djoint.Find( way, second ) );
        }
    }

    const unsigned long interval = DisjointSetTickMs() - start;
    printf( " %s way consume time is %lu \n", str, interval );
}

/*
 * Runs the simple scenario, then the benchmark for every strategy from
 * WEIGHT_COMPRESS_WAY down to COMMON_WAY.
 */
inline void TestSuiteDisjointSet()
{
    TestDisjointSetSimple();

    const char* str[] = { "common", "compress", "weight", "weight compress" };
    for( int i = WEIGHT_COMPRESS_WAY; i >= 0; i-- )
    {
        TestDisjointSetComplex( static_cast<DISJOINTWAY>( i ), str[i] );
    }
}

#endif
Compiled and run in Visual Studio 2005.
以下图片是几种方法执行时间的比较;最直白的方法到现在还没有输出运行时间,所以没有显示: