具体算法实现细节请查看IEEE论文:Set-oriented mining for association rules in relational databases
SETM算法代码+测试用例+具体操作步骤下载戳这http://download.csdn.net/detail/michealtx/4266085
sql语言实现,找出来所有的频繁模式。代码如下:
---------------最终版------------ --drop table c1,c2,c3,c4,r1,r2,r3,r4 ------------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------------------- --为SETMAlgorithm数据库创建空表r1 create table R1 ( tx_id int, item1 nvarchar(50) ) --把setmbackup数据库的sales_data表中导入数据到r1中 insert into R1 select * from SETMBackup.dbo.SALES_DATA ------------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------------------- --------------------setm算法过程--------------- --将R1按item1排序 select * into #temp from R1--把R1内容存到临时表temp中 delete from R1--删除R1 insert R1 select * from #temp order by item1--将临时表temp按item1升序排序后插入到R1 drop table #temp--删除临时表temp --声明最小支持度MinSupport declare @minimum_support int set @minimum_support=3 declare @MinSupport varchar(10) set @MinSupport=cast(@minimum_support as varchar(10)) --声明变量k declare @k int set @k=1 --由R1生成C1 declare @CK varchar(100) set @CK='C'+cast(@k as varchar(100)) declare @CreateTable varchar(100) set @CreateTable=' create table '+@CK+' (item1 nvarchar(10),cnt int) ' declare @RK varchar(10) set @RK='R'+cast(@k as varchar(100)) declare @InsertInto varchar(100) set @InsertInto=' insert into '+@CK+' select item1,COUNT(*) from '+@RK+' group by item1 having COUNT(*)>='+@MinSupport exec(@CreateTable) exec(@InsertInto) print '由R1生成C1' --进入循环 declare @RKNotEmpty varchar(100) --set @RKNotEmpty='if not exists(select * from '+@RK+') begin drop table '+@RK+' break end' declare @KMinusOne int set @KMinusOne=1 declare @RKMinusOne varchar(10) set @RKMinusOne='R'+cast(@KMinusOne as varchar(100)) while 1=1 begin print '进入循环' set @KMinusOne=@k set @RKMinusOne='R'+cast(@KMinusOne as varchar(100)) set @k=@k+1 set @RK='R'+cast(@k as varchar(100)) ---------------------------------------------------- -----------------将R[k-1]按全部列排序--------------- declare @str1 varchar(MAX) set @str1= 'declare @max int select @max=count(name) from syscolumns where id=object_id('''+@RKMinusOne+''') --print @max declare @min int set @min=1 declare @tt varchar(max) set @tt='''' while @min<=@max begin declare @name varchar(MAX) select @name=name from( select row_number()over(order by getdate()) as num, name from syscolumns where id=object_id('''+@RKMinusOne+'''))a where num=@min set @tt=ISNULL(@tt,'','')+@name+'','' set @min=@min+1 end --print @tt declare @ss varchar(max) set @ss='''' set @ss=''select * into #temp from '+@RKMinusOne+' order by '' +LEFT(@tt,LEN(@tt)-1)+'' delete from '+@RKMinusOne+' insert '+@RKMinusOne+' select * from #temp drop table #temp'' exec(@ss) ' exec(@str1) print '将R[k-1]按全部列排序完成' --扫描R[k-1]和R1将符合条件者合并生成RKTemp(即R'[K]) declare @num int set @num=1 declare @str varchar(max) set @str='create table RKTemp(' declare @sql varchar(1000) set @sql='tx_id int,' while @num<=@k begin set @sql=ISNULL(@sql,',')+'item'+CAST(@num as varchar)+' nvarchar(50),' set @num=@num+1 end set @str=@str+left(@sql,LEN(@sql)-1)+')' exec(@str) declare @str2 varchar(max) set @str2= ' insert into rktemp select '+@RKMinusOne+'.*,myr1.item1 from '+@RKMinusOne+', R1 myr1 where '+@RKMinusOne+'.tx_id=myr1.tx_id and myr1.item1>'+@RKMinusOne+'.item'+cast(@KMinusOne as varchar(10)) exec(@str2) print '扫描R[k-1]和R1将符合条件者合并生成Rktemp完成' ---------------将RKTemp(即R'[K])按全部item列排序-------------------- set @str1= 'declare @max int select @max=count(name) from syscolumns where id=object_id('''+'RKTemp'+''') --print @max declare @min int set @min=2 declare @tt varchar(max) set @tt='''' while @min<=@max begin declare @name varchar(MAX) select @name=name from( select row_number()over(order by getdate()) as num, name from syscolumns where id=object_id('''+'RKTemp'+'''))a where num=@min set @tt=ISNULL(@tt,'','')+@name+'','' set @min=@min+1 end --print @tt declare @ss varchar(max) set @ss='''' set @ss=''select * into #temp from '+'RKTemp'+' order by '' +LEFT(@tt,LEN(@tt)-1)+'' delete from '+'RKTemp'+' insert '+'RKTemp'+' select * from #temp drop table #temp'' exec(@ss) ' exec(@str1) print '将RKTemp按全部item列排序完成' -------------------用RKTemp(即R'[k])计算支持度生成C[K]------------------- declare @number int set @number=1 set @CK='C'+cast(@k as varchar(100)) declare @string varchar(max) set @string='create table '+@CK+'(' declare @sqlstring varchar(1000) set @sqlstring='' while @number<=@k begin set @sqlstring=ISNULL(@sqlstring,',')+'item'+CAST(@number as varchar)+' nvarchar(50),' set @number=@number+1 end set @sqlstring=@sqlstring+'cnt int,' --print left(@sqlstring,LEN(@sqlstring)-1) set @string=@string+left(@sqlstring,LEN(@sqlstring)-1)+')' exec(@string) declare @group varchar(max) set @group='' set @number=1 while @number<=@k begin set @group=ISNULL(@group,',')+'item'+CAST(@number as varchar)+',' set @number=@number+1 end set @group=left(@group,LEN(@group)-1) --print '@group='+@group declare @string2 varchar(max) set @string2= ' insert into '+@CK+' select '+@group+',count(*) from RKTemp group by '+@group+' having count(*)>='+@MinSupport --print '@string2='+@string2 exec(@string2) print '用RKTemp计算支持度生成C[K]完成' -----------------用RKTemp和C[K]生成R[K]-------------------------- declare @numbert int set @numbert=1 set @RK='R'+cast(@k as varchar(100)) declare @stringt varchar(max) set @stringt='create table '+@RK+'(' declare @sqlstringt varchar(1000) set @sqlstringt='tx_id int,' while @numbert<=@k begin set @sqlstringt=ISNULL(@sqlstringt,',')+'item'+CAST(@numbert as varchar)+' nvarchar(50),' set @numbert=@numbert+1 end --print left(@sqlstringt,LEN(@sqlstringt)-1) set @stringt=@stringt+left(@sqlstringt,LEN(@sqlstringt)-1)+')' exec(@stringt) declare @string3 varchar(max) set @string3= ' insert into '+@RK+' select RKTemp.* from RKTemp,'+@CK+' where RKTemp.item1='+@CK+'.item1 and RKTemp.item'+cast(@k as varchar(10))+'='+@CK+'.item'+cast(@k as varchar(10)) --print '@string3='+@string3 exec(@string3) print '用RKTemp和C[K]生成R[K]完成' -------------------R'[k]用完要删除---------------- drop table RKTemp ----------------------@RKNotEmpty重新赋值,若RK为空,则退出循环------------- set @RKNotEmpty='select tx_id into panduan from '+@RK exec(@RKNotEmpty) if not exists(select * from panduan) begin drop table panduan print @RK+'为空,跳出循环啦!' break end else drop table panduan end ---------------------------------------------------------------------------------------------------------------------------------------- ---------------------------------------------------------------------------------------------------------------------------------------- ---------------------------------------------------------------------------------------------------------------------------------------- ----------------------------------------------------------------------------------------------------------------------------------------