数据挖掘之关联规则挖掘之SETM算法实现

具体算法实现细节请查看IEEE论文:Set-oriented mining for association rules in relational databases

SETM算法代码+测试用例+具体操作步骤下载戳这http://download.csdn.net/detail/michealtx/4266085

sql语言实现,找出来所有的频繁模式。代码如下:


---------------最终版------------

--drop table c1,c2,c3,c4,r1,r2,r3,r4
-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
--为SETMAlgorithm数据库创建空表r1
create table R1
(
	tx_id int,
	item1 nvarchar(50)
)

--把setmbackup数据库的sales_data表中导入数据到r1中
insert into R1
select *
from SETMBackup.dbo.SALES_DATA


-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
--------------------setm算法过程---------------
--将R1按item1排序
select * into #temp from R1--把R1内容存到临时表temp中
delete from R1--删除R1
insert R1 select * from #temp order by item1--将临时表temp按item1升序排序后插入到R1
drop table #temp--删除临时表temp

--声明最小支持度MinSupport
declare @minimum_support int
set @minimum_support=3

declare @MinSupport varchar(10)
set @MinSupport=cast(@minimum_support as varchar(10))

--声明变量k
declare @k int
set @k=1

--由R1生成C1
declare @CK varchar(100)
set @CK='C'+cast(@k as varchar(100)) 

declare @CreateTable varchar(100) 
set @CreateTable=' create table '+@CK+' (item1 nvarchar(10),cnt int) ' 

declare @RK varchar(10)
set @RK='R'+cast(@k as varchar(100)) 

declare @InsertInto varchar(100)
set @InsertInto=' insert into '+@CK+' select item1,COUNT(*) from '+@RK+' group by item1 having COUNT(*)>='+@MinSupport

exec(@CreateTable)
exec(@InsertInto)
print '由R1生成C1'

--进入循环
declare @RKNotEmpty varchar(100)
--set @RKNotEmpty='if not exists(select * from '+@RK+') begin drop table '+@RK+' break end'

declare @KMinusOne int 
set @KMinusOne=1

declare @RKMinusOne varchar(10)
set @RKMinusOne='R'+cast(@KMinusOne as varchar(100)) 

while 1=1
begin
	print '进入循环'
	set @KMinusOne=@k
	set @RKMinusOne='R'+cast(@KMinusOne as varchar(100)) 
	set @k=@k+1	
	set @RK='R'+cast(@k as varchar(100)) 
	----------------------------------------------------
	-----------------将R[k-1]按全部列排序---------------
	declare @str1 varchar(MAX)
	set @str1=
	'declare @max int 
	select @max=count(name) 
	from syscolumns 
	where id=object_id('''+@RKMinusOne+''') 
	--print @max 
	declare @min int 
	set @min=1 
	declare @tt varchar(max) 
	set @tt='''' 
	while @min<=@max 
	begin 
	declare @name varchar(MAX) 
	select @name=name from( 
	select row_number()over(order by getdate()) as num, 
	name from syscolumns where id=object_id('''+@RKMinusOne+'''))a 
	where num=@min 
	set @tt=ISNULL(@tt,'','')+@name+'','' 
	set @min=@min+1 
	end 
	--print @tt 

	declare @ss varchar(max)
	set @ss='''' 
	set  @ss=''select * into #temp from '+@RKMinusOne+' order by ''
	+LEFT(@tt,LEN(@tt)-1)+'' delete from '+@RKMinusOne+' insert '+@RKMinusOne+' select * from #temp drop table #temp''
	exec(@ss)
	'
	exec(@str1)
	print '将R[k-1]按全部列排序完成'
	
    --扫描R[k-1]和R1将符合条件者合并生成RKTemp(即R'[K])
	declare @num int
	set @num=1
	declare @str varchar(max)
	set @str='create table RKTemp('
	declare @sql varchar(1000)
	set @sql='tx_id int,'
	while @num<=@k
	begin
	set @sql=ISNULL(@sql,',')+'item'+CAST(@num as varchar)+' nvarchar(50),'
	set @num=@num+1
	end
	set @str=@str+left(@sql,LEN(@sql)-1)+')'
	exec(@str)
	declare @str2 varchar(max)
	set @str2=
		'
		insert into rktemp 
		select '+@RKMinusOne+'.*,myr1.item1 
		from '+@RKMinusOne+', R1 myr1
		where '+@RKMinusOne+'.tx_id=myr1.tx_id and myr1.item1>'+@RKMinusOne+'.item'+cast(@KMinusOne as varchar(10))
	exec(@str2)
	print '扫描R[k-1]和R1将符合条件者合并生成Rktemp完成'
	---------------将RKTemp(即R'[K])按全部item列排序--------------------
	set @str1=
	'declare @max int 
	select @max=count(name) 
	from syscolumns 
	where id=object_id('''+'RKTemp'+''') 
	--print @max 
	declare @min int 
	set @min=2 
	declare @tt varchar(max) 
	set @tt='''' 
	while @min<=@max 
	begin 
	declare @name varchar(MAX) 
	select @name=name from( 
	select row_number()over(order by getdate()) as num, 
	name from syscolumns where id=object_id('''+'RKTemp'+'''))a 
	where num=@min 
	set @tt=ISNULL(@tt,'','')+@name+'','' 
	set @min=@min+1 
	end 
	--print @tt 

	declare @ss varchar(max)
	set @ss='''' 
	set  @ss=''select * into #temp from '+'RKTemp'+' order by ''
	+LEFT(@tt,LEN(@tt)-1)+'' delete from '+'RKTemp'+' insert '+'RKTemp'+' select * from #temp drop table #temp''
	exec(@ss)
	'
	exec(@str1)
	print '将RKTemp按全部item列排序完成'
	-------------------用RKTemp(即R'[k])计算支持度生成C[K]-------------------
	declare @number int
	set @number=1
	set @CK='C'+cast(@k as varchar(100)) 
	declare @string varchar(max)
	set @string='create table '+@CK+'('
	declare @sqlstring varchar(1000)
	set @sqlstring=''
	while @number<=@k
	begin
	set @sqlstring=ISNULL(@sqlstring,',')+'item'+CAST(@number as varchar)+' nvarchar(50),'
	set @number=@number+1
	end
	set @sqlstring=@sqlstring+'cnt int,'
	--print left(@sqlstring,LEN(@sqlstring)-1)
	set @string=@string+left(@sqlstring,LEN(@sqlstring)-1)+')'
	exec(@string)
	declare @group varchar(max)
	set @group=''
	set @number=1
	while @number<=@k
	begin
	set @group=ISNULL(@group,',')+'item'+CAST(@number as varchar)+','
	set @number=@number+1
	end
	set @group=left(@group,LEN(@group)-1)
	--print '@group='+@group
	declare @string2 varchar(max)
	set @string2=
		'
		insert into '+@CK+' 
		select '+@group+',count(*) 
		from RKTemp 
		group by '+@group+' 
		having count(*)>='+@MinSupport
	--print '@string2='+@string2
	exec(@string2)
	print '用RKTemp计算支持度生成C[K]完成'
	-----------------用RKTemp和C[K]生成R[K]--------------------------		
	declare @numbert int
	set @numbert=1
	set @RK='R'+cast(@k as varchar(100)) 
	declare @stringt varchar(max)
	set @stringt='create table '+@RK+'('
	declare @sqlstringt varchar(1000)
	set @sqlstringt='tx_id int,'
	while @numbert<=@k
	begin
	set @sqlstringt=ISNULL(@sqlstringt,',')+'item'+CAST(@numbert as varchar)+' nvarchar(50),'
	set @numbert=@numbert+1
	end
	--print left(@sqlstringt,LEN(@sqlstringt)-1)
	set @stringt=@stringt+left(@sqlstringt,LEN(@sqlstringt)-1)+')'
	exec(@stringt)

	declare @string3 varchar(max)
	set @string3=
		'
		insert into '+@RK+' 
		select RKTemp.* 
		from RKTemp,'+@CK+'
		where RKTemp.item1='+@CK+'.item1 and  RKTemp.item'+cast(@k as varchar(10))+'='+@CK+'.item'+cast(@k as varchar(10))

	--print '@string3='+@string3
	exec(@string3)
	print '用RKTemp和C[K]生成R[K]完成'
	
	-------------------R'[k]用完要删除----------------
	drop table RKTemp
	----------------------@RKNotEmpty重新赋值,若RK为空,则退出循环-------------
	set @RKNotEmpty='select tx_id into panduan from '+@RK
	exec(@RKNotEmpty)
	if not exists(select * from panduan)
	begin
	drop table panduan
	print @RK+'为空,跳出循环啦!'
	break
	end
	else
	drop table panduan
end

----------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------


你可能感兴趣的:(算法,数据挖掘,String,object,table,insert)