MSBI--enlarge the DW database table volume

我们在学习MSBI的时候,经常会使用官方提供的Adventureworks和AdventureworksDW示例数据库,但是官方提供的数据量有点小,

以DW为例,Factinternetsales只有不到七万行的数据,我们很难做某些测试,譬如说想对事实表做一个分区测试,测试CUBE性能等.

为此,我想以FactInternetSales表为入口,扩大它的数据量到数亿条.

经过分析,我觉得只要把理解主键是怎么生成的,以及orderdatekey,duedatekey,shipdatekey修改下,随机生成productkey与customerkey

(其实是随机从这两个维表中抽取key),基本上就可以装配好数据.

以下是我的代码,仅供参考.欢迎同学们提供更好的思路.

我现在是先取出salesordernumber最大的那个,然后基于这个进行递加,还有maxorderdate.

一天随机抽取300个产品和1000个客户进行匹配,这样就可以产生三十万条记录.

如果循环700次,因为怕DimDate里面的key不存在(可以自己扩容DimDate)  ,可以保证DateKey是在DimDate里面还有.

即可创建大约二亿条数据.基本可以满足测试的需求了.

随机是使用order by newid()来实现的,然后通过row_number() 来生成行号再加上当前最大的那个salesordernumber来作为新的.

declare 

@maxsonumber int,

@maxorderdate date,

@loopcount int

select

@maxsonumber = max(cast(replace(salesordernumber,left(salesordernumber,2),'') as int)),

@maxorderdate =

CAST( substring(cast(max(orderdatekey) as varchar),1,4)

 +'-'+substring(cast(max(orderdatekey) as varchar),5,2) 

 +'-'+substring(cast(max(orderdatekey) as varchar),7,2)

as DATE)

from FactInternetSales

select @maxorderdate--20080801

select @maxsonumber--100000

 

set @loopcount = 1



while @loopcount < 100

BEGIN

BEGIN TRAN



BEGIN TRY 

INSERT INTO [AdventureWorksDW2008R2].[dbo].[FactInternetSales]

           ([ProductKey]

           ,[OrderDateKey]

           ,[DueDateKey]

           ,[ShipDateKey]

           ,[CustomerKey]

           ,[PromotionKey]

           ,[CurrencyKey]

           ,[SalesTerritoryKey]

           ,[SalesOrderNumber]

           ,[SalesOrderLineNumber]

           ,[RevisionNumber]

           ,[OrderQuantity]

           ,[UnitPrice]

           ,[ExtendedAmount]

           ,[UnitPriceDiscountPct]

           ,[DiscountAmount]

           ,[ProductStandardCost]

           ,[TotalProductCost]

           ,[SalesAmount]

           ,[TaxAmt]

           ,[Freight]

           ,[CarrierTrackingNumber]

           ,[CustomerPONumber])

SELECT

            [ProductKey]

           ,replace(cast(dateadd(dd,1,@maxorderdate)as varchar),'-','')

           ,replace(cast(dateadd(dd,1,@maxorderdate)as varchar),'-','')

           ,replace(cast(dateadd(dd,DaysToManufacture,@maxorderdate) as varchar),'-','')

           ,[CustomerKey]

           ,1

           ,100

           ,10

           ,'SO'+CAST(@maxsonumber+rowno AS VARCHAR)

           ,1

           ,1

           ,1

           ,DealerPrice

           ,DealerPrice

           ,0

           ,0

           ,StandardCost

           ,StandardCost

           ,DealerPrice

           ,0

           ,0

           ,NULL

           ,NULL

FROM

(

SELECT

ROW_NUMBER() over(order by newid()) as rowno,

*

FROM

(SELECT TOP 300 ProductKey,DealerPrice,StandardCost,DaysToManufacture FROM DimProduct

 WHERE DealerPrice IS NOT NULL AND StandardCost is not null ORDER BY NEWID()) AS P

CROSS JOIN

(SELECT TOP 1000 CustomerKey FROM DimCustomer ORDER BY NEWID()) AS C

) AS X

END TRY

BEGIN CATCH

PRINT ERROR_MESSAGE()

    IF @@TRANCOUNT >0

      ROLLBACK TRAN;

END CATCH

    IF @@TRANCOUNT >0

      COMMIT TRAN;



select

@maxsonumber = max(cast(replace(salesordernumber,left(salesordernumber,2),'') as int)),

@maxorderdate =

CAST( substring(cast(max(orderdatekey) as varchar),1,4)

 +'-'+substring(cast(max(orderdatekey) as varchar),5,2) 

 +'-'+substring(cast(max(orderdatekey) as varchar),7,2)

as DATE)

from FactInternetSales



print 'maxsonumber:' + cast(@maxsonumber as varchar)

print 'maxorderdate:' + cast(@maxorderdate as varchar)



SET @loopcount += 1

END
View Code

 

   

 

你可能感兴趣的:(database)