-- hive sql

-- Build the AD_MATE_FT rows for partition pt='2012-03-05 10' by inner-joining
-- the per-metric staging tables on (mrid, uid, bussiness_id) and looking up the
-- material name in AD_MATERIAL_DIM on (id, uid).
--
-- Every staging subquery is pinned to the same pt literal, so no cross-table
-- pt predicate is needed. The previous chained "a.pt = b.pt = c.pt ..." WHERE
-- clause did not express a multi-way equality (it compares the boolean result
-- of the first comparison against the next string) and has been removed.
-- Inner joins on the same key triple are transitive, so every staging table is
-- joined directly against the effect counts.
INSERT OVERWRITE TABLE AD_MATE_FT PARTITION (pt='2012-03-05 10')
SELECT
    eff.mrid           AS material_id,
    dim.name           AS material_name,
    'pv'               AS AD_EFT_MATE_CAL_TYPE,
    eff.uid            AS uid,
    eff.bussiness_id   AS bussiness_id,
    'HOURLY'           AS time_type,
    '2012-03-05 10'    AS date_desc,
    eff.effect_sum     AS ad_eft_mate_cnt,
    expo.EXPOSE_sum    AS ad_show_cnt,
    clk.click_sum      AS ad_click_cnt,
    ctr.ad_percent     AS ad_click_rate,
    brw.ad_browse      AS pageview_cnt,
    bpc.ad_clibro      AS pvcnt_div_adclick,
    bnc.ad_step        AS ad_bounce_rate,
    conv.ad_percon     AS ad_conv_rate,
    stay.ad_avgtime    AS avg_stay_time,
    -- unix_timestamp() yields epoch seconds (bigint); to_date() expects a
    -- timestamp string, so convert through from_unixtime() first.
    to_date(from_unixtime(unix_timestamp()))
FROM (
    SELECT mrid, uid, bussiness_id, effect_sum
    FROM sum0_effect_hour_temp
    WHERE pt = '2012-03-05 10'
) eff
JOIN (
    SELECT mrid, uid, bussiness_id, EXPOSE_sum
    FROM sum0_expose_hour_temp
    WHERE pt = '2012-03-05 10'
) expo
    ON (eff.mrid = expo.mrid AND eff.uid = expo.uid AND eff.bussiness_id = expo.bussiness_id)
JOIN (
    SELECT mrid, uid, bussiness_id, click_sum
    FROM sum0_click_hour_temp
    WHERE pt = '2012-03-05 10'
) clk
    ON (eff.mrid = clk.mrid AND eff.uid = clk.uid AND eff.bussiness_id = clk.bussiness_id)
JOIN (
    SELECT mrid, uid, bussiness_id, ad_percent
    FROM sum0_click_percent_hour_temp
    WHERE pt = '2012-03-05 10'
) ctr
    ON (eff.mrid = ctr.mrid AND eff.uid = ctr.uid AND eff.bussiness_id = ctr.bussiness_id)
JOIN (
    SELECT mrid, uid, bussiness_id, ad_browse
    FROM sum0_click_browse_hour_temp
    WHERE pt = '2012-03-05 10'
) brw
    ON (eff.mrid = brw.mrid AND eff.uid = brw.uid AND eff.bussiness_id = brw.bussiness_id)
JOIN (
    SELECT mrid, uid, bussiness_id, ad_clibro
    FROM sum0_clibro_hour_rate_temp
    WHERE pt = '2012-03-05 10'
) bpc
    ON (eff.mrid = bpc.mrid AND eff.uid = bpc.uid AND eff.bussiness_id = bpc.bussiness_id)
JOIN (
    SELECT mrid, uid, bussiness_id, ad_percon
    FROM sum0_per_con_hour_temp
    WHERE pt = '2012-03-05 10'
) conv
    ON (eff.mrid = conv.mrid AND eff.uid = conv.uid AND eff.bussiness_id = conv.bussiness_id)
JOIN (
    SELECT mrid, uid, bussiness_id, ad_step
    FROM sum0_steprate_hour_temp
    WHERE pt = '2012-03-05 10'
) bnc
    ON (eff.mrid = bnc.mrid AND eff.uid = bnc.uid AND eff.bussiness_id = bnc.bussiness_id)
JOIN (
    SELECT mrid, uid, bussiness_id, ad_avgtime
    FROM avg0_time_hour_temp
    WHERE pt = '2012-03-05 10'
) stay
    ON (eff.mrid = stay.mrid AND eff.uid = stay.uid AND eff.bussiness_id = stay.bussiness_id)
JOIN (
    SELECT id, uid, name
    FROM AD_MATERIAL_DIM
) dim
    ON (eff.mrid = dim.id AND eff.uid = dim.uid);





-- 按小时计算效果表的总记录
-- Staging table holding an integer effect_sum per (mrid, uid, bussiness_id),
-- partitioned by the string column pt.
CREATE TABLE IF NOT EXISTS sum0_effect_hour_temp (
    effect_sum   INT,
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;


-- Count ad_effect records per (mrid, uid, bussiness_id) for partition
-- pt='2012-03-05 10', matching ad_PATH rows to ad_effect rows on page_id.
--
-- Hive maps INSERT ... SELECT columns to the target table by position, so the
-- select list follows the table's declared order:
-- effect_sum, mrid, uid, bussiness_id.
-- (The earlier list emitted bussiness_id before uid, swapping the two.)
--
-- Both inputs are pinned to the same pt literal inside their subqueries. No
-- WHERE predicate references the right-hand side, so the LEFT OUTER JOIN keeps
-- ad_PATH groups that have no effect record; count(effect_id) is 0 for those.
INSERT OVERWRITE TABLE sum0_effect_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    COUNT(ae.effect_id) AS effect_sum,
    ap.mrid             AS mrid,
    ap.uid              AS uid,
    ap.bussiness_id     AS bussiness_id
FROM (
    SELECT mrid, uid, bussiness_id, PAGE_ID
    FROM ad_PATH
    WHERE pt = '2012-03-05 10'
      AND bussiness_id IS NOT NULL
) ap
LEFT OUTER JOIN (
    SELECT effect_id, PAGE_ID
    FROM ad_effect
    WHERE pt = '2012-03-05 10'
) ae
    ON (ae.PAGE_ID = ap.PAGE_ID)
GROUP BY ap.mrid, ap.uid, ap.bussiness_id;

-- 按小时计算点击表的曝光数

-- Staging table holding an integer EXPOSE_sum per (mrid, uid, bussiness_id),
-- partitioned by the string column pt.
CREATE TABLE IF NOT EXISTS sum0_expose_hour_temp (
    EXPOSE_sum   INT,
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

  insert overwrite table sum0_expose_hour_temp PARTITION (pt='2012-03-05 10')
  select
     COUNT(temp1.PAGE_ID) as EXPOSE_sum,
     temp0.mrid as mrid,
     temp0.bussiness_id as bussiness_id,
     temp0.uid as uid
  from (select * from ad_PATH where pt='2012-03-05 10' and BUSSINESS_ID is not NULL ) temp0
     left outer join(
      select  PAGE_ID ,pt from ad_expose_click WHERE pt='2012-03-05 10' AND action='expose'
     ) temp1
   on(temp1.PAGE_ID=temp0.PAGE_ID)
    where temp0.pt=temp1.pt
          GROUP BY temp0.mrid,temp0.uid,temp0.bussiness_id ;


-- 按小时计算点击表的点击数

-- Staging table holding an integer click_sum per (mrid, uid, bussiness_id),
-- partitioned by the string column pt.
CREATE TABLE IF NOT EXISTS sum0_click_hour_temp (
    click_sum    INT,
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

-- Count ad_expose_click rows with action='click' per
-- (mrid, uid, bussiness_id) for partition pt='2012-03-05 10', matched to
-- ad_PATH on page_id.
--
-- Hive maps INSERT ... SELECT columns by position, so the select list follows
-- the table's declared order: click_sum, mrid, uid, bussiness_id.
-- (The earlier list emitted bussiness_id before uid, swapping the two.)
--
-- Both inputs are pinned to the same pt literal inside their subqueries. No
-- WHERE predicate references the right-hand side, so the LEFT OUTER JOIN keeps
-- ad_PATH groups without any click; their count is 0.
INSERT OVERWRITE TABLE sum0_click_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    COUNT(ec.PAGE_ID) AS click_sum,
    ap.mrid           AS mrid,
    ap.uid            AS uid,
    ap.bussiness_id   AS bussiness_id
FROM (
    SELECT mrid, uid, bussiness_id, PAGE_ID
    FROM ad_PATH
    WHERE pt = '2012-03-05 10'
      AND BUSSINESS_ID IS NOT NULL
) ap
LEFT OUTER JOIN (
    SELECT PAGE_ID
    FROM ad_expose_click
    WHERE pt = '2012-03-05 10'
      AND action = 'click'
) ec
    ON (ec.PAGE_ID = ap.PAGE_ID)
GROUP BY ap.mrid, ap.uid, ap.bussiness_id;


-- 按小时的点击率
-- Staging table holding a double ad_percent per (mrid, uid, bussiness_id),
-- partitioned by the string column pt.
CREATE TABLE IF NOT EXISTS sum0_click_percent_hour_temp (
    ad_percent   DOUBLE,
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

-- ad_percent = click_sum / EXPOSE_sum for each (mrid, uid, bussiness_id) that
-- is present in both sum0_click_hour_temp and sum0_expose_hour_temp for
-- partition pt='2012-03-05 10'.
INSERT OVERWRITE TABLE sum0_click_percent_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    c.click_sum / e.EXPOSE_sum AS ad_percent,
    c.mrid                     AS mrid,
    c.uid                      AS uid,
    c.bussiness_id             AS bussiness_id
FROM (
    SELECT pt, mrid, uid, bussiness_id, click_sum
    FROM sum0_click_hour_temp
    WHERE pt = '2012-03-05 10'
) c
JOIN (
    SELECT pt, mrid, uid, bussiness_id, EXPOSE_sum
    FROM sum0_expose_hour_temp
    WHERE pt = '2012-03-05 10'
) e
    ON (c.MRID = e.mrid AND c.uid = e.uid AND c.bussiness_id = e.bussiness_id)
WHERE e.pt = c.pt;


-- 小时级页面浏览数
-- Staging table holding an integer ad_browse per (mrid, uid, bussiness_id),
-- partitioned by the string column pt.
CREATE TABLE IF NOT EXISTS sum0_click_browse_hour_temp (
    ad_browse    INT,
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

-- Count distinct page_ids per (mrid, uid, bussiness_id) for partition
-- pt='2012-03-05 10'. A page is counted when it has an ad_effect row and its
-- ad_PATH row belongs to an ad_SESSION row with SOURCE_TYPE = 'Direct'.
--
-- All three inputs are pinned to the same pt literal in their subqueries. The
-- former "tmp1.pt = tmp2.pt = tmp3.pt" predicate was not a three-way equality
-- (it compares the boolean result of the first comparison with a string) and
-- has been removed.
--
-- NOTE(review): other statements filter ad_session on source_type = '3' while
-- this one uses 'Direct'; confirm which encoding the column actually stores.
INSERT OVERWRITE TABLE sum0_click_browse_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    COUNT(DISTINCT p.page_id) AS ad_browse,
    p.mrid                    AS mrid,
    p.uid                     AS uid,
    p.bussiness_id            AS bussiness_id
FROM (
    SELECT page_id
    FROM ad_effect
    WHERE pt = '2012-03-05 10'
) e
JOIN (
    SELECT page_id, SESSION_ID, bussiness_id, mrid, uid
    FROM ad_PATH
    WHERE pt = '2012-03-05 10'
      AND bussiness_id IS NOT NULL
) p
    ON (e.page_id = p.page_id)
JOIN (
    SELECT SESSION_ID, bussiness_id, uid
    FROM ad_SESSION
    WHERE SOURCE_TYPE = 'Direct'
      AND pt = '2012-03-05 10'
) s
    ON (p.SESSION_ID = s.SESSION_ID AND p.uid = s.uid AND p.bussiness_id = s.bussiness_id)
GROUP BY p.mrid, p.uid, p.bussiness_id;

-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

-- 按小时网页浏览/广告点击

-- Staging table holding a double ad_clibro per (mrid, uid, bussiness_id),
-- partitioned by the string column pt.
CREATE TABLE IF NOT EXISTS sum0_clibro_hour_rate_temp (
    ad_clibro    DOUBLE,
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

-- ad_clibro = ad_browse / click_sum for each (mrid, uid, bussiness_id) present
-- in both sum0_click_browse_hour_temp and sum0_click_hour_temp for partition
-- pt='2012-03-05 10'.
INSERT OVERWRITE TABLE sum0_clibro_hour_rate_temp PARTITION (pt='2012-03-05 10')
SELECT
    b.ad_browse / c.click_sum AS ad_clibro,
    b.mrid                    AS MRID,
    b.uid                     AS uid,
    b.bussiness_id            AS bussiness_id
FROM (
    SELECT pt, mrid, uid, bussiness_id, ad_browse
    FROM sum0_click_browse_hour_temp
    WHERE pt = '2012-03-05 10'
) b
JOIN (
    SELECT pt, mrid, uid, bussiness_id, click_sum
    FROM sum0_click_hour_temp
    WHERE pt = '2012-03-05 10'
) c
    ON (b.mrid = c.mrid
        AND c.pt = b.pt
        AND b.uid = c.uid
        AND b.bussiness_id = c.bussiness_id)
WHERE b.pt = c.pt;


-- 转化率

-- Staging table holding a double ad_percon per (mrid, uid, bussiness_id),
-- partitioned by the string column pt.
CREATE TABLE IF NOT EXISTS sum0_per_con_hour_temp (
    ad_percon    DOUBLE,
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

-- ad_percon = effect_sum / click_sum for each (mrid, uid, bussiness_id)
-- present in both sum0_effect_hour_temp and sum0_click_hour_temp for partition
-- pt='2012-03-05 10'.
--
-- The statement previously ended with ';' right after the ON clause, which
-- left a dangling "where ...;" fragment behind as a separate, invalid
-- statement. The pt equality is already part of the ON clause, so the
-- fragment is dropped and the statement is terminated once.
INSERT OVERWRITE TABLE sum0_per_con_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    e.effect_sum / c.click_sum AS ad_percon,
    e.mrid                     AS MRID,
    e.uid                      AS uid,
    e.bussiness_id             AS bussiness_id
FROM (
    SELECT pt, mrid, uid, bussiness_id, effect_sum
    FROM sum0_effect_hour_temp
    WHERE pt = '2012-03-05 10'
) e
JOIN (
    SELECT pt, mrid, uid, bussiness_id, click_sum
    FROM sum0_click_hour_temp
    WHERE pt = '2012-03-05 10'
) c
    ON (e.mrid = c.mrid
        AND c.pt = e.pt
        AND e.uid = c.uid
        AND e.bussiness_id = c.bussiness_id);


-- 跳出率:

-- 进入1步的离开数量
-- Staging table holding an integer ad_step1 per (mrid, uid, bussiness_id),
-- partitioned by the string column pt.
CREATE TABLE IF NOT EXISTS sum0_step1_hour_temp (
    ad_step1     INT,
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

 

-- Count distinct session_ids per (mrid, uid, bussiness_id) for partition
-- pt='2012-03-05 10', restricted to AD_PATH rows with session_step = '1'
-- whose session appears in ad_session with source_type = '3'.
INSERT OVERWRITE TABLE sum0_step1_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    COUNT(DISTINCT p.session_id) AS ad_step1,
    p.MRID                       AS MRID,
    p.uid                        AS uid,
    p.bussiness_id               AS bussiness_id
FROM (
    SELECT pt, session_id, mrid, uid, bussiness_id
    FROM AD_PATH
    WHERE pt = '2012-03-05 10'
      AND session_step = '1'
) p
JOIN (
    SELECT pt, session_id, uid, bussiness_id
    FROM ad_session
    WHERE pt = '2012-03-05 10'
      AND source_type = '3'
) s
    ON (p.SESSION_ID = s.SESSION_ID
        AND p.uid = s.uid
        AND p.bussiness_id = s.bussiness_id)
WHERE p.pt = s.pt
GROUP BY p.mrid, p.uid, p.bussiness_id;
-- 进入所有步的离开数量
-- Staging table holding an integer ad_stepn per (mrid, uid, bussiness_id),
-- partitioned by the string column pt.
CREATE TABLE IF NOT EXISTS sum0_stepn_hour_temp (
    ad_stepn     INT,
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

 

-- Count distinct session_ids per (mrid, uid, bussiness_id) for partition
-- pt='2012-03-05 10' over all AD_PATH rows (no session_step filter) whose
-- session appears in ad_session with source_type = '3'.
INSERT OVERWRITE TABLE sum0_stepn_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    COUNT(DISTINCT p.session_id) AS ad_stepn,
    p.MRID                       AS MRID,
    p.uid                        AS uid,
    p.bussiness_id               AS bussiness_id
FROM (
    SELECT pt, session_id, mrid, uid, bussiness_id
    FROM AD_PATH
    WHERE pt = '2012-03-05 10'
) p
JOIN (
    SELECT pt, session_id, uid, bussiness_id
    FROM ad_session
    WHERE pt = '2012-03-05 10'
      AND source_type = '3'
) s
    ON (p.SESSION_ID = s.SESSION_ID
        AND p.uid = s.uid
        AND p.bussiness_id = s.bussiness_id)
WHERE p.pt = s.pt
GROUP BY p.mrid, p.uid, p.bussiness_id;


-- 计算跳出率
-- Staging table holding a double ad_step per (mrid, uid, bussiness_id),
-- partitioned by the string column pt.
CREATE TABLE IF NOT EXISTS sum0_steprate_hour_temp (
    ad_step      DOUBLE,
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

-- ad_step = ad_step1 / ad_stepn for each (mrid, uid, bussiness_id) present in
-- both sum0_step1_hour_temp and sum0_stepn_hour_temp for partition
-- pt='2012-03-05 10'.
--
-- The join must match on mrid across the two tables. The earlier predicate
-- compared temp0.mrid with itself, which is true for every non-null mrid, so
-- each step-1 row was paired with the step-n rows of every material sharing
-- its uid and bussiness_id.
INSERT OVERWRITE TABLE sum0_steprate_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    s1.ad_step1 / sn.ad_stepn AS ad_step,
    s1.MRID                   AS MRID,
    s1.uid                    AS uid,
    s1.bussiness_id           AS bussiness_id
FROM (
    SELECT pt, mrid, uid, bussiness_id, ad_step1
    FROM sum0_step1_hour_temp
    WHERE pt = '2012-03-05 10'
) s1
JOIN (
    SELECT pt, mrid, uid, bussiness_id, ad_stepn
    FROM sum0_stepn_hour_temp
    WHERE pt = '2012-03-05 10'
) sn
    ON (s1.mrid = sn.mrid
        AND s1.uid = sn.uid
        AND s1.bussiness_id = sn.bussiness_id
        AND s1.pt = sn.pt);


-- 平均停留时间:

-- Staging table holding a double ad_avgtime per (mrid, uid, bussiness_id),
-- partitioned by the string column pt.
CREATE TABLE IF NOT EXISTS avg0_time_hour_temp (
    ad_avgtime   DOUBLE,
    mrid         STRING,
    uid          STRING,
    bussiness_id STRING
)
PARTITIONED BY (pt STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n'
STORED AS RCFILE;

-- Average ad_session.session_stay_seconds per (mrid, uid, bussiness_id) for
-- partition pt='2012-03-05 10', pairing ad_path rows with ad_session rows on
-- (session_id, uid, bussiness_id).
INSERT OVERWRITE TABLE avg0_time_hour_temp PARTITION (pt='2012-03-05 10')
SELECT
    AVG(s.session_stay_seconds) AS ad_avgtime,
    p.MRID                      AS MRID,
    p.uid                       AS uid,
    p.bussiness_id              AS bussiness_id
FROM (
    SELECT pt, session_id, mrid, uid, bussiness_id
    FROM ad_path
    WHERE pt = '2012-03-05 10'
) p
JOIN (
    SELECT pt, session_id, uid, bussiness_id, session_stay_seconds
    FROM ad_session
    WHERE pt = '2012-03-05 10'
) s
    ON (p.SESSION_ID = s.SESSION_ID
        AND p.uid = s.uid
        AND p.bussiness_id = s.bussiness_id)
WHERE p.pt = s.pt
GROUP BY p.mrid, p.uid, p.bussiness_id;


你可能感兴趣的:(sql)