#!/bin/sh
# Most frequently purchased store per vipcardno -> tmp.member_offline_org_prefer.
# How the statement is layered (innermost first):
#   1. collapse sourcedata.posdmdetails to one row per (vipcardno, orgcode, saleno);
#   2. count those rows per (vipcardno, orgcode) as ordernum;
#   3. rank each vipcardno's stores with row_number() by ordernum descending;
#   4. keep only rn<=1.
# The ordering has no secondary key, so when two stores tie on ordernum the
# query itself does not say which one receives rn=1.
# NOTE(review): unlike the order-count and store-count statements in this file,
# this one has no create_month filter -- confirm that is intended.
orgSql="
use tmp;
drop table if exists member_offline_org_prefer;
create table member_offline_org_prefer as
select * from (
select a.*,row_number() over(partition by a.vipcardno order by a.ordernum desc) as rn from(
select vipcardno,orgcode,count(1) ordernum from(
select vipcardno,orgcode,saleno from sourcedata.posdmdetails group by vipcardno,orgcode,saleno)t group by vipcardno,orgcode )a )t where rn<=1
"
# Disabled: the statement is only stored in orgSql and is not executed by this script.
#hive -e "$orgSql"
# Offline purchase-time preference: the data has no time of day, only the daily
# settlement date, so this is not implemented for now.
# Offline category preference: there is no category data at present, so this is
# not implemented for now.
#
# Offline product preference -> tmp.member_offline_product_prefer.
# Counts rows of sourcedata.posdmdetails per (vipcardno, itemid) as num, ranks
# each vipcardno's items with row_number() by num descending, and keeps rn<=10.
# count(1) here counts detail rows directly; there is no grouping by saleno, so
# num is a row count rather than a count of distinct orders.
# The ordering has no secondary key, so ties on num are ranked in an order the
# query does not specify.
offlineProductSql="
use tmp;
drop table if exists member_offline_product_prefer;
create table member_offline_product_prefer as
select * from(
select a.*,row_number() over(partition by a.vipcardno order by a.num desc) as rn from(
select vipcardno,itemid,count(1) num from sourcedata.posdmdetails group by vipcardno,itemid)a) t where rn<=10
"
# Disabled: the statement is only stored in offlineProductSql and is not executed by this script.
#hive -e "$offlineProductSql"
# Intermediate table of offline order counts per vipcardno -> tmp.member_offline_ordernum.
# The inner query keeps rows with create_month >= '2016-01' and collapses them
# to one row per (vipcardno, orgcode, saleno, rjdate). The outer query then
# produces, per vipcardno:
#   offline_total_ordernum      - number of those collapsed rows
#   offline_year_ordernum       - rows whose year(rjdate) equals the current year
#   offline_last_year_ordernum  - rows whose year(rjdate) equals the current year minus 1
#   offline_last_1m_ordernum    - rows with rjdate >= current_timestamp minus 30 days
#   offline_last_3m_ordernum    - rows with rjdate >= current_timestamp minus 90 days
#   offline_last_6m_ordernum    - rows with rjdate >= current_timestamp minus 180 days
# The "1m/3m/6m" windows are fixed day counts (30/90/180), not calendar months.
offlineOrdernumSql="
use tmp;
drop table if exists member_offline_ordernum;
create table member_offline_ordernum as
select vipcardno,
count(1) offline_total_ordernum,
sum(case when year(rjdate)=year(current_timestamp) then 1 else 0 end) offline_year_ordernum,
sum(case when year(rjdate)=year(current_timestamp)-1 then 1 else 0 end) offline_last_year_ordernum,
sum(case when rjdate>= date_sub(current_timestamp,30) then 1 else 0 end) offline_last_1m_ordernum,
sum(case when rjdate>= date_sub(current_timestamp,90) then 1 else 0 end) offline_last_3m_ordernum,
sum(case when rjdate>= date_sub(current_timestamp,180) then 1 else 0 end) offline_last_6m_ordernum
from(
select vipcardno,orgcode,saleno,rjdate from sourcedata.posdmdetails where create_month >='2016-01'
group by vipcardno,orgcode,saleno,rjdate)t group by vipcardno
"
# Disabled: the statement is only stored in offlineOrdernumSql and is not executed by this script.
#hive -e "$offlineOrdernumSql"
# Intermediate table of the number of stores each vipcardno purchased at
# -> tmp.member_offline_orgnum.
# The inner query keeps rows with create_month >= '2016-01' and reduces them to
# distinct (vipcardno, orgcode) pairs via group by; the outer query counts the
# pairs per vipcardno as offline_orgnum.
# The variable name is spelled "Ogr" (not "Org"); it is left unchanged because
# the commented-out invocation below refers to it by that name.
offlineOgrnumSql="
use tmp;
drop table if exists member_offline_orgnum;
create table member_offline_orgnum as
select vipcardno,count(1) offline_orgnum from(
select vipcardno,orgcode from sourcedata.posdmdetails where create_month>='2016-01' group by vipcardno,orgcode)t group by vipcardno
"
# Disabled: the statement is only stored in offlineOgrnumSql and is not executed by this script.
#hive -e "$offlineOgrnumSql"
# Store of the most recent purchase per vipcardno -> tmp.member_offline_last_org.
# The inner query takes max(rjdate) per (vipcardno, orgcode) as
# offline_latest_buy_time; row_number() then ranks each vipcardno's stores by
# that date descending and only rn<=1 is kept.
# The ordering has no secondary key, so if several stores share the same latest
# rjdate the query does not specify which one is kept.
# NOTE(review): no create_month filter here, unlike the order-count and
# store-count statements -- confirm that is intended.
lastOrgSql="
use tmp;
drop table if exists member_offline_last_org;
create table member_offline_last_org as
select * from(
select a.*,row_number() over(partition by a.vipcardno order by a.offline_latest_buy_time desc) as rn from(
select vipcardno,orgcode,max(rjdate) offline_latest_buy_time from sourcedata.posdmdetails group by vipcardno,orgcode)a)t where rn<=1
"
# Disabled: the statement is only stored in lastOrgSql and is not executed by this script.
#hive -e "$lastOrgSql"
# Final per-vipcardno offline order summary -> tmp.member_offline_order.
#
# The store count comes from a left join to subquery c (tmp.member_offline_orgnum)
# in order to avoid count(distinct ...), which was considered too slow.
#
# Statement order inside the string: drop the old table, apply session
# settings, then create the table from the select. The settings enable
# map-side aggregation (hive.map.aggr), skew handling for group by
# (hive.groupby.skewindata), a 3072M map JVM heap with the GC overhead limit
# switched off, and 30 reduce tasks.
#
# Sources joined on vipcardno (a is the driving side, all others are left joins):
#   a - aggregates of sourcedata.posdmdetails with create_month >= '2016-01':
#       max/min of totalmoney, sum of sstotal, and sum of sstotal for rows whose
#       year(rjdate) is the current year minus 1
#   b - order counts from tmp.member_offline_ordernum
#   c - store count from tmp.member_offline_orgnum
#   d - latest purchase date and its store from tmp.member_offline_last_org
#   e - orgcode values from tmp.member_offline_org_prefer, comma-joined
#   f - itemid values from tmp.member_offline_product_prefer, comma-joined
#
# Derived and placeholder columns:
#   offline_last_buy_days  - datediff(current_timestamp, offline_latest_buy_time)
#   offline_avg_amount     - offline_total_amount / offline_total_ordernum, rounded to 2 places
#   offline_buy_time_prefer, offline_product_category_prefer - constant ''
#   offline_vip_skunum, offline_vip_ordernum, offline_app_vip_skunum,
#   offline_app_vip_ordernum - constant 0
#
# collect_set is used for e and f, so the rank order stored in the rn column of
# the source tables is not carried into the comma-joined strings.
# NOTE(review): max/min are taken over totalmoney on individual posdmdetails
# rows while the totals sum sstotal -- confirm these are the intended columns.
# NOTE(review): the five tmp.* tables read here are built by statements whose
# hive -e calls are commented out in this file; presumably they already exist
# from an earlier run or another job -- confirm before relying on this script alone.
offlineSql="
use tmp;
drop table if exists member_offline_order;
set hive.map.aggr=true;
set hive.groupby.skewindata=true;
set mapreduce.map.java.opts=-Xmx3072M -XX:-UseGCOverheadLimit;
set mapred.reduce.tasks=30;
create table member_offline_order as
select a.vipcardno,e.offline_org_prefer,c.offline_orgnum,
a.offline_max_amount,a.offline_min_amount,a.offline_total_amount,
a.offline_last_year_paymoney,'' as offline_buy_time_prefer,d.offline_latest_buy_time,datediff(current_timestamp,d.offline_latest_buy_time) as offline_last_buy_days,d.orgcode as offline_latest_buy_org,
'' as offline_product_category_prefer,f.offline_product_prefer,0 as offline_vip_skunum,0 as offline_vip_ordernum,
0 as offline_app_vip_skunum,0 as offline_app_vip_ordernum,b.offline_total_ordernum,b.offline_year_ordernum,
b.offline_last_year_ordernum,b.offline_last_1m_ordernum,b.offline_last_3m_ordernum,b.offline_last_6m_ordernum,
round(a.offline_total_amount/b.offline_total_ordernum,2) as offline_avg_amount
from
(
select vipcardno,
max(totalmoney) offline_max_amount,
min(totalmoney) offline_min_amount,
sum(sstotal) offline_total_amount,
sum(case when year(rjdate)=year(current_timestamp)-1 then sstotal else 0 end) offline_last_year_paymoney
from sourcedata.posdmdetails where create_month>='2016-01' group by vipcardno
)a left join
(
select vipcardno,offline_total_ordernum,offline_year_ordernum,offline_last_year_ordernum,offline_last_1m_ordernum,offline_last_3m_ordernum,offline_last_6m_ordernum from tmp.member_offline_ordernum
) b on a.vipcardno=b.vipcardno
left join
(
select vipcardno,offline_orgnum from tmp.member_offline_orgnum
)c on a.vipcardno=c.vipcardno
left join
(select vipcardno, orgcode,offline_latest_buy_time from tmp.member_offline_last_org)d on a.vipcardno=d.vipcardno
left join
(select vipcardno,concat_ws(',' ,collect_set(orgcode)) offline_org_prefer from tmp.member_offline_org_prefer group by vipcardno)e on a.vipcardno=e.vipcardno
left join
(select vipcardno,concat_ws(',' ,collect_set(itemid)) offline_product_prefer from tmp.member_offline_product_prefer group by vipcardno) f on a.vipcardno=f.vipcardno
"
# The only statement this script actually runs; as the last command, its exit
# status becomes the script's exit status.
hive -e "$offlineSql"