用户画像之线下订单

#!/bin/sh

# Most frequently purchased store per member (builds tmp.member_offline_org_prefer).
#
# How the statement is layered, innermost first:
#   1. Collapse sourcedata.posdmdetails to one row per (vipcardno, orgcode, saleno),
#      so a sale number is counted once however many detail rows it has.
#   2. Count those rows per (vipcardno, orgcode) as ordernum.
#   3. Rank each member's stores by ordernum descending and keep rn<=1.
#
# orgcode is used as a secondary sort key: when a member has the same order
# count at several stores, ordering by ordernum alone lets row_number() pick
# any of the tied stores, and the pick may differ from one run to the next.
#
# The table is created with "select *", so besides vipcardno and orgcode it
# also carries the ordernum and rn columns.
# NOTE(review): unlike the order-count and store-count statements in this
# file, no create_month filter is applied here - confirm that scanning the
# full history is intended.
orgSql="
use tmp;
drop table if exists member_offline_org_prefer;
create table member_offline_org_prefer as
select * from (
select a.*,row_number() over(partition by a.vipcardno order by a.ordernum desc,a.orgcode) as rn from(
select vipcardno,orgcode,count(1) ordernum from(
select vipcardno,orgcode,saleno from sourcedata.posdmdetails group by vipcardno,orgcode,saleno)t group by vipcardno,orgcode )a )t where rn<=1
"
# Execution of this step is currently disabled; uncomment to rebuild the table.
#hive -e "$orgSql"

#线下购买时间偏好,没有时间,只有日结日期,暂不做

#线下品类偏好,目前没有品类数据,暂不做

# Offline product preference per member (builds tmp.member_offline_product_prefer).
#
# The inner query counts detail rows of sourcedata.posdmdetails per
# (vipcardno, itemid) as num; this is a count of detail rows, not of distinct
# sale numbers. The outer query ranks each member's items by num descending
# and keeps the first ten (rn<=10).
#
# itemid is used as a secondary sort key: with num alone, items tied around
# the tenth position are ranked arbitrarily by row_number(), so which of them
# make the cut may differ from one run to the next.
#
# The table is created with "select *", so it carries vipcardno, itemid, num
# and rn.
# NOTE(review): unlike the order-count and store-count statements in this
# file, no create_month filter is applied here - confirm that scanning the
# full history is intended.
offlineProductSql="
use tmp;
drop table if exists member_offline_product_prefer;
create table member_offline_product_prefer as 
select * from(
select a.*,row_number() over(partition by a.vipcardno order by a.num desc,a.itemid) as rn from(
select vipcardno,itemid,count(1) num from sourcedata.posdmdetails group by vipcardno,itemid)a) t where rn<=10
"

# Execution of this step is currently disabled; uncomment to rebuild the table.
#hive -e "$offlineProductSql"

# Intermediate table: offline order counts per member (tmp.member_offline_ordernum).
#
# The inner query collapses sourcedata.posdmdetails rows with
# create_month >= '2016-01' to one row per (vipcardno, orgcode, saleno, rjdate).
# The outer query then counts those rows per vipcardno:
#   offline_total_ordernum       every row
#   offline_year_ordernum        year(rjdate) equals the current year
#   offline_last_year_ordernum   year(rjdate) equals the current year minus one
#   offline_last_1m_ordernum     rjdate on or after today minus 30 days
#   offline_last_3m_ordernum     rjdate on or after today minus 90 days
#   offline_last_6m_ordernum     rjdate on or after today minus 180 days
# The 1m/3m/6m windows are therefore fixed day counts, not calendar months.
# NOTE(review): year(rjdate) and the comparison with date_sub() presumably
# rely on rjdate being a date or a yyyy-MM-dd string - confirm against the
# table schema.
offlineOrdernumSql="
use tmp;
drop table if exists member_offline_ordernum;
create table member_offline_ordernum as 
select vipcardno,
count(1) offline_total_ordernum,
sum(case when year(rjdate)=year(current_timestamp)  then 1 else 0 end) offline_year_ordernum,
sum(case when year(rjdate)=year(current_timestamp)-1  then 1 else 0 end) offline_last_year_ordernum,
sum(case when rjdate>= date_sub(current_timestamp,30) then 1 else 0 end) offline_last_1m_ordernum,
sum(case when rjdate>= date_sub(current_timestamp,90) then 1 else 0 end) offline_last_3m_ordernum,
sum(case when rjdate>= date_sub(current_timestamp,180) then 1 else 0 end) offline_last_6m_ordernum
from(
select vipcardno,orgcode,saleno,rjdate from sourcedata.posdmdetails where  create_month >='2016-01'
group by vipcardno,orgcode,saleno,rjdate)t group by vipcardno
"
# Execution of this step is currently disabled; uncomment to rebuild the table.
#hive -e "$offlineOrdernumSql"

# Intermediate table: number of stores each member bought from
# (tmp.member_offline_orgnum).
#
# The inner query reduces sourcedata.posdmdetails rows with
# create_month >= '2016-01' to one row per (vipcardno, orgcode); the outer
# count(1) per vipcardno is then the number of distinct stores, obtained with
# two group-bys instead of count(distinct ...).
# NOTE(review): the variable name spells "Ogrnum" (presumably "Orgnum" was
# meant); it is left unchanged here because renaming it would change code.
offlineOgrnumSql="
use tmp;
drop table if exists member_offline_orgnum;
create table member_offline_orgnum as 
select vipcardno,count(1) offline_orgnum from(
select vipcardno,orgcode from sourcedata.posdmdetails where create_month>='2016-01' group by vipcardno,orgcode)t group by vipcardno
"
# Execution of this step is currently disabled; uncomment to rebuild the table.
#hive -e "$offlineOgrnumSql"

# Store of each member's most recent purchase (builds tmp.member_offline_last_org).
#
# The inner query takes max(rjdate) per (vipcardno, orgcode) from
# sourcedata.posdmdetails as offline_latest_buy_time. The outer query ranks a
# member's stores by that value descending and keeps rn<=1, i.e. one row per
# member holding the latest date and the store it belongs to.
#
# orgcode is used as a secondary sort key: when a member's latest rjdate is
# shared by several stores, ordering by the date alone lets row_number() pick
# any of them, and the pick may differ from one run to the next.
#
# The table is created with "select *", so it also carries the rn column.
# NOTE(review): unlike the order-count and store-count statements in this
# file, no create_month filter is applied here - confirm that scanning the
# full history is intended.
lastOrgSql="
use tmp;
drop table if exists member_offline_last_org;
create table member_offline_last_org as 
select * from(
select a.*,row_number() over(partition by a.vipcardno order by a.offline_latest_buy_time desc,a.orgcode) as rn from(
select vipcardno,orgcode,max(rjdate) offline_latest_buy_time from sourcedata.posdmdetails group by vipcardno,orgcode)a)t where rn<=1
"
# Execution of this step is currently disabled; uncomment to rebuild the table.
#hive -e "$lastOrgSql"

# Final step: build tmp.member_offline_order, a wide table of offline-purchase
# attributes keyed by vipcardno, and run it with hive. This is the only
# statement in the file whose hive invocation is not commented out.
#
# Driving subquery a aggregates sourcedata.posdmdetails rows with
# create_month >= '2016-01' per vipcardno: max and min of totalmoney, sum of
# sstotal, and sum of sstotal for rows whose rjdate year is the previous year.
# It is left-joined on vipcardno to five tables read from tmp:
#   b  member_offline_ordernum        order counts
#   c  member_offline_orgnum          store count; taken from this table via a
#                                     left join to avoid count(distinct),
#                                     which was found too slow
#   d  member_offline_last_org        latest purchase date and its store
#   e  member_offline_org_prefer      orgcode values joined with ','
#   f  member_offline_product_prefer  itemid values joined with ','
# NOTE(review): the hive calls that build those five tables are commented out
# in this file, so they must already exist and be up to date when this runs.
#
# Derived and placeholder columns:
#   offline_last_buy_days   datediff between now and offline_latest_buy_time
#   offline_avg_amount      offline_total_amount / offline_total_ordernum,
#                           rounded to 2 decimals
#   offline_buy_time_prefer, offline_product_category_prefer   constant ''
#   offline_vip_skunum, offline_vip_ordernum,
#   offline_app_vip_skunum, offline_app_vip_ordernum           constant 0
#
# NOTE(review): max/min are taken over totalmoney while the totals sum
# sstotal - confirm that mixing the two columns is intended.
# NOTE(review): collect_set presumably does not keep the rn ranking order, so
# offline_product_prefer may list the items in arbitrary order - confirm.
#
# Session settings issued before the create: hive.map.aggr and
# hive.groupby.skewindata are switched on, map-task JVM options are set to
# -Xmx3072M with -XX:-UseGCOverheadLimit, and mapred.reduce.tasks is set to 30.

offlineSql="
use tmp;
drop table if exists member_offline_order;
set hive.map.aggr=true;
set hive.groupby.skewindata=true;
set mapreduce.map.java.opts=-Xmx3072M -XX:-UseGCOverheadLimit;
set mapred.reduce.tasks=30;
create table member_offline_order as 
select a.vipcardno,e.offline_org_prefer,c.offline_orgnum,
a.offline_max_amount,a.offline_min_amount,a.offline_total_amount,
a.offline_last_year_paymoney,'' as offline_buy_time_prefer,d.offline_latest_buy_time,datediff(current_timestamp,d.offline_latest_buy_time) as offline_last_buy_days,d.orgcode as offline_latest_buy_org,
'' as offline_product_category_prefer,f.offline_product_prefer,0 as offline_vip_skunum,0 as offline_vip_ordernum,
0 as offline_app_vip_skunum,0 as offline_app_vip_ordernum,b.offline_total_ordernum,b.offline_year_ordernum,
b.offline_last_year_ordernum,b.offline_last_1m_ordernum,b.offline_last_3m_ordernum,b.offline_last_6m_ordernum,
round(a.offline_total_amount/b.offline_total_ordernum,2) as offline_avg_amount
from
(
select vipcardno,
max(totalmoney) offline_max_amount,
min(totalmoney) offline_min_amount,
sum(sstotal) offline_total_amount,
sum(case when year(rjdate)=year(current_timestamp)-1  then sstotal else 0 end) offline_last_year_paymoney
from sourcedata.posdmdetails where create_month>='2016-01' group by vipcardno
)a left join
(
select vipcardno,offline_total_ordernum,offline_year_ordernum,offline_last_year_ordernum,offline_last_1m_ordernum,offline_last_3m_ordernum,offline_last_6m_ordernum from tmp.member_offline_ordernum
) b on a.vipcardno=b.vipcardno
left join
(
select vipcardno,offline_orgnum from tmp.member_offline_orgnum
)c on a.vipcardno=c.vipcardno
left join
(select vipcardno, orgcode,offline_latest_buy_time from tmp.member_offline_last_org)d on a.vipcardno=d.vipcardno
left join
(select vipcardno,concat_ws(',' ,collect_set(orgcode)) offline_org_prefer from tmp.member_offline_org_prefer group by vipcardno)e on a.vipcardno=e.vipcardno
left join
(select vipcardno,concat_ws(',' ,collect_set(itemid)) offline_product_prefer from tmp.member_offline_product_prefer group by vipcardno) f on a.vipcardno=f.vipcardno
"

# Run the final statement; its exit status becomes the script's exit status.
hive -e "$offlineSql"

你可能感兴趣的:(hadoop)