1、目标表
-- Result table for the monthly per-game top-100 rankings written by hiveRunData.
-- Every column is declared as STRING. The loader fills them as follows
--   rn                 row_number() position inside the ranking
--   game_name          grouping key of the ranking
--   active_anchor_cnt  count(distinct room_id)
--   active_uid_cnt     count(distinct uid)
--   view_time          sum(view_time)
--   gift_point         sum(gift_point)
--   cost_amount        sum(gift_point)/1000/2 plus the summed salary amount, cast to bigint
-- Partitions
--   pt_month           month label in YYYY-MM form
--   partype            ranking criterion, 1 = gift_point, 2 = distinct room_id,
--                      3 = distinct uid, 4 = view_time
DROP TABLE IF EXISTS xxyl0628_result;
CREATE TABLE `xxyl0628_result` (
    `rn`                STRING,
    `game_name`         STRING,
    `active_anchor_cnt` STRING,
    `active_uid_cnt`    STRING,
    `view_time`         STRING,
    `gift_point`        STRING,
    `cost_amount`       STRING
)
PARTITIONED BY (
    `pt_month` STRING,
    `partype`  STRING
)
ROW FORMAT SERDE
    'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
STORED AS
    INPUTFORMAT
        'org.apache.hadoop.mapred.TextInputFormat'
    OUTPUTFORMAT
        'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
    'hdfs://emr-cluster/user/hive/warehouse/xxyl0628_result'
;
2、主程序
/Users/nisj/PycharmProjects/BiDataProc/love/HiveRunData-yl0628.py
# -*- coding=utf-8 -*-
import os
import datetime
import warnings
import time
import threadpool
import calendar
import datetime
warnings.filterwarnings("ignore")
def dateRange(beginDate, endDate):
    """Return every day from beginDate through endDate, both inclusive.

    Args:
        beginDate: first day as a 'YYYY-MM-DD' string.
        endDate: last day as a 'YYYY-MM-DD' string.

    Returns:
        List of 'YYYY-MM-DD' strings in ascending order; empty when
        beginDate sorts after endDate.

    Raises:
        ValueError: if beginDate does not match '%Y-%m-%d'.
    """
    cursor = datetime.datetime.strptime(beginDate, "%Y-%m-%d")
    one_day = datetime.timedelta(days=1)
    days = []
    label = beginDate
    # The bound check is a plain string comparison, which matches calendar
    # order for zero-padded ISO dates.
    while True:
        if label > endDate:
            break
        days.append(label)
        cursor = cursor + one_day
        label = cursor.strftime("%Y-%m-%d")
    return days
def monthRange(beginMonth, endMonth):
    """Return every month from beginMonth through endMonth, both inclusive.

    Args:
        beginMonth: first month as a 'YYYY-MM' string.
        endMonth: last month as a 'YYYY-MM' string.

    Returns:
        List of zero-padded 'YYYY-MM' strings in ascending order; empty when
        beginMonth is later than endMonth.

    Raises:
        ValueError: if either bound does not match '%Y-%m'.
    """
    first = datetime.datetime.strptime(beginMonth, "%Y-%m")
    last = datetime.datetime.strptime(endMonth, "%Y-%m")
    # Count months on a single integer axis so the walk advances one month
    # per step (instead of one day per step) and the bounds are compared as
    # numbers rather than as label strings.
    start_index = first.year * 12 + (first.month - 1)
    stop_index = last.year * 12 + (last.month - 1)
    return ["%04d-%02d" % (index // 12, index % 12 + 1)
            for index in range(start_index, stop_index + 1)]
def months_addsub(dt, months):
    """Shift a date by a whole number of months and return it as a string.

    Args:
        dt: a datetime.date or datetime.datetime to shift.
        months: number of months to add; negative values move backwards.

    Returns:
        str() of the shifted value. The day of month is clamped to the last
        day of the target month (for example Jan 31 plus one month gives
        Feb 28 or Feb 29).
    """
    # divmod floors on integers under both Python 2 and Python 3, so the year
    # stays an int (true division would hand calendar.monthrange a float) and
    # negative offsets roll the year backwards correctly.
    year_shift, month_index = divmod(dt.month - 1 + months, 12)
    year = dt.year + year_shift
    month = month_index + 1
    day = min(dt.day, calendar.monthrange(year, month)[1])
    return str(dt.replace(year=year, month=month, day=day))
def getYesterday():
    """Return the calendar day before today as a datetime.date."""
    return datetime.date.today() - datetime.timedelta(days=1)
# HiveQL that rebuilds the two fixed-name staging tables for one month.
#   xxyl0628_live_salary: one row per room for the game that ranks first both
#     by total live minutes and by number of days with >= 30 live minutes,
#     kept only when that day count is >= 3 and the room has is_profession=1
#     and state=0 in the {yesterday} partition of oss_bi_all_room. The salary
#     amount is read from the {pt_month_next} partition of the salary table.
#   xxyl0628_view_gift: for every uid, the room with the largest summed
#     view_time in the month, joined to that uid's gift points in that room.
# Each line ends with a backslash so the command stays on one shell line.
_STAGE_SQL = """\
    drop table if exists xxyl0628_live_salary; \
    create table xxyl0628_live_salary as \
    with tab_live as( \
    select room_id,game_id,game_name,count(distinct case when live_mins>=30 then pt_day else null end) live_eff_day,sum(live_mins) live_mins,row_number()over(partition by room_id order by sum(live_mins) desc) rn_live_long,row_number()over(partition by room_id order by count(distinct case when live_mins>=30 then pt_day else null end) desc) rn_live_days \
    from (select room_id,game_id,game_name,pt_day,sum(unix_timestamp(updated_time)-unix_timestamp(switch_time))/60 live_mins \
    from oss_bi_all_live_history_status \
    where pt_month='{pt_month}' \
    and game_id<>-1 \
    group by room_id,game_id,game_name,pt_day) a1 \
    group by room_id,game_id,game_name), \
    tab_salary as( \
    select room_id,sum(amount) salary_amount,sum(case when type=1 then amount else 0 end) rank_salary,sum(case when type=2 then amount else 0 end) contract_salary \
    from oss_bi_all_finance_salary_record \
    where pt_month ='{pt_month_next}' \
    and type in(1,2) and state=0 \
    group by room_id) \
    select a1.room_id,a1.game_id,a1.game_name,a1.live_eff_day,a1.live_mins,coalesce(a3.salary_amount,0) salary_concat_rank_amount \
    from tab_live a1 \
    inner join oss_bi_all_room a2 on a1.room_id=a2.id \
    left join tab_salary a3 on a1.room_id=a3.room_id \
    where a1.live_eff_day>=3 and a1.rn_live_long=1 and a1.rn_live_days=1 \
    and a2.pt_day='{yesterday}' and a2.is_profession=1 and a2.state=0; \
    drop table if exists xxyl0628_view_gift; \
    create table xxyl0628_view_gift as \
    with tab_view as( \
    select uid,roomid room_id,sum(view_time) view_time,row_number()over(partition by uid order by sum(view_time) desc) rn \
    from recommend_data_view a1 \
    where substr(pt_day,1,7)='{pt_month}' \
    group by uid,roomid \
    ), \
    tab_gift as( \
    select room_id,uid,sum(gift_point) gift_point \
    from honeycomb_all_gift_record \
    where pt_month='{pt_month}' \
    and gift_point<>0 \
    group by room_id,uid) \
    select a1.uid,a1.room_id,a1.view_time,coalesce(a3.gift_point,0) gift_point,a2.game_id,a2.game_name,a2.salary_concat_rank_amount \
    from tab_view a1 \
    inner join xxyl0628_live_salary a2 on a1.room_id=a2.room_id \
    left join tab_gift a3 on a1.uid=a3.uid and a1.room_id=a3.room_id \
    where a1.rn=1; \
    """

# HiveQL that recreates one (pt_month, partype) partition of xxyl0628_result
# with the top-100 games ordered by {rank_expr}.
# NOTE(review): the partition location '{pt_month}/{partype}' is a relative
# path, kept exactly as before - confirm how the cluster resolves it.
_RESULT_SQL = """\
    alter table xxyl0628_result drop partition(pt_month='{pt_month}',partype={partype}); \
    alter table xxyl0628_result add partition(pt_month='{pt_month}',partype={partype}) location '{pt_month}/{partype}'; \
    with tab_result as( \
    select row_number()over(order by {rank_expr} desc) rn,a1.game_name,count(distinct room_id) active_anchor_cnt,count(distinct uid) active_uid_cnt,sum(view_time) view_time,sum(gift_point) gift_point,sum(gift_point)/1000/2 cost_amount1 \
    from xxyl0628_view_gift a1 \
    group by a1.game_name), \
    tab_salary as( \
    select a1.game_name, sum(a1.salary_concat_rank_amount) cost_amount2 \
    from xxyl0628_live_salary a1 \
    group by a1.game_name) \
    insert overwrite table xxyl0628_result partition(pt_month='{pt_month}',partype={partype}) \
    select a1.rn,a1.game_name,a1.active_anchor_cnt,a1.active_uid_cnt,a1.view_time,a1.gift_point,cast(a1.cost_amount1+coalesce(a2.cost_amount2,0) as bigint) cost_amount \
    from tab_result a1 \
    left join tab_salary a2 on a1.game_name=a2.game_name \
    where a1.rn<=100; \
    """

# partype value -> aggregate expression the ranking is ordered by.
_RANKINGS = (
    (1, "sum(gift_point)"),
    (2, "count(distinct room_id)"),
    (3, "count(distinct uid)"),
    (4, "sum(view_time)"),
)


def _run_hive(sql):
    """Run HiveQL through the Hive CLI and return the os.system status (0 on success)."""
    return os.system("""/usr/lib/hive-current/bin/hive -e "{sql}" """.format(sql=sql))


def hiveRunData(pt_month):
    """Rebuild the staging tables and the four top-100 rankings for one month.

    First recreates xxyl0628_live_salary and xxyl0628_view_gift for pt_month,
    then overwrites the partitions (pt_month, partype=1..4) of
    xxyl0628_result, one per ranking criterion in _RANKINGS.

    If the staging step exits non-zero the rankings are skipped: the staging
    tables have fixed names, so continuing could publish data left over from
    another month into this month's partitions. A failure of one ranking is
    reported and the remaining rankings still run.

    Args:
        pt_month: month to process as a zero-padded 'YYYY-MM' string.

    Returns:
        None.
    """
    yesterday = str(getYesterday())
    month_start = datetime.date(int(pt_month[0:4]), int(pt_month[5:7]), 1)
    # Salary rows are read from the partition of the following month.
    pt_month_next = months_addsub(month_start, +1)[0:7]

    stage_status = _run_hive(_STAGE_SQL.format(
        pt_month=pt_month, pt_month_next=pt_month_next, yesterday=yesterday))
    if stage_status != 0:
        print("hiveRunData: staging step failed for pt_month=%s (status %s), rankings skipped"
              % (pt_month, stage_status))
        return

    for partype, rank_expr in _RANKINGS:
        status = _run_hive(_RESULT_SQL.format(
            pt_month=pt_month, partype=partype, rank_expr=rank_expr))
        if status != 0:
            print("hiveRunData: ranking partype=%s failed for pt_month=%s (status %s)"
                  % (partype, pt_month, status))
# run parallel Batch
now_time = time.strftime('%Y-%m-%d %X', time.localtime())
print "当前时间是:",now_time
runDay_list = monthRange(beginMonth='2017-05', endMonth='2019-05')
requests = []
request_hiveRunData2localFile_batchCtl = threadpool.makeRequests(hiveRunData, runDay_list)
requests.extend(request_hiveRunData2localFile_batchCtl)
main_pool = threadpool.ThreadPool(1)
[main_pool.putRequest(req) for req in requests]
if __name__ == '__main__':
while True:
try:
time.sleep(30)
main_pool.poll()
except KeyboardInterrupt:
print("**** Interrupted!")
break
except threadpool.NoResultsPending:
break
if main_pool.dismissedWorkers:
print("Joining all dismissed worker threads...")
main_pool.joinAllDismissedWorkers()
now_time = time.strftime('%Y-%m-%d %X', time.localtime())
print "当前时间是:",now_time
3、说明
由于特殊情况,不能使用临时表,中间表 xxyl0628_live_salary 和 xxyl0628_view_gift 只能使用固定表名,不同月份的任务会相互覆盖这两张表,所以不能使用并行跑批(线程池大小固定为 1,各月份按顺序串行执行)。