1、目标分区表的创建
-- Target table for the per-hour room-heartbeat aggregation, one partition per day.
-- The drop discards any existing table of this name before it is recreated.
drop table if exists xxxxxx_fans_view;

create table xxxxxx_fans_view (
    datehour       string,  -- loader writes substr(date_time,1,13)
    uid            string,  -- loader writes parms['uid']
    roomid         string,  -- loader writes parms['roomId']
    roomcreatoruid string,  -- loader writes parms['roomCreatorUid']
    -- NOTE(review): declared string although the loader writes count(1);
    -- confirm whether a numeric type was intended.
    staytime       string
)
partitioned by (
    pt_day string
);
-- Target table for the per-hour login aggregation, one partition per day.
-- The drop discards any existing table of this name before it is recreated.
drop table if exists xxxxxx_fans_login;

create table xxxxxx_fans_login (
    datehour string,  -- loader writes substr(date_time,1,13)
    uid      string,  -- loader writes parms['uid']
    -- NOTE(review): declared string although the loader writes count(*);
    -- confirm whether a numeric type was intended.
    logincnt string
)
partitioned by (
    pt_day string
);
2、进行跑批的Python脚本
/Users/nisj/PycharmProjects/BiDataProc/love/HiveRunData.py
# -*- coding=utf-8 -*-
import os
import re
import datetime
import warnings
warnings.filterwarnings("ignore")
def dateRange(beginDate, endDate):
    """Return every calendar day from beginDate to endDate, both inclusive.

    Both bounds are parsed as dates and compared as dates, so inputs that are
    not zero-padded (e.g. '2017-12-9') are handled correctly instead of being
    compared as plain text.

    Args:
        beginDate: first day of the range, as a '%Y-%m-%d' string.
        endDate: last day of the range, as a '%Y-%m-%d' string.

    Returns:
        A list of zero-padded 'YYYY-MM-DD' strings in ascending order; the
        list is empty when beginDate is later than endDate.

    Raises:
        ValueError: if either bound does not match '%Y-%m-%d'.
    """
    dayFormat = "%Y-%m-%d"
    current = datetime.datetime.strptime(beginDate, dayFormat)
    last = datetime.datetime.strptime(endDate, dayFormat)
    oneDay = datetime.timedelta(days=1)

    dates = []
    while current <= last:
        # Always emit the normalized form so every element has the same shape.
        dates.append(current.strftime(dayFormat))
        current += oneDay
    return dates
def _runHiveCommand(tableName, pt_day, command):
    """Run one hive command line through the shell and report a non-zero exit.

    Nothing is raised on failure; a one-line message is written to stderr.

    Returns:
        The raw status returned by os.system().
    """
    import sys  # local import: the file's import block does not bring in sys

    status = os.system(command)
    if status != 0:
        sys.stderr.write(
            "hive load of %s for pt_day=%s failed (os.system status %s)\n"
            % (tableName, pt_day, status))
    return status


def hiveRunData(pt_day):
    """Rebuild the pt_day partition of xxxxxx_fans_view and xxxxxx_fans_login.

    For each target table one hive CLI invocation drops the partition,
    re-adds it with location '<pt_day>', and fills it with an
    `insert overwrite` that groups the source log by
    substr(date_time,1,13) and the relevant parms[...] keys:

    * xxxxxx_fans_view  <- oss_bi_all_room_heartbeat_log, count(1) as stayTime
    * xxxxxx_fans_login <- oss_bi_all_login_log, count(*) as logincnt

    The two loads run one after the other. A non-zero exit status is reported
    on stderr but does not stop the second load and does not raise.

    Args:
        pt_day: partition day string; its first seven characters are used as
            the pt_month filter on the source tables (so a 'YYYY-MM-DD' value
            is expected). The value is interpolated into a shell command
            unescaped, so it must come from a trusted source.

    Returns:
        None.
    """
    pt_month = pt_day[0:7]

    # The HQL is passed as one double-quoted shell argument to `hive -e`.
    hivePrefix = '/usr/lib/hive-current/bin/hive -e " '
    hiveSuffix = '" '

    viewHql = (
        "alter table xxxxxx_fans_view drop partition(pt_day='{pt_day}'); "
        "alter table xxxxxx_fans_view add partition(pt_day='{pt_day}') location '{pt_day}'; "
        "insert overwrite table xxxxxx_fans_view partition(pt_day='{pt_day}') "
        "select substr(date_time,1,13) datehour,parms['uid'] uid,parms['roomId'] roomId,parms['roomCreatorUid'] roomCreatorUid,count(1) stayTime "
        "from oss_bi_all_room_heartbeat_log a1 "
        "where pt_month='{pt_month}' and pt_day='{pt_day}' "
        "group by substr(date_time,1,13),parms['uid'],parms['roomId'],parms['roomCreatorUid']; "
    )
    loginHql = (
        "alter table xxxxxx_fans_login drop partition(pt_day='{pt_day}'); "
        "alter table xxxxxx_fans_login add partition(pt_day='{pt_day}') location '{pt_day}'; "
        "insert overwrite table xxxxxx_fans_login partition(pt_day='{pt_day}') "
        "select substr(date_time,1,13) datehour,parms['uid'] uid,count(*) logincnt "
        "from oss_bi_all_login_log a1 "
        "where pt_month='{pt_month}' and pt_day='{pt_day}' "
        "group by substr(date_time,1,13),parms['uid']; "
    )

    for tableName, hql in (("xxxxxx_fans_view", viewHql),
                           ("xxxxxx_fans_login", loginHql)):
        command = (hivePrefix + hql + hiveSuffix).format(
            pt_month=pt_month, pt_day=pt_day)
        _runHiveCommand(tableName, pt_day, command)
# Backfill driver: runs at module level, one hiveRunData() call per day of the
# fixed window 2017-12-23 .. 2018-01-22 (both ends included).
for ptDay in dateRange(beginDate='2017-12-23', endDate='2018-01-22'):
    # Parenthesised single-argument form prints the same text under Python 2's
    # print statement and Python 3's print function.
    print(ptDay)
    hiveRunData(pt_day=ptDay)
事实上，并没有什么复杂的；就是记一下，备查。