-- 创建日期时间维度表:分别以年、季度、月、天为维度,各自创建对应的 Hive 时间维度表。
-- Year dimension table: one row per calendar year.
-- Column order (must match the field order of the loaded file):
-- year_id, year_name, is_leap_year, start_date, end_date, day_num, lunar_year_animal, lunar_year
create table your_db.your_table_name(
    year_id int comment '年份ID:yyyy',
    year_name string comment '年份名称,2019年',
    is_leap_year tinyint comment '是否为闰年:1是,0否',
    start_date string comment '开始日期',
    end_date string comment '结束日期',
    day_num int comment '多少天',
    lunar_year_animal string comment '生肖',
    lunar_year string comment '阴历的天干地支'
) row format delimited fields terminated by ','
stored as textfile;
-- Load the comma-delimited year rows into the table created above
-- (the target must be the same table as in the create statement).
load data local inpath '/dat/tmp/your_file.txt' into table your_db.your_table_name;
-- Quarter dimension table: one row per (year, quarter).
-- NOTE: `table_name` is a placeholder; give each dimension table its own name.
-- Column order: quarter_id, quarter_order, quarter_name, season_name, start_date, end_date, day_num, year_id
create table if not exists table_name(
    quarter_id int comment 'quarterID [20191,20192,20193,20194]',
    quarter_order int comment '1,2,3,4',
    quarter_name string comment 'quarter name',
    season_name string comment 'season name',
    start_date string comment 'start date',
    end_date string comment 'end date',
    day_num int comment 'day number',
    year_id int comment 'year id'
) row format delimited fields terminated by ','
stored as textfile;
-- Month dimension table: one row per (year, month).
-- NOTE: `table_name` is a placeholder; give each dimension table its own name.
-- Column order: month_id, month_order, month_name, start_date, end_date, day_num, quarter_id, year_id
create table table_name(
    month_id int comment 'monthID [201901~201912]',
    month_order int comment '1~12',
    month_name string comment 'month name',
    start_date string comment 'start date',
    end_date string comment 'end date',
    -- Day count is numeric, like day_num in the year and quarter tables.
    day_num int comment 'day number',
    quarter_id int comment 'quarter id',
    year_id int comment 'year id'
) row format delimited fields terminated by ','
stored as textfile;
import calendar
import datetime
def year_lunar(ly):
    """Return the stem-branch name of a year with its zodiac animal.

    Args:
        ly: The year, as an int or anything ``int()`` accepts.

    Returns:
        A string of the form ``<stem><branch>[<animal>]年``, e.g. ``己亥[猪]年``.
    """
    # Year 4 maps to index 0 of every cycle, so shift before taking the modulus.
    offset = int(ly) - 4
    stems = '甲 乙 丙 丁 戊 己 庚 辛 壬 癸'.split()
    branches = '子 丑 寅 卯 辰 巳 午 未 申 酉 戌 亥'.split()
    animals = '鼠 牛 虎 兔 龙 蛇 马 羊 猴 鸡 狗 猪'.split()
    # Branch and animal share the same 12-year cycle position.
    cycle_pos = offset % 12
    return '{0}{1}[{2}]年'.format(stems[offset % 10], branches[cycle_pos], animals[cycle_pos])
def year_sx(year):
    """Return the zodiac animal (one character) for the given year.

    Args:
        year: The year, as an int or anything ``int()`` accepts.
    """
    animals = '鼠 牛 虎 兔 龙 蛇 马 羊 猴 鸡 狗 猪'.split()
    # Year 4 maps to the first animal in the list.
    position = (int(year) - 4) % 12
    return animals[position]
def year_tg(year):
    """Return the heavenly stem (one character) for the given year.

    Args:
        year: The year, as an int or anything ``int()`` accepts.
    """
    stems = '甲 乙 丙 丁 戊 己 庚 辛 壬 癸'.split()
    # Year 4 maps to the first stem in the list.
    position = (int(year) - 4) % 10
    return stems[position]
def year_dz(year):
    """Return the earthly branch (one character) for the given year.

    Args:
        year: The year, as an int or anything ``int()`` accepts.
    """
    branches = '子 丑 寅 卯 辰 巳 午 未 申 酉 戌 亥'.split()
    # Year 4 maps to the first branch in the list.
    position = (int(year) - 4) % 12
    return branches[position]
def is_leap_year(year):
    """Tell whether a year is a leap year.

    Args:
        year: The year, as an int or anything ``int()`` accepts.

    Returns:
        1 for a leap year, 0 otherwise (an int, so callers can add it to a day count).
    """
    y = int(year)
    # Not divisible by 4: never a leap year.
    if y % 4 != 0:
        return 0
    # Divisible by 4 but not a century year: always a leap year.
    if y % 100 != 0:
        return 1
    # Century years are leap years only when divisible by 400.
    return 1 if y % 400 == 0 else 0
def get_firstDay_and_lastDay(year, month):
    """Return the first day, last day and length of a month.

    Args:
        year: The year, as an int or anything ``int()`` accepts.
        month: The month number (1-12), as an int or anything ``int()`` accepts.

    Returns:
        A tuple ``(first_day, last_day, day_count)`` where the first two items are
        ``datetime.date`` objects and ``day_count`` is the number of days in the month.
    """
    y, m = int(year), int(month)
    # calendar.monthrange() returns (weekday of the month's first day, number of
    # days in the month); only the day count is needed here.
    days_in_month = calendar.monthrange(y, m)[1]
    return datetime.date(y, m, 1), datetime.date(y, m, days_in_month), days_in_month
def handler_year(startYear, endYear):
    """Generate the year dimension file ``dim_year.txt``.

    Writes one comma-separated row per year in the inclusive range
    [startYear, endYear]. Fields follow the column order of the year
    dimension table declared in this file:
    year_id, year_name, is_leap_year, start_date, end_date, day_num,
    lunar_year_animal, lunar_year.

    Args:
        startYear: First year to generate (int or numeric string).
        endYear: Last year to generate, inclusive (int or numeric string).
    """
    # Open in 'w' mode, like the quarter and month generators, so a rerun
    # replaces the file instead of appending duplicate rows; the context manager
    # guarantees the file is flushed and closed.
    with open('dim_year.txt', mode='w', encoding='utf-8') as f:
        for y in range(int(startYear), int(endYear) + 1):
            isLeapYear = is_leap_year(y)
            # is_leap_year() returns 1 or 0, so it doubles as the extra day.
            dayNum = 365 + isLeapYear
            # The zodiac animal comes before the stem-branch name, because the table
            # declares lunar_year_animal before lunar_year.
            row = "{0},{0}年,{1},{0}-01-01,{0}-12-31,{2},{3},{4}{5}\n".format(
                y, isLeapYear, dayNum, year_sx(y), year_tg(y), year_dz(y))
            print(row)
            f.write(row)
def handler_quarter(startYear, endYear):
    """Generate the quarter dimension file ``dim_quarter.txt``.

    Writes four comma-separated rows per year in the inclusive range
    [startYear, endYear], with the fields:
    quarter_id, quarter_order, quarter_name, season_name, start_date, end_date,
    day_num, year_id.

    Args:
        startYear: First year to generate (int or numeric string).
        endYear: Last year to generate, inclusive (int or numeric string).
    """
    seasons = ['春', '夏', '秋', '冬']
    startDays = ['01-01', '04-01', '07-01', '10-01']
    endDays = ['03-31', '06-30', '09-30', '12-31']
    # The context manager guarantees the file is flushed and closed.
    with open('dim_quarter.txt', mode='w', encoding='utf-8') as f:
        for y in range(int(startYear), int(endYear) + 1):
            for q in range(1, 5):
                index = q - 1
                first_month = 3 * index + 1
                # Sum the real month lengths so that the leap day is counted only
                # in the first quarter (the one containing February), not in all four.
                quarterDay = sum(
                    calendar.monthrange(y, m)[1]
                    for m in range(first_month, first_month + 3)
                )
                row = "{0}{1},{1},第{1}季度,{2},{0}-{3},{0}-{4},{5},{0}\n".format(
                    y, q, seasons[index], startDays[index], endDays[index], quarterDay)
                print(row)
                f.write(row)
def handler_month(startYear, endYear):
    """Generate the month dimension file ``dim_month.txt``.

    Writes twelve comma-separated rows per year in the inclusive range
    [startYear, endYear], with the fields:
    month_id, month_order, month_name, start_date, end_date, day_num,
    quarter_id, year_id.

    Args:
        startYear: First year to generate (int or numeric string).
        endYear: Last year to generate, inclusive (int or numeric string).
    """
    # The context manager guarantees the file is flushed and closed.
    with open('dim_month.txt', mode='w', encoding='utf-8') as f:
        for y in range(int(startYear), int(endYear) + 1):
            for m in range(1, 13):
                firstDay, lastDay, dayNum = get_firstDay_and_lastDay(y, m)
                # Quarter id is the year followed by the quarter number (1-4);
                # months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
                quarterId = '{0}{1}'.format(y, (m - 1) // 3 + 1)
                # month_id zero-pads the month to two digits (e.g. 201901).
                row = "{0}{1:0>2},{1},{1}月份,{2},{3},{4},{5},{0}\n".format(
                    y, m, firstDay, lastDay, dayNum, quarterId)
                print(row)
                f.write(row)
if __name__ == '__main__':
    # Inclusive range of years to generate dimension rows for.
    startYear, endYear = '1900', '2100'
    # Year dimension file (currently disabled).
    #handler_year(startYear, endYear)
    # Quarter dimension file (currently disabled).
    #handler_quarter(startYear, endYear)
    # Month dimension file.
    handler_month(startYear, endYear)