创建日期时间维度表:分别以年、季度、月、天为维度,各自创建对应的 Hive 时间维度表。

下面分别以年、季度、月、天为维度创建 Hive 时间维度表,并用 Python 脚本生成对应的维度数据:

-- Year dimension table: one row per year, stored as comma-delimited text.
create table your_db.your_table_name(
  year_id int comment '年份ID:yyyy',
  year_name string comment '年份名称,2019年',
  is_leap_year tinyint comment '是否为闰年:1是,0否',
  start_date string  comment '开始日期',
  end_date string comment '结束日期',
  day_num int comment '多少天',
  lunar_year_animal string comment '生肖',
  lunar_year string comment '阴历的天干地支'
  ) row format delimited fields terminated by ','
 stored as textfile;


-- Load the generated year rows into the table created above
-- (the target must be the same table as in the create statement).
load data local inpath '/dat/tmp/your_file.txt' into table your_db.your_table_name;

-- Quarter dimension table: one row per quarter, stored as comma-delimited text.
-- Columns: quarter_id, quarter_order, quarter_name, season_name, start_date, end_date, day_num, year_id

create table if not exists table_name(
  quarter_id int comment 'quarterID [20191,20192,20193,20194]',
  quarter_order int comment '1,2,3,4',
  quarter_name string comment 'quarter name',
  season_name string comment 'season name',
  start_date string  comment 'start date',
  end_date string comment 'end date',
  day_num int comment 'day number',
  year_id int comment 'year id'
  )  row format delimited fields terminated by ','
 stored as textfile;

-- Month dimension table: one row per month, stored as comma-delimited text.
-- Columns: month_id, month_order, month_name, start_date, end_date, day_num, quarter_id, year_id
create table table_name(
  month_id int comment 'monthID [201901~201912]',
  month_order int comment '1~12',
  month_name string comment 'month name',
  start_date string comment 'start date',
  end_date string  comment 'end date',
  -- day_num is a count of days, so it is an int like in the year and quarter tables
  day_num int comment 'day number',
  quarter_id int comment 'quarter id',
  year_id int comment 'year id'
  )  row format delimited fields terminated by ','
 stored as textfile;


import calendar
import datetime

def year_lunar(ly):
    """Return the stem-branch label of a year, e.g. ``'己亥[猪]年'`` for 2019.

    ly: the year number, as an int or any value ``int()`` accepts.

    The result is the heavenly stem, the earthly branch, the zodiac animal
    in square brackets, and the character '年', concatenated in that order.
    """
    stems = '甲 乙 丙 丁 戊 己 庚 辛 壬 癸'.split()
    branches = '子 丑 寅 卯 辰 巳 午 未 申 酉 戌 亥'.split()
    animals = '鼠 牛 虎 兔 龙 蛇 马 羊 猴 鸡 狗 猪'.split()
    # Year 4 maps to index 0 of every cycle, so shift before taking the modulo.
    offset = int(ly) - 4
    branch_index = offset % 12
    parts = [stems[offset % 10], branches[branch_index], '[', animals[branch_index], ']', '年']
    return ''.join(parts)

def year_sx(year):
    """Return the zodiac-animal character for ``year`` (int or ``int()``-convertible)."""
    animals = '鼠 牛 虎 兔 龙 蛇 马 羊 猴 鸡 狗 猪'.split()
    # The 12-animal cycle is aligned so that year 4 is index 0 ('鼠').
    index = (int(year) - 4) % 12
    return animals[index]

def year_tg(year):
    """Return the heavenly-stem character for ``year`` (int or ``int()``-convertible)."""
    stems = '甲 乙 丙 丁 戊 己 庚 辛 壬 癸'.split()
    # The 10-stem cycle is aligned so that year 4 is index 0 ('甲').
    index = (int(year) - 4) % 10
    return stems[index]

def year_dz(year):
    """Return the earthly-branch character for ``year`` (int or ``int()``-convertible)."""
    branches = '子 丑 寅 卯 辰 巳 午 未 申 酉 戌 亥'.split()
    # The 12-branch cycle is aligned so that year 4 is index 0 ('子').
    index = (int(year) - 4) % 12
    return branches[index]

def is_leap_year(year):
    """Return 1 if ``year`` is a leap year, otherwise 0.

    year: int or any value ``int()`` accepts.

    A year is a leap year when it is divisible by 4, except that century
    years (divisible by 100) must also be divisible by 400.
    """
    year = int(year)
    # Not divisible by 4: never a leap year.
    if year % 4 != 0:
        return 0
    # Century years only qualify when divisible by 400.
    if year % 100 == 0 and year % 400 != 0:
        return 0
    return 1


def get_firstDay_and_lastDay(year, month):
    """Return ``(first_day, last_day, day_count)`` for the given month.

    year, month: ints or values ``int()`` accepts.

    first_day and last_day are ``datetime.date`` objects; day_count is the
    number of days in the month.
    """
    year, month = int(year), int(month)
    # calendar.monthrange returns (weekday of the month's first day, number
    # of days in the month); only the day count is needed here.
    _, days_in_month = calendar.monthrange(year, month)
    first = datetime.date(year, month, 1)
    last = datetime.date(year, month, days_in_month)
    return first, last, days_in_month


def handler_year(startYear, endYear):
    """Write one year-dimension row per year to ``dim_year.txt``.

    startYear, endYear: inclusive bounds, ints or values ``int()`` accepts.

    Each row is comma-separated in the column order of the year dimension
    table: year_id, year_name, is_leap_year, start_date, end_date, day_num,
    lunar_year_animal, lunar_year. Every row is also printed to stdout.
    """
    # Mode 'w' (like the other handlers) so a re-run regenerates the file
    # instead of appending duplicate rows; the context manager guarantees
    # the file is flushed and closed even if a row fails to format.
    with open('dim_year.txt', mode='w', encoding='utf-8') as f:
        for y in range(int(startYear), int(endYear) + 1):
            isLeapYear = is_leap_year(y)
            dayNum = 365 + isLeapYear
            # Zodiac animal first, then stem + branch, to match the table's
            # lunar_year_animal / lunar_year column order.
            row = "{0},{0}年,{1},{0}-01-01,{0}-12-31,{2},{3},{4}{5}\n".format(
                y, isLeapYear, dayNum, year_sx(y), year_tg(y), year_dz(y))
            print(row)
            f.write(row)

def handler_quarter(startYear, endYear):
    """Write one quarter-dimension row per quarter to ``dim_quarter.txt``.

    startYear, endYear: inclusive bounds, ints or values ``int()`` accepts.

    Each row is comma-separated: quarter_id, quarter_order, quarter_name,
    season_name, start_date, end_date, day_num, year_id. Every row is also
    printed to stdout.
    """
    seasons = ['春', '夏', '秋', '冬']
    startDays = ['01-01', '04-01', '07-01', '10-01']
    endDays = ['03-31', '06-30', '09-30', '12-31']
    # The context manager guarantees the file is flushed and closed.
    with open('dim_quarter.txt', mode='w', encoding='utf-8') as f:
        for y in range(int(startYear), int(endYear) + 1):
            for q in (1, 2, 3, 4):
                index = q - 1
                firstMonth = 3 * index + 1
                # Sum the real month lengths so that February 29 is counted
                # only in the first quarter of a leap year, not in every quarter.
                quarterDay = sum(
                    calendar.monthrange(y, m)[1]
                    for m in range(firstMonth, firstMonth + 3))
                row = "{0}{1},{1},第{1}季度,{2},{0}-{3},{0}-{4},{5},{0}\n".format(
                    y, q, seasons[index], startDays[index], endDays[index], quarterDay)
                print(row)
                f.write(row)

def handler_month(startYear, endYear):
    """Write one month-dimension row per month to ``dim_month.txt``.

    startYear, endYear: inclusive bounds, ints or values ``int()`` accepts.

    Each row is comma-separated: month_id (yyyymm), month_order, month_name,
    start_date, end_date, day_num, quarter_id (yyyyq), year_id. Every row is
    also printed to stdout.
    """
    # The context manager guarantees the file is flushed and closed.
    with open('dim_month.txt', mode='w', encoding='utf-8') as f:
        for y in range(int(startYear), int(endYear) + 1):
            for m in range(1, 13):
                firstDay, lastDay, dayNum = get_firstDay_and_lastDay(y, m)
                # Months 1-3 belong to quarter 1, 4-6 to quarter 2, and so on.
                quarterId = '{0}{1}'.format(y, (m - 1) // 3 + 1)
                row = "{0}{1:0>2},{1},{1}月份,{2},{3},{4},{5},{0}\n".format(
                    y, m, firstDay, lastDay, dayNum, quarterId)
                print(row)
                f.write(row)


if __name__ == '__main__':
    # Inclusive range of years passed to the handlers.
    startYear, endYear = '1900', '2100'

    # Year and quarter generation are currently switched off:
    # handler_year(startYear, endYear)
    # handler_quarter(startYear, endYear)

    # Generate the month dimension data file.
    handler_month(startYear, endYear)


你可能感兴趣的:(数据仓库)