Java8实现批量生成数仓日期维度数据

  公司最近要开始数仓建设,那数仓的一些基础维度表要先生成,这里介绍最重要的维度表之一日期维度表的生成。整个过程分三步走:

  1. 使用java8的日期类生成日期维度表所需要的各种字段;
  2. 将生成的记录使用springboot+mybatis批量插入到mysql中;
  3. 使用sqoop将mysql表中的数据导入到hive中;

一、mysql中设计日期维度表

CREATE TABLE `dim_date` (
  `date_id` int(8) DEFAULT NULL COMMENT '日期数字格式,如:20200101',
  `date` varchar(20) CHARACTER SET utf8 DEFAULT NULL COMMENT '日期字符串格式,如:2020-01-01',
  `week` int(4) DEFAULT NULL COMMENT '周几的数字,如:1,2,3,4,5,6,7',
  `week_cn` varchar(20) DEFAULT NULL COMMENT '周几的中文表示,如:星期一',
  `year_week` varchar(20) DEFAULT NULL COMMENT '年周组合,如:2020-34',
  `month` int(4) DEFAULT NULL COMMENT '月份数字,如:1,2...12',
  `year_month` varchar(20) DEFAULT NULL COMMENT '年月组合,如:2020-01',
  `quarter` int(4) DEFAULT NULL COMMENT '季度数字,如:1,2,3,4',
  `year_quarter` varchar(20) DEFAULT NULL COMMENT '年季度组合,如:2020-Q1',
  `year` int(4) DEFAULT NULL COMMENT '年数字,如:2020',
  `day_in_year` int(4) DEFAULT NULL COMMENT '一年中的第几天,如:365',
  `day_in_month` int(4) DEFAULT NULL COMMENT '一月中的第几天,如:28',
  `is_first_day_in_month` varchar(20) DEFAULT NULL COMMENT '是否是月的第一天(Y:是;N:否)',
  `is_last_day_in_month` varchar(20) DEFAULT NULL COMMENT '是否是月的最后一天(Y:是;N:否)',
  `week_in_month` int(4) DEFAULT NULL COMMENT '月的第几周,如:1',
  `week_in_year` int(4) DEFAULT NULL COMMENT '年的第几周,如:30',
  `date_type` varchar(20) DEFAULT NULL COMMENT '日期类型:工作日:WORKDAY;国家法定节假日:HOLIDAY;周末休息日:WEEKEND;节假日调休:EXCHANGING_HOLIDAY'
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='日期维度表';

二、编写Java代码

1、编写日期类实体


public class DimDate {
    private Integer dateId;

    private String date;

    private Integer week;

    private String weekCn;

    private String yearWeek;

    private Integer month;

    private String yearMonth;

    private Integer quarter;

    private String yearQuarter;

    private Integer year;

    private Integer dayInYear;

    private Integer dayInMonth;

    private String isFirstDayInMonth;

    private String isLastDayInMonth;

    private Integer weekInMonth;

    private Integer weekInYear;

    private String dateType;
}

2、生成日期类核心代码

代码中调用了第三方的节假日API接口,以获取节假日信息。

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.zstax.domain.DimDate;
import com.zstax.enums.DateTypeEnum;
import org.apache.commons.lang3.StringUtils;

import java.time.DayOfWeek;
import java.time.LocalDate;
import java.time.Period;
import java.time.format.DateTimeFormatter;
import java.time.temporal.TemporalAdjusters;
import java.time.temporal.WeekFields;
import java.util.*;

/**
 * @author: ndf
 * @date: 2021/5/20 11:36
 * @description:
 */
public class GenerateDimDate {
    private static Map weekCNMap = new HashMap<>();
    private static String API_URL = "http://api.tianapi.com/txapi/jiejiari/index";

    static {
        weekCNMap.put(1, "星期一");
        weekCNMap.put(2, "星期二");
        weekCNMap.put(3, "星期三");
        weekCNMap.put(4, "星期四");
        weekCNMap.put(5, "星期五");
        weekCNMap.put(6, "星期六");
        weekCNMap.put(7, "星期日");
    }



    /**
     * 根据开始日期和结束日期返回dimDate实体列表
     *
     * @param startDate 开始日期格式:yyyy-MM-dd
     * @param endDate   结束日期格式:yyyy-MM-dd
     * @return
     * @throws Exception
     */
    public static List getDimDateListByRange(String startDate, String endDate) throws Exception {
        if (StringUtils.isBlank(startDate) || StringUtils.isBlank(endDate)) {
            throw new Exception("开始日期和结束日期不能为空!");
        }
        String[] startDateArr = startDate.split("-");
        String[] endDateArr = endDate.split("-");
        if (startDate.length() != 3 && endDate.length() != 3) {
            throw new Exception("开始日期或结束日期格式不正确!");
        }
        if (DateUtils.getDistanceBetweenDates(startDate, endDate) < 0) {
            throw new Exception("开始日期不能大于结束日期!");
        }
        int startYear = Integer.parseInt(startDateArr[0]);
        int endYear = Integer.parseInt(endDateArr[0]);
        if (startYear != endYear) {
            throw new Exception("开始日期和结束日期不在同一个年份!");
        }
        Map> holidayDatesMap = getHolidayDates(startYear);

        return getDimDateListByRange(startDate, endDate, holidayDatesMap);
    }

    /**
     * 通过年份获取一整年的数据
     *
     * @param year
     * @return
     */
    public static List getDimDateListByYear(int year) throws Exception {
        int nowYear = LocalDate.now().getYear();

        if (year > nowYear) {
            throw new Exception("无法获取当前年份之后的节假日信息,故不能传入大于当前年的年份!");
        }

        Map> holidayDatesMap = getHolidayDates(year);
        String startDate = DateUtils.getFirstDayOfYear(year);
        String endDate = DateUtils.getLastDayOfYear(year);

        return getDimDateListByRange(startDate, endDate, holidayDatesMap);
    }

    /**
     * 处理区间日期逻辑
     *
     * @param startDate
     * @param endDate
     * @param holidaysMap
     * @return
     */
    private static List getDimDateListByRange(String startDate, String endDate, Map> holidaysMap) {
        String currentDate = startDate;
        List dimDates = new ArrayList<>();
        while (DateUtils.getDistanceBetweenDates(currentDate, endDate) >= 0) {
            DimDate dimDate = getDimDate(currentDate, holidaysMap);
            dimDates.add(dimDate);
            currentDate = DateUtils.getAfterDays(currentDate, 1);
        }
        dimDates.sort(Comparator.comparing(DimDate::getDateId));
        return dimDates;
    }

    /**
     * 获取dimDate实体
     *
     * @param date        将要处理的日期
     * @param holidaysMap 节假日信息Map
     * @return
     */
    public static DimDate getDimDate(String date, Map> holidaysMap) {
        Integer dateId = Integer.parseInt(date.replace("-", ""));
        DateTimeFormatter dtf = DateTimeFormatter.ofPattern("yyyy-MM-dd");
        LocalDate ld = LocalDate.parse(date, dtf);
        int week = ld.getDayOfWeek().getValue();
        int dayInMonth = ld.getDayOfMonth();
        int dayInYear = ld.getDayOfYear();
        int month = ld.getMonthValue();
        int year = ld.getYear();
        LocalDate lastDayOfMonth = ld.with(TemporalAdjusters.lastDayOfMonth());
        String isLastDayInMonth = Period.between(ld, lastDayOfMonth).getDays() == 0 ? "Y" : "N";
        LocalDate firstDayOfMonth = ld.with(TemporalAdjusters.firstDayOfMonth());
        String isFirstDayInMonth = Period.between(ld, firstDayOfMonth).getDays() == 0 ? "Y" : "N";
        WeekFields weekFields = WeekFields.of(DayOfWeek.MONDAY, 1);
        int weekInMonth = ld.get(weekFields.weekOfMonth());
        int weekInYear = ld.get(weekFields.weekOfYear());
        DimDate dimDate = new DimDate();
        dimDate.setDateId(dateId);
        dimDate.setDate(date);

        dimDate.setWeek(week);
        dimDate.setWeekCn(weekCNMap.get(week));
        // 这里用常规周,未使用 周 iOS标准
        // 关于两者区别可查看:https://blog.csdn.net/Liunene/article/details/83024212
        String weekInYearStr = weekInYear < 10 ? "0" + weekInYear : weekInYear + "";
        dimDate.setYearWeek(year + "-" + weekInYearStr);

        dimDate.setMonth(month);
        dimDate.setYearMonth(DateUtils.getMonthDate(date));

        dimDate.setQuarter(getQuarterByMonth(month));
        dimDate.setYearQuarter(year + "-Q" + dimDate.getQuarter());
        dimDate.setYear(year);
        dimDate.setDayInYear(dayInYear);
        dimDate.setDayInMonth(dayInMonth);
        dimDate.setIsFirstDayInMonth(isFirstDayInMonth);
        dimDate.setIsLastDayInMonth(isLastDayInMonth);
        dimDate.setWeekInMonth(weekInMonth);
        dimDate.setWeekInYear(weekInYear);

        /**********************判断日期类型*******************************/
        List holidayList = holidaysMap.get("HOLIDAY");
        List exchangingHolidayList = holidaysMap.get("EXCHANGING_HOLIDAY");
        String dateType = "";
        if (holidayList.contains(date)) {
            dateType = DateTypeEnum.holiday.getCode();
        } else if (exchangingHolidayList.contains(date)) {
            dateType = DateTypeEnum.exchanging_holiday.getCode();
        } else if (week == 6 || week == 7) {
            dateType = DateTypeEnum.weekend.getCode();
        } else {
            dateType = DateTypeEnum.workday.getCode();
        }
        dimDate.setDateType(dateType);
        return dimDate;
    }

    public static int getQuarterByMonth(int month) {
        int quarter = -1;
        if (month == 1 || month == 2 || month == 3) {
            quarter = 1;
        } else if (month == 4 || month == 5 || month == 6) {
            quarter = 2;
        } else if (month == 7 || month == 8 || month == 9) {
            quarter = 3;
        } else if (month == 10 || month == 11 || month == 12) {
            quarter = 4;
        }
        return quarter;
    }

    public static void main(String[] args) throws Exception {
//        System.out.println(getAfterDays("2020-01-01", 35));

        getHolidayDates(2020);
//        System.out.println(DateUtils.getDistanceBetweenDates("2020-03-03", "2020-02-08"));

        List list = getDimDateListByYear(2021);
        System.out.println(JSONObject.toJSONString(list));

    }

    public static Map> getHolidayDates(int year) {
        HashMap> holidayDates = new HashMap<>();
        String url = API_URL + "?key=你自己的key&date=" + year + "&type=1";
        String resStr = HttpUtil.get(url);
        JSONObject scrResponse = JSON.parseObject(resStr);
        // 节假日
        ArrayList holidays = new ArrayList<>();
        // 节假日调休
        ArrayList exchangingHolidays = new ArrayList<>();
        if (200 == scrResponse.getInteger("code")) {
            JSONArray list = scrResponse.getJSONArray("newslist");
            for (Object o : list) {
                String vacation = ((JSONObject) o).getString("vacation");
                String remark = ((JSONObject) o).getString("remark");
                if (StringUtils.isNotBlank(vacation)) {
                    String[] split = vacation.split("\\|");
                    holidays.addAll(Arrays.asList(split));
                }
                if (StringUtils.isNotBlank(remark)) {
                    String[] split = remark.split("\\|");
                    exchangingHolidays.addAll(Arrays.asList(split));
                }
            }
        }

        holidayDates.put("HOLIDAY", holidays);
        holidayDates.put("EXCHANGING_HOLIDAY", exchangingHolidays);
        return holidayDates;
    }

}

3、使用springboot+mybatis插入到数据库中

这块儿的东西比较多,但是技术含量不高,所以不再展示;有需要的同学可以下载项目到本地,直接运行即可!

Java项目地址:https://download.csdn.net/download/NDF923/19060062

最终插入数据库中的数据如下:

Java8实现批量生成数仓日期维度数据_第1张图片

三、导入到hive中

1、hive表创建sql

create table dim_date(
  date_id String  COMMENT '日期数字格式,如:20200101',
  date_value String  COMMENT '日期字符串格式,如:2020-01-01',
  week_num String  COMMENT '周几的数字,如:1,2,3,4,5,6,7',
  week_cn String  COMMENT '周几的中文表示,如:星期一',
  year_week String  COMMENT '年周组合,如:2020-34',
  month_num String  COMMENT '月份数字,如:1,2...12',
  year_month_value String  COMMENT '年月组合,如:2020-01',
  quarter_num String  COMMENT '季度数字,如:1,2,3,4',
  year_quarter String  COMMENT '年季度组合,如:2020-Q1',
  year_num String  COMMENT '年数字,如:2020',
  day_in_year String  COMMENT '一年中的第几天,如:365',
  day_in_month String  COMMENT '一月中的第几天,如:28',
  is_first_day_in_month String  COMMENT '是否是月的第一天(Y:是;N:否)',
  is_last_day_in_month String  COMMENT '是否是月的最后一天(Y:是;N:否)',
  week_in_month String  COMMENT '月的第几周,如:1',
  week_in_year String  COMMENT '年的第几周,如:30',
  date_type String  COMMENT '日期类型:工作日:WORKDAY;国家法定节假日:HOLIDAY;周末休息日:WEEKEND;节假日调休:EXCHANGING_HOLIDAY'
)
row format delimited
fields terminated by '\001'
lines terminated by '\n'
stored as textfile;

注:字段分隔一定要用 "\001",这样可以和mysql数据库的分隔符对应上

2、sqoop导入

sqoop import \
--connect jdbc:mysql://192.168.100.154:3306/test \
--username root \
--password a123456 \
--table dim_date \
--fields-terminated-by "\001" \
--hive-import \
--create-hive-table  \
--hive-database dim \
--hive-table dim_date \
--delete-target-dir \
--hive-drop-import-delims \
-m 1

到此,任务完成!!!

Java8实现批量生成数仓日期维度数据_第2张图片

-------------------------------------------------------------------------------------------华丽的分割线---------------------------------------------------------------------------------------------------------------------------------------

最近根据实际需求,比如每天更新统计数据时,要更新当周、当月、当季度的数据,因此新增了如下字段:

Java8实现批量生成数仓日期维度数据_第3张图片

具体可下载项目,解压后导入idea即可,项目有也包含mysql和hive的所有SQL脚本;Java代码项目地址:https://download.csdn.net/download/NDF923/19060062

 

你可能感兴趣的:(大数据,数据仓库,hive,数据库,java)