
一 使用SpagoBI和Hive进行互联网统计分区问题?

1 原来根据年月日进行分区再拼接SQL时会异常复杂,需要在Where条件后判断是否跨年,跨月,跨日等问题。

2 Hive的自定义函数不能在Where条件后返回字符串条件来进行数据筛选,但是可以返回boolean进行数据过滤,

   比如:自定义函数date_where(startTime, endTime),根据开始日期和结束日期返回格式为:

   year=2015 and month=08 and day > 1 and day < 10 拼接到Where date_where("2015-08-01", "2015-08-10")不支持。

二 根据上述问题,通过日期(dt)建立分区

Step1 创建数据库

1 创建tvlog_test数据库 create database tvlog_test;

Step2 创建数据表

1 创建tvlog_tcl数据表

   create table if not exists tvlog_test.tvlog_tcl(

       id string,

       userid string,

       channelid string,

       channelname string,

       region string,

       channelcode string,

       ip string,

       starttime string,

       endtime string,

       fromchannel string,

       tochannel string,

       mac string,

       deviceid string,

       dnum string


   partitioned by (dt string)

   stored as orc;

2 创建epg_wiki_info数据表

  create table if not exists tvlog_test.epg_wiki_info(

      id string,

      name string,

      starttime string,

      endtime string,

      wikiscreenshots array<string>,

      wikicover map<string, string>,

      wikititle string,

      tags array<string>,

      wikiid string,

      channelcode string,

      channelname string,

      timestamp string


  partitioned by (dt string)

  stored as orc;

Step3 开启Hive动态分区插入

  -- Session settings required by the dynamic-partition inserts below.
  -- Allow INSERT statements to create partitions from the query output.
  set hive.exec.dynamic.partition=true;

  -- nonstrict mode: every partition column may be dynamic, no static partition value is required.
  set hive.exec.dynamic.partition.mode=nonstrict;

  -- Upper bound on the number of dynamic partitions a single mapper/reducer node may create.
  set hive.exec.max.dynamic.partitions.pernode=1000;

Step4 插入数据(2015-09-01 ~ 2015-09-05)

1 向tvlog_test.tvlog_tcl表插入数据

  insert overwrite table tvlog_test.tvlog_tcl

  partition (dt)

  select id, userid, channelid, channelname, region,

  channelcode, ip, starttime, endtime, fromchannel,

  tochannel, mac, deviceid, dnum, dt

  from tvlog.tvlog_tcl

  where year = 2015 and month = 9 and (day between 1 and 5);

2 向tvlog_test.epg_wiki_info表插入数据

  insert overwrite table tvlog_test.epg_wiki_info

  partition (dt)

  select id, name, starttime, endtime, wikiscreenshots, wikicover,

  wikititle, tags, wikiid, channelcode, channelname, timestamp, dt

  from tvlog.epg_wiki_info

  where dt between '2015-09-01' and '2015-09-05';
