[hadoop@hadoop102 apache-hive-3.1.2-bin]$ bin/hive
create database dw_dwd;
use dw_dwd;
No. | Field Name | Field Type | Description | Source / Notes |
---|---|---|---|---|
1 | id | STRING | SKU ID | Primary key |
2 | price | DECIMAL | Product price | From the sku table |
3 | sku_name | STRING | Product name | From the sku table |
4 | sku_desc | STRING | Product description | From the sku table |
5 | weight | DECIMAL | Weight | From the sku table |
6 | is_sale | INT | On-sale flag | From the sku table |
7 | spu_id | STRING | SPU ID | From the sku table |
8 | spu_name | STRING | SPU name | From the spu table |
9 | category3_id | STRING | Level-3 category ID | From the sku table |
10 | category3_name | STRING | Level-3 category name | From the base_category3 table |
11 | category2_id | STRING | Level-2 category ID | From the base_category3 table |
12 | category2_name | STRING | Level-2 category name | From the base_category2 table |
13 | category1_id | STRING | Level-1 category ID | From the base_category2 table |
14 | category1_name | STRING | Level-1 category name | From the base_category1 table |
15 | tm_id | STRING | Brand ID | From the sku table |
16 | tm_name | STRING | Brand name | From the base_trademark table |
17 | sku_attr_values | STRING | Platform attributes | JSON string |
18 | sku_sale_attr_values | STRING | Sale attributes | JSON string |
19 | create_time | STRING | Creation time | From the sku table |
use dw_dwd;
DROP TABLE IF EXISTS dim_sku_full;
CREATE EXTERNAL TABLE dim_sku_full (
`id` STRING COMMENT '商品id',
`price` DECIMAL(16,2) COMMENT '商品价格',
`sku_name` STRING COMMENT '商品名称',
`sku_desc` STRING COMMENT '商品描述',
`weight` DECIMAL(16,2) COMMENT '重量',
`is_sale` INT COMMENT '是否在售;1:是,0:否',
`spu_id` STRING COMMENT 'spu编号',
`spu_name` STRING COMMENT 'spu名称',
`category3_id` STRING COMMENT '三级分类id',
`category3_name` STRING COMMENT '三级分类名称',
`category2_id` STRING COMMENT '二级分类id',
`category2_name` STRING COMMENT '二级分类名称',
`category1_id` STRING COMMENT '一级分类id',
`category1_name` STRING COMMENT '一级分类名称',
`tm_id` STRING COMMENT '品牌id',
`tm_name` STRING COMMENT '品牌名称',
`sku_attr_values` ARRAY<STRUCT<attr_id:STRING,value_id:STRING,attr_name:STRING,value_name:STRING>> COMMENT '平台属性',
`sku_sale_attr_values` ARRAY<STRUCT<sale_attr_id:STRING,sale_attr_value_id:STRING,sale_attr_name:STRING,sale_attr_value_name:STRING>> COMMENT '销售属性',
`create_time` STRING COMMENT '创建时间'
) COMMENT '商品维度表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/dw_dwd.db/dim_sku_full/'
TBLPROPERTIES ("orc.compress"="snappy");
msck repair table dim_sku_full;
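Optionally, a quick sanity check confirms the table was registered as intended (MSCK REPAIR only picks up partition directories that already exist under the table's LOCATION, so it is a no-op for an empty table):
show partitions dw_dwd.dim_sku_full;
desc formatted dw_dwd.dim_sku_full;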
[hadoop@hadoop102 ~]$ cd /home/hadoop/bin/
[hadoop@hadoop102 bin]$ vim dim_sku_full.sh
Script contents:
#!/bin/bash
# If a date argument is supplied, use it; otherwise default to the day before the current date.
if [ -n "$1" ] ;then
date_y_m_d=$1
else
date_y_m_d=`date -d "-1 day" +%F`
fi
etl_sql="
insert overwrite table dw_dwd.dim_sku_full partition(dt='${date_y_m_d}')
select
sku.id,
sku.price,
sku.sku_name,
sku.sku_desc,
sku.weight,
sku.is_sale,
sku.spu_id,
spu.spu_name,
sku.category3_id,
c3.name,
c3.category2_id,
c2.name,
c2.category1_id,
c1.name,
sku.tm_id,
tm.tm_name,
attr.attrs,
sale_attr.sale_attrs,
sku.create_time
from
(
select
id,
price,
sku_name,
sku_desc,
weight,
is_sale,
spu_id,
category3_id,
tm_id,
create_time
from dw_ods.ods_sku_info_full
where dt='${date_y_m_d}'
) sku
left join
(
select
id,
spu_name
from dw_ods.ods_spu_info_full
where dt='${date_y_m_d}'
) spu on sku.spu_id=spu.id
left join
(
select
id,
name,
category2_id
from dw_ods.ods_base_category3_full
where dt='${date_y_m_d}'
) c3 on sku.category3_id=c3.id
left join
(
select
id,
name,
category1_id
from dw_ods.ods_base_category2_full
where dt='${date_y_m_d}'
) c2 on c3.category2_id=c2.id
left join
(
select
id,
name
from dw_ods.ods_base_category1_full
where dt='${date_y_m_d}'
) c1 on c2.category1_id=c1.id
left join
(
select
id,
tm_name
from dw_ods.ods_base_trademark_full
where dt='${date_y_m_d}'
) tm on sku.tm_id=tm.id
left join
(
select
sku_id,
collect_set(named_struct('attr_id',attr_id,'value_id',value_id,'attr_name',attr_name,'value_name',value_name)) attrs
from dw_ods.ods_sku_attr_value_full
where dt='${date_y_m_d}'
group by sku_id
) attr on sku.id=attr.sku_id
left join
(
select
sku_id,
collect_set(named_struct('sale_attr_id',sale_attr_id,'sale_attr_value_id',sale_attr_value_id,'sale_attr_name',sale_attr_name,'sale_attr_value_name',sale_attr_value_name)) sale_attrs
from dw_ods.ods_sku_sale_attr_value_full
where dt='${date_y_m_d}'
group by sku_id
) sale_attr on sku.id=sale_attr.sku_id;
"
hive -e "$etl_sql"
Grant execute permission to the script:
[hadoop@hadoop102 bin]$ chmod +x dim_sku_full.sh
Run the script:
[hadoop@hadoop102 bin]$ dim_sku_full.sh
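If the load succeeded, the ARRAY<STRUCT> columns can be inspected with index and dot notation; a minimal spot check looks like this (the dt value below is only an example, substitute the partition the script just wrote):
select
    id,
    sku_name,
    sku_attr_values[0].attr_name,
    sku_attr_values[0].value_name
from dw_dwd.dim_sku_full
where dt='2023-12-05'
limit 5;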
CREATE EXTERNAL TABLE IF NOT EXISTS `dw_dwd.dim_coupon_full` (
`id` STRING COMMENT '优惠券编号',
`coupon_name` STRING COMMENT '优惠券名称',
`coupon_type_code` STRING COMMENT '优惠券类型编码',
`coupon_type_name` STRING COMMENT '优惠券类型名称',
`condition_amount` DECIMAL(16, 2) COMMENT '满额数',
`condition_num` BIGINT COMMENT '满件数',
`activity_id` STRING COMMENT '活动编号',
`benefit_amount` DECIMAL(16, 2) COMMENT '减免金额',
`benefit_discount` DECIMAL(16, 2) COMMENT '折扣',
`benefit_rule` STRING COMMENT '优惠规则:满*元减*元,满*件打*折',
`create_time` STRING COMMENT '创建时间',
`range_type_code` STRING COMMENT '优惠范围类型编码',
`range_type_name` STRING COMMENT '优惠范围类型名称',
`limit_num` BIGINT COMMENT '最多领取次数',
`taken_count` BIGINT COMMENT '已领取次数',
`start_time` STRING COMMENT '可以领取的开始时间',
`end_time` STRING COMMENT '可以领取的结束时间',
`operate_time` STRING COMMENT '修改时间',
`expire_time` STRING COMMENT '过期时间'
) COMMENT '优惠券维度表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/dw_dwd.db/dim_coupon_full/'
TBLPROPERTIES ("orc.compress"="snappy");
[hadoop@hadoop102 hadoop]$ cd /home/hadoop/bin/
[hadoop@hadoop102 bin]$ vim dim_coupon_full.sh
Script contents:
#!/bin/bash
# If a date argument is supplied, use it; otherwise default to the day before the current date.
if [ -n "$1" ] ;then
date_y_m_d=$1
else
date_y_m_d=`date -d "-1 day" +%F`
fi
etl_sql="
insert overwrite table dw_dwd.dim_coupon_full partition(dt='${date_y_m_d}')
select
id,
coupon_name,
coupon_type,
coupon_dic.dic_name,
condition_amount,
condition_num,
activity_id,
benefit_amount,
benefit_discount,
case coupon_type
when '3201' then concat('满',condition_amount,'元减',benefit_amount,'元')
when '3202' then concat('满',condition_num,'件打', benefit_discount,' 折')
when '3203' then concat('减',benefit_amount,'元')
end benefit_rule,
create_time,
range_type,
range_dic.dic_name,
limit_num,
taken_count,
start_time,
end_time,
operate_time,
expire_time
from
(
select
id,
coupon_name,
coupon_type,
condition_amount,
condition_num,
activity_id,
benefit_amount,
benefit_discount,
create_time,
range_type,
limit_num,
taken_count,
start_time,
end_time,
operate_time,
expire_time
from dw_ods.ods_coupon_info_full
where dt='${date_y_m_d}'
)ci
left join
(
select
dic_code,
dic_name
from dw_ods.ods_base_dic_full
where dt='${date_y_m_d}'
and parent_code='32'
)coupon_dic
on ci.coupon_type=coupon_dic.dic_code
left join
(
select
dic_code,
dic_name
from dw_ods.ods_base_dic_full
where dt='${date_y_m_d}'
and parent_code='33'
)range_dic
on ci.range_type=range_dic.dic_code;
"
hive -e "$etl_sql"
Grant execute permission to the script:
[hadoop@hadoop102 bin]$ chmod +x dim_coupon_full.sh
Run the script:
[hadoop@hadoop102 bin]$ dim_coupon_full.sh 2023-12-05
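To confirm the CASE expression produced readable benefit_rule strings, a quick spot check can be run against the partition that was just loaded:
select coupon_name, coupon_type_name, benefit_rule
from dw_dwd.dim_coupon_full
where dt='2023-12-05'
limit 10;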
CREATE EXTERNAL TABLE IF NOT EXISTS `dw_dwd.dim_activity_full` (
`activity_rule_id` STRING COMMENT '活动规则ID',
`activity_id` STRING COMMENT '活动ID',
`activity_name` STRING COMMENT '活动名称',
`activity_type_code` STRING COMMENT '活动类型编码',
`activity_type_name` STRING COMMENT '活动类型名称',
`activity_desc` STRING COMMENT '活动描述',
`start_time` STRING COMMENT '开始时间',
`end_time` STRING COMMENT '结束时间',
`create_time` STRING COMMENT '创建时间',
`condition_amount` DECIMAL(16, 2) COMMENT '满减金额',
`condition_num` BIGINT COMMENT '满减件数',
`benefit_amount` DECIMAL(16, 2) COMMENT '优惠金额',
`benefit_discount` DECIMAL(16, 2) COMMENT '优惠折扣',
`benefit_rule` STRING COMMENT '优惠规则',
`benefit_level` STRING COMMENT '优惠级别'
) COMMENT '活动维度表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/dw_dwd.db/dim_activity_full/'
TBLPROPERTIES ("orc.compress"="snappy");
vim dim_activity_full.sh
Script contents:
#!/bin/bash
# If a date argument is supplied, use it; otherwise default to the day before the current date.
if [ -n "$1" ] ;then
date_y_m_d=$1
else
date_y_m_d=`date -d "-1 day" +%F`
fi
etl_sql="
insert overwrite table dw_dwd.dim_activity_full partition(dt='${date_y_m_d}')
select
rule.id,
info.id,
activity_name,
rule.activity_type,
dic.dic_name,
activity_desc,
start_time,
end_time,
create_time,
condition_amount,
condition_num,
benefit_amount,
benefit_discount,
case rule.activity_type
when '3101' then concat('满',condition_amount,'元减',benefit_amount,'元')
when '3102' then concat('满',condition_num,'件打', benefit_discount,' 折')
when '3103' then concat('打', benefit_discount,'折')
end benefit_rule,
benefit_level
from
(
select
id,
activity_id,
activity_type,
condition_amount,
condition_num,
benefit_amount,
benefit_discount,
benefit_level
from dw_ods.ods_activity_rule_full
where dt='${date_y_m_d}'
)rule
left join
(
select
id,
activity_name,
activity_type,
activity_desc,
start_time,
end_time,
create_time
from dw_ods.ods_activity_info_full
where dt='${date_y_m_d}'
)info
on rule.activity_id=info.id
left join
(
select
dic_code,
dic_name
from dw_ods.ods_base_dic_full
where dt='${date_y_m_d}'
and parent_code='31'
)dic
on rule.activity_type=dic.dic_code;
"
hive -e "$etl_sql"
Grant execute permission to the script:
[hadoop@hadoop102 bin]$ chmod +x dim_activity_full.sh
Run the script:
[hadoop@hadoop102 bin]$ dim_activity_full.sh
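Because every rule row is preserved by the LEFT JOINs, the dimension partition should contain exactly as many rows as the ODS rule table for the same day; a simple consistency check (dt is an example, use the partition that was actually loaded):
select 'dim' src, count(*) cnt from dw_dwd.dim_activity_full where dt='2023-12-05'
union all
select 'ods' src, count(*) cnt from dw_ods.ods_activity_rule_full where dt='2023-12-05';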
CREATE EXTERNAL TABLE IF NOT EXISTS `dw_dwd.dim_province_full` (
`id` STRING COMMENT '省份ID',
`province_name` STRING COMMENT '省份名称',
`area_code` STRING COMMENT '地区编码',
`iso_code` STRING COMMENT '旧版国际标准地区编码,供可视化使用',
`iso_3166_2` STRING COMMENT '新版国际标准地区编码,供可视化使用',
`region_id` STRING COMMENT '地区ID',
`region_name` STRING COMMENT '地区名称'
) COMMENT '地区维度表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/dw_dwd.db/dim_province_full/'
TBLPROPERTIES ("orc.compress"="snappy");
[hadoop@hadoop102 bin]$ vim dim_province_full.sh
Script contents:
#!/bin/bash
# If a date argument is supplied, use it; otherwise default to the day before the current date.
if [ -n "$1" ] ;then
date_y_m_d=$1
else
date_y_m_d=`date -d "-1 day" +%F`
fi
etl_sql="
insert overwrite table dw_dwd.dim_province_full partition(dt='${date_y_m_d}')
select
province.id,
province.name,
province.area_code,
province.iso_code,
province.iso_3166_2,
region_id,
region_name
from
(
select
id,
name,
region_id,
area_code,
iso_code,
iso_3166_2
from dw_ods.ods_base_province
)province
left join
(
select
id,
region_name
from dw_ods.ods_base_region
)region
on province.region_id=region.id;
"
hive -e "$etl_sql"
Grant execute permission to the script:
[hadoop@hadoop102 bin]$ chmod +x dim_province_full.sh
Run the script:
[hadoop@hadoop102 bin]$ dim_province_full.sh
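Since the join to the region table is a LEFT JOIN, any province whose region_id has no match ends up with a NULL region_name; this is easy to check after a load (substitute the dt that was just written):
select count(*)
from dw_dwd.dim_province_full
where dt='2023-12-05' and region_name is null;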
1. Upload the date data file to the HDFS staging directory /tmp/tmp_dim_date_info/ (a sketch of the upload appears after the staging-table DDL below).
2. Run the following SQL.
-- Create the temporary staging table
CREATE EXTERNAL TABLE IF NOT EXISTS `default.tmp_dim_date_info` (
`date_id` STRING COMMENT '日',
`week_id` STRING COMMENT '周ID',
`week_day` STRING COMMENT '周几',
`day` STRING COMMENT '每月的第几天',
`month` STRING COMMENT '第几月',
`quarter` STRING COMMENT '第几季度',
`year` STRING COMMENT '年',
`is_workday` STRING COMMENT '是否是工作日',
`holiday_id` STRING COMMENT '节假日'
) COMMENT '时间维度表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/tmp/tmp_dim_date_info/';
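For step 1 above, the staging table expects one tab-separated line per date, with the nine columns in the order defined by the DDL. A minimal sketch of the upload, assuming a local file named date_info.tsv (the file name and the sample line are hypothetical):
[hadoop@hadoop102 ~]$ head -1 date_info.tsv        # fields are tab-separated
2023-12-05	49	2	5	12	4	2023	1	\N
[hadoop@hadoop102 ~]$ hadoop fs -mkdir -p /tmp/tmp_dim_date_info/
[hadoop@hadoop102 ~]$ hadoop fs -put -f date_info.tsv /tmp/tmp_dim_date_info/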
-- Create the date dimension table
CREATE EXTERNAL TABLE IF NOT EXISTS `dw_dwd.dim_date` (
`date_id` STRING COMMENT '日期ID',
`week_id` STRING COMMENT '周ID,一年中的第几周',
`week_day` STRING COMMENT '周几',
`day` STRING COMMENT '每月的第几天',
`month` STRING COMMENT '一年中的第几月',
`quarter` STRING COMMENT '一年中的第几季度',
`year` STRING COMMENT '年份',
`is_workday` STRING COMMENT '是否是工作日',
`holiday_id` STRING COMMENT '节假日'
) COMMENT '日期维度表'
STORED AS ORC
LOCATION '/warehouse/dw_dwd.db/dim_date/'
TBLPROPERTIES ("orc.compress"="snappy");
-- Load into the final table
insert overwrite table dw_dwd.dim_date select * from default.tmp_dim_date_info;
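Both counts below should match the number of lines in the uploaded file; comparing them is a cheap way to confirm the copy into the ORC table worked:
select count(*) from default.tmp_dim_date_info;
select count(*) from dw_dwd.dim_date;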
Dynamic partitioning and zipper (slowly changing dimension) tables
CREATE EXTERNAL TABLE IF NOT EXISTS `dw_dwd.dim_user_full` (
`id` STRING COMMENT '用户ID',
`name` STRING COMMENT '用户姓名',
`phone_num` STRING COMMENT '手机号码',
`email` STRING COMMENT '邮箱',
`user_level` STRING COMMENT '用户等级',
`birthday` STRING COMMENT '生日',
`gender` STRING COMMENT '性别',
`create_time` STRING COMMENT '创建时间',
`operate_time` STRING COMMENT '操作时间'
) COMMENT '用户维度表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/dw_dwd.db/dim_user_full/'
TBLPROPERTIES ("orc.compress"="snappy");
vim dim_user_full.sh
Script contents:
#!/bin/bash
# If a date argument is supplied, use it; otherwise default to the day before the current date.
if [ -n "$1" ] ;then
date_y_m_d=$1
else
date_y_m_d=`date -d "-1 day" +%F`
fi
etl_sql="
insert overwrite table dw_dwd.dim_user_full partition (dt = '${date_y_m_d}')
select id,
name,
phone_num,
email,
user_level,
birthday,
gender,
create_time,
operate_time
from dw_ods.ods_user_info_full
where dt = '${date_y_m_d}';
"
hive -e "$etl_sql"
Grant execute permission to the script:
[hadoop@hadoop102 bin]$ chmod +x dim_user_full.sh
Run the script:
[hadoop@hadoop102 bin]$ dim_user_full.sh
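The heading above also mentions dynamic partitioning; the dim_user_full script uses a static partition (one fixed dt per run). A minimal sketch of a dynamic-partition variant, assuming the ODS table exposes its dt partition column and several days should be rewritten in one statement (illustrative only, not part of the original script):
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dw_dwd.dim_user_full partition (dt)
select id, name, phone_num, email, user_level, birthday, gender, create_time, operate_time, dt
from dw_ods.ods_user_info_full;
Hive takes the dynamic partition value from the last column of the select list (dt here); in the default strict mode at least one static partition key would be required, which is why the mode is switched to nonstrict. A zipper (SCD2) table, also mentioned in the heading, would additionally track effective start/end dates per record and is not implemented in this full-snapshot load.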