-- Create the OLTP source database.
-- utf8mb4 replaces MySQL's legacy 3-byte "utf8", so all Unicode (incl. 4-byte
-- characters) round-trips correctly; backward compatible for the ASCII seed data.
CREATE DATABASE IF NOT EXISTS sales_source DEFAULT CHARSET utf8mb4 COLLATE utf8mb4_general_ci;
-- Switch to the source database
USE sales_source;
-- Drop tables so the script is rerunnable
DROP TABLE IF EXISTS customer;
DROP TABLE IF EXISTS product;
DROP TABLE IF EXISTS sales_order;
-- Create source tables
-- customer table: one row per customer account
CREATE TABLE customer
(
customer_number INT(11) NOT NULL AUTO_INCREMENT,
customer_name VARCHAR(128) NOT NULL,
customer_street_address VARCHAR(256) NOT NULL,
-- NOTE(review): an INT zip code drops leading zeros (e.g. '07050'); a CHAR/VARCHAR
-- column would be safer -- confirm against downstream consumers before changing.
customer_zip_code INT(11) NOT NULL,
customer_city VARCHAR(32) NOT NULL,
customer_state VARCHAR(32) NOT NULL,
PRIMARY KEY (customer_number)
);
-- product table: the product catalog
CREATE TABLE product
(
product_code INT(11) NOT NULL AUTO_INCREMENT,
product_name VARCHAR(128) NOT NULL,
product_category VARCHAR(256) NOT NULL,
PRIMARY KEY (product_code)
);
-- sales_order table: transactional source of the fact table. order_date is when the
-- order was placed; entry_date is when the row entered the system (used later as the
-- CDC check column for incremental extraction).
CREATE TABLE sales_order
(
order_number INT(11) NOT NULL AUTO_INCREMENT,
customer_number INT(11) NOT NULL,
product_code INT(11) NOT NULL,
order_date DATETIME NOT NULL,
entry_date DATETIME NOT NULL,
order_amount DECIMAL(18,2) NOT NULL,
PRIMARY KEY (order_number)
);
-- Seed data
-- customer table: six customers, all in Pennsylvania. Zip codes are given as string
-- literals and implicitly coerced to the INT column.
INSERT INTO customer
( customer_name
, customer_street_address
, customer_zip_code
, customer_city
, customer_state
)
VALUES
('Big Customers', '7500 Louise Dr.', '17050',
'Mechanicsburg', 'PA')
, ( 'Small Stores', '2500 Woodland St.', '17055',
'Pittsburgh', 'PA')
, ('Medium Retailers', '1111 Ritter Rd.', '17055',
'Pittsburgh', 'PA'
)
, ('Good Companies', '9500 Scott St.', '17050',
'Mechanicsburg', 'PA')
, ('Wonderful Shops', '3333 Rossmoyne Rd.', '17050',
'Mechanicsburg', 'PA')
, ('Loyal Clients', '7070 Ritter Rd.', '17055',
'Pittsburgh', 'PA')
;
-- product table: three products in two categories (codes 1..3 via AUTO_INCREMENT,
-- matched by the random generator in usp_generate_order_data).
INSERT INTO product(product_name,product_category) VALUES
('Hard Disk','Storage'),
('Floppy Drive','Storage'),
('lcd panel','monitor')
;
-- Populate sales_order via a stored procedure: generate 100,000 random orders into a
-- staging table, then reload sales_order in one set-based INSERT.
-- Drop the procedure if it already exists (rerunnable).
DROP PROCEDURE IF EXISTS usp_generate_order_data;
DELIMITER //
CREATE PROCEDURE usp_generate_order_data()
BEGIN
    -- Staging table with the same structure as sales_order (WHERE 1=0 copies no rows,
    -- and CREATE TABLE AS does not copy the AUTO_INCREMENT attribute).
    DROP TABLE IF EXISTS tmp_sales_order;
    CREATE TABLE tmp_sales_order AS SELECT * FROM sales_order WHERE 1=0;
    -- Random order_date window: 2018-01-01 .. 2018-11-23 as unix timestamps.
    SET @start_date := UNIX_TIMESTAMP('2018-01-01');
    SET @end_date := UNIX_TIMESTAMP('2018-11-23');
    SET @i := 1;
    WHILE @i <= 100000 DO
        -- customer_number in 1..6 and product_code in 1..3 match the seeded rows.
        SET @customer_number := FLOOR(1 + RAND() * 6);
        SET @product_code := FLOOR(1 + RAND() * 3);
        SET @order_date := FROM_UNIXTIME(@start_date + RAND() * (@end_date - @start_date));
        -- Amount in 1000..9999.
        SET @amount := FLOOR(1000 + RAND() * 9000);
        -- entry_date is set equal to order_date for the generated data.
        INSERT INTO tmp_sales_order VALUES (@i, @customer_number, @product_code, @order_date, @order_date, @amount);
        SET @i := @i + 1;
    END WHILE;
    TRUNCATE TABLE sales_order;
    -- NULL lets AUTO_INCREMENT assign order_number.
    INSERT INTO sales_order
    SELECT NULL, customer_number, product_code, order_date, entry_date, order_amount
    FROM tmp_sales_order;
    COMMIT;
    DROP TABLE tmp_sales_order;
END //
-- BUG FIX: restore the default statement delimiter. Without this, the CALL below
-- (terminated by ';') is never recognized because '//' is still the delimiter.
DELIMITER ;
-- Call the procedure to generate and load the data
CALL usp_generate_order_data();
-- 我们的案例业务很明确,就是:销售订单
-- Create the RDS (staging) layer database; IF NOT EXISTS makes the script rerunnable.
create database if not exists sales_rds;
-- Switch to the staging database
USE sales_rds;
-- Drop staging tables.
-- BUG FIX: the drops referenced a non-existent "rds" schema; the database created
-- above is sales_rds, so the original statements never dropped anything.
DROP TABLE IF EXISTS sales_rds.customer;
DROP TABLE IF EXISTS sales_rds.product;
DROP TABLE IF EXISTS sales_rds.sales_order;
drop table if exists cdc_time;
-- Staging copy of the source customer table (staging carries no constraints).
CREATE TABLE sales_rds.customer
(
customer_number INT ,
customer_name VARCHAR(128) ,
customer_street_address VARCHAR(256) ,
customer_zip_code INT ,
customer_city VARCHAR(32) ,
customer_state VARCHAR(32)
);
-- Staging copy of the source product table.
CREATE TABLE sales_rds.product
(
product_code INT,
product_name VARCHAR(128) ,
product_category VARCHAR(256)
);
-- Staging copy of the source sales_order table; tab-delimited text so the Sqoop job
-- below (--fields-terminated-by '\t' / --lines-terminated-by '\n') writes into it
-- directly.
CREATE TABLE sales_rds.sales_order
(
order_number INT ,
customer_number INT,
product_code INT ,
order_date timestamp ,
entry_date timestamp ,
order_amount DECIMAL(18,2)
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE
;
-- CDC bookkeeping table: holds the time window of the current incremental load
-- (a single row, overwritten by each run of the periodic load).
create table cdc_time
(
start_time date,
end_time date
);
# Load data into the RDS (staging) layer
# ETL extraction
# Full extraction
# Full import of the product table
# NOTE(review): --password on the command line is visible in the process list;
# prefer --password-file or -P in a real deployment.
sqoop import \
--connect jdbc:mysql://localhost:3306/sales_source \
--username root \
--password ok \
--table product \
--hive-import \
--hive-table sales_rds.product \
--hive-overwrite \
--target-dir temp
# Full import of the customer table
sqoop import \
--connect jdbc:mysql://localhost:3306/sales_source \
--username root \
--password ok \
--table customer \
--hive-import \
--hive-table sales_rds.customer \
--hive-overwrite \
--target-dir temp
# Incremental extraction of sales_order as a saved Sqoop job
# check column: entry_date
# mode: append / lastmodified (append is used here)
# last-value: '1900-1-1' so the first execution imports every row
sqoop job \
--create myjob \
-- import \
--connect jdbc:mysql://localhost:3306/sales_source \
--username root \
--password ok \
--table sales_order \
--hive-import \
--hive-table sales_rds.sales_order \
--check-column entry_date \
--incremental append \
--last-value '1900-1-1'
# List saved sqoop jobs
sqoop job --list
# Execute the job
sqoop job --exec myjob
-- Create the DW (dimensional) layer database
create database sales_dw;
-- Switch to the DW database
use sales_dw;
-- Product dimension. SCD2 columns (version/effective_date/expiry_date) track history;
-- bucketed transactional ORC so the Hive UPDATEs in the periodic load are allowed.
create table dim_product
(
product_sk int ,
product_code int ,
product_name varchar(128),
product_category varchar(256),
version varchar(32),
effective_date date,
expiry_date date
)
clustered by (product_sk ) into 8 buckets
stored as orc tblproperties('transactional'='true');
-- Customer dimension (SCD2, same layout conventions as dim_product).
create table dim_customer
(
customer_sk int ,
customer_number int ,
customer_name varchar(128),
customer_street_address varchar(256),
customer_zip_code int,
customer_city varchar(32),
customer_state varchar(32),
version varchar(32),
effective_date date,
expiry_date date
)
clustered by (customer_sk ) into 8 buckets
stored as orc tblproperties('transactional'='true');
-- Date dimension, loaded from a generated CSV (see generate_dim_date.sh), hence
-- comma-delimited text rather than ORC.
-- NOTE(review): "date" is a reserved keyword in some Hive versions and may need
-- backtick quoting -- confirm against the target Hive version.
create table dim_date
(
date_sk int ,
date date,
month tinyint,
month_name varchar(16),
quarter tinyint,
year int
) row format delimited fields terminated by ','
stored as textfile;
-- Order dimension (SCD2; one row per order number).
create table dim_order
(
order_sk int ,
order_number int,
version varchar(32),
effective_date date,
expiry_date date
)
clustered by (order_sk ) into 8 buckets
stored as orc tblproperties('transactional'='true');
-- Sales order fact table, partitioned by order month (substr(order_date,1,7) at load
-- time), holding the surrogate keys of all dimensions plus the measure.
create table fact_sales_order
(
order_sk int ,
customer_sk int ,
product_sk int ,
order_date_sk int ,
order_amount decimal(18,2)
)
partitioned by(order_date string)
clustered by (order_sk ) into 8 buckets
stored as orc tblproperties('transactional'='true');
# 创建日期维度生成脚本(vi generate_dim_date.sh),内容如下:
#!/bin/bash
# Generate one dim_date row per day from $1 (start date) to $2 (end date), as CSV
# "sk,date,month,month_name,quarter,year" appended to ./dim_date.csv.
# Start date
date1="$1"
# End date
date2="$2"
# BUG FIX: both boundary dates are required; fail fast instead of looping on
# empty strings.
if [ -z "$date1" ] || [ -z "$date2" ]; then
    echo "usage: $0 <start-date> <end-date>" >&2
    exit 1
fi
# Current date of the loop, normalized to YYYY-MM-DD
tmpdate=$(date -d "$date1" +%F)
# Start timestamp (seconds since epoch)
startSec=$(date -d "$date1" +%s)
# End timestamp
endSec=$(date -d "$date2" +%s)
# Loop bounds: one iteration per whole day in the range
min=1
max=$(( (endSec - startSec) / 60 / 60 / 24 ))
# BUG FIX: truncate the output first so reruns do not append duplicate rows.
: > ./dim_date.csv
while [ "$min" -le "$max" ]
do
    # Month number, English month name, year of the current date
    month=$(date -d "$tmpdate" +%m)
    month_name=$(date -d "$tmpdate" +%B)
    year=$(date -d "$tmpdate" +%Y)
    # Quarter = (month-1)/3 + 1; 10# forces base-10 so "08"/"09" are not read as octal
    quarter=$(( (10#$month - 1) / 3 + 1 ))
    echo "$min,$tmpdate,$month,$month_name,$quarter,$year" >> ./dim_date.csv
    # Advance to the next day
    tmpdate=$(date -d "$min day $date1" +%F)
    min=$(( min + 1 ))
done
# Make the generator executable
chmod 777 generate_dim_date.sh
# Run the script.
# BUG FIX: the script requires a start and an end date; pass the same window used
# when generating the source data.
./generate_dim_date.sh 2018-01-01 2018-11-23
# Upload the generated file into the dim_date table directory, i.e. load the data
hdfs dfs -put dim_date.csv /hive/warehouse/sales_dw.db/dim_date
-- Load dim_product from staging (initial SCD2 rows: version 1.0, effective
-- 2018-01-01, expiring at the far-future sentinel 2050-01-01).
-- BUG FIX: Hive only casts 'yyyy-MM-dd' strings to DATE; '2018-1-1' would load
-- as NULL into the date columns.
from
(
select
row_number() over(order by sp.product_code) product_sk,
sp.product_code,
sp.product_name,
sp.product_category,
'1.0',
'2018-01-01',
'2050-01-01'
from sales_rds.product sp ) tmp
insert into sales_dw.dim_product select *;
-- Load dim_customer from staging (initial SCD2 rows).
-- BUG FIX: zero-padded date literals so the Hive DATE cast does not produce NULL.
from
(
select
row_number() over(order by sp.customer_number) customer_sk,
sp.customer_number,
sp.customer_name,
sp.customer_street_address,
sp.customer_zip_code,
sp.customer_city,
sp.customer_state,
'1.0',
'2018-01-01',
'2050-01-01'
from sales_rds.customer sp ) tmp
insert into sales_dw.dim_customer select *;
-- Load dim_order from staging (initial SCD2 rows).
-- BUG FIX: zero-padded date literals so the Hive DATE cast does not produce NULL.
from
(
select
row_number() over(order by sp.order_number) order_sk,
sp.order_number,
'1.0',
'2018-01-01',
'2050-01-01'
from sales_rds.sales_order sp ) tmp
insert into sales_dw.dim_order select *;
-- Load the sales order fact table
-- Enable dynamic partitioning (the month partition value comes from the data)
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=10000;
set hive.exec.max.dynamic.partitions.pernode=10000;
-- Resolve each staged order to its surrogate keys and insert into the fact table;
-- the last selected column (yyyy-MM of order_date) drives the dynamic partition.
from
(
select
b.order_sk,
c.customer_sk,
d.product_sk,
e.date_sk order_date_sk,
a.order_amount,
substr(a.order_date,1,7) order_date
from sales_rds.sales_order a
join sales_dw.dim_order b on a.order_number=b.order_number
join sales_dw.dim_customer c on a.customer_number=c.customer_number
join sales_dw.dim_product d on a.product_code=d.product_code
join sales_dw.dim_date e on date(a.order_date)=e.date
) temp
insert into table sales_dw.fact_sales_order partition(order_date)
select order_sk,customer_sk,product_sk,order_date_sk,order_amount,order_date;
-- DM layer wide table for 2018-10-20: per customer/product/date, the same-day order
-- count and amount plus the trailing-two-day (10-19 .. 10-20) count and amount.
create database if not exists sales_dm;
create table if not exists sales_dm.dm_order as
select
c.customer_sk ,
c.customer_number ,
c.customer_name ,
c.customer_street_address ,
c.customer_zip_code ,
c.customer_city ,
c.customer_state ,
p.product_sk ,
p.product_code ,
p.product_name ,
p.product_category,
dd.date_sk,
dd.date ,
dd.month ,
dd.month_name ,
dd.quarter ,
dd.year ,
-- datediff('2018-10-20', dd.date) is 0 on the 20th and 1 on the 19th
-- (the WHERE below restricts dd.date to exactly those two days).
sum(case when datediff("2018-10-20",dd.date)=0 then 1 else 0 end) one_order_cnt,
sum(case when datediff("2018-10-20",dd.date)<=1 then 1 else 0 end) two_order_cnt,
sum(case when datediff("2018-10-20",dd.date)=0 then fso.order_amount else 0 end) one_order_cnt_amount,
-- BUG FIX: this column previously summed the literal 1 (an order count);
-- the two-day amount must sum fso.order_amount.
sum(case when datediff("2018-10-20",dd.date)<=1 then fso.order_amount else 0 end) two_order_cnt_amount
from sales_dw.fact_sales_order fso
join sales_dw.dim_customer c on fso.customer_sk=c.customer_sk
join sales_dw.dim_product p on fso.product_sk=p.product_sk
join sales_dw.dim_date dd on fso.order_date_sk=dd.date_sk
where dd.date>='2018-10-19' and dd.date<='2018-10-20'
group by
c.customer_sk ,
c.customer_number ,
c.customer_name ,
c.customer_street_address ,
c.customer_zip_code ,
c.customer_city ,
c.customer_state ,
p.product_sk ,
p.product_code ,
p.product_name ,
p.product_category,
dd.date_sk,
dd.date ,
dd.month ,
dd.month_name ,
dd.quarter ,
dd.year;
-- 初始化装载
USE sales_dw;
-- Empty every DW table so the initial load is rerunnable from a clean state
TRUNCATE TABLE dim_customer;
TRUNCATE TABLE dim_product;
TRUNCATE TABLE dim_order;
TRUNCATE TABLE fact_sales_order;
-- Initial load of the customer dimension (SCD2 rows: version 1.0, effective
-- 2018-01-01, expiring at the sentinel 2050-01-01).
-- BUG FIX: Hive only casts 'yyyy-MM-dd' strings to DATE; '2018-1-1' would load
-- as NULL into the date columns.
from
(
select
row_number() over(order by sp.customer_number) customer_sk,
sp.customer_number,
sp.customer_name,
sp.customer_street_address,
sp.customer_zip_code,
sp.customer_city,
sp.customer_state,
'1.0',
'2018-01-01',
'2050-01-01'
from sales_rds.customer sp ) tmp
insert into sales_dw.dim_customer select *;
-- Initial load of the product dimension (SCD2 rows).
-- BUG FIX: zero-padded date literals so the Hive DATE cast does not produce NULL.
from
(
select
row_number() over(order by sp.product_code) product_sk,
sp.product_code,
sp.product_name,
sp.product_category,
'1.0',
'2018-01-01',
'2050-01-01'
from sales_rds.product sp ) tmp
insert into sales_dw.dim_product select *;
-- Initial load of the order dimension (SCD2 rows).
-- BUG FIX: zero-padded date literals so the Hive DATE cast does not produce NULL.
from
(
select
row_number() over(order by sp.order_number) order_sk,
sp.order_number,
'1.0',
'2018-01-01',
'2050-01-01'
from sales_rds.sales_order sp ) tmp
insert into sales_dw.dim_order select *;
-- Initial load of the sales order fact table
-- Enable dynamic partitioning (the month partition value comes from the data)
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=10000;
set hive.exec.max.dynamic.partitions.pernode=10000;
-- Resolve each staged order to its surrogate keys; the trailing yyyy-MM column
-- drives the dynamic month partition.
from
(
select
b.order_sk,
c.customer_sk,
d.product_sk,
e.date_sk order_date_sk,
a.order_amount,
substr(a.order_date,1,7) order_date
from sales_rds.sales_order a
join sales_dw.dim_order b on a.order_number=b.order_number
join sales_dw.dim_customer c on a.customer_number=c.customer_number
join sales_dw.dim_product d on a.product_code=d.product_code
join sales_dw.dim_date e on date(a.order_date)=e.date
) temp
insert into table sales_dw.fact_sales_order partition(order_date)
select order_sk,customer_sk,product_sk,order_date_sk,order_amount,order_date;
#!/bin/bash
# Create the Sqoop incremental-import job with order_number as the check column;
# the initial last-value is 0, so the first execution imports every row.
# NOTE(review): the --delete fails harmlessly when the job does not exist yet.
sqoop job --delete rds_incremental_import_job
sqoop job --create rds_incremental_import_job \
-- \
import \
--connect jdbc:mysql://localhost:3306/sales_source \
--username root \
--password ok \
--table sales_order \
--hive-import \
--hive-table sales_rds.sales_order \
--fields-terminated-by '\t' \
--lines-terminated-by '\n' \
--incremental append \
--check-column order_number \
--last-value 0
# First extraction: full import of customer and product into the RDS layer
sqoop import --connect jdbc:mysql://localhost:3306/sales_source \
--username root --password ok --table customer --hive-import --hive-table sales_rds.customer --hive-overwrite --target-dir temp
sleep 2
sqoop import --connect jdbc:mysql://localhost:3306/sales_source --username root --password ok --table product --hive-import --hive-table sales_rds.product --hive-overwrite --target-dir temp
beeline -u jdbc:hive2://hadoop01:10000/sales_dw -e "TRUNCATE TABLE sales_rds.sales_order"
# Run the incremental import; with last-value 0 this first run imports all rows
sqoop job --exec rds_incremental_import_job
# Run the initial DW load (the file executed is init_dw_etl.sql)
spark-sql --master yarn-client -f init_dw_etl.sql
-- 定期装载(需先在 hive-site.xml 中开启事务支持,配置如下)
<property>
<name>hive.optimize.sort.dynamic.partition</name>
<value>false</value>
</property>
<property>
<name>hive.support.concurrency</name>
<value>true</value>
</property>
<property>
<name>hive.enforce.bucketing</name>
<value>true</value>
</property>
<property>
<name>hive.exec.dynamic.partition.mode</name>
<value>nonstrict</value>
</property>
<property>
<name>hive.txn.manager</name>
<value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
</property>
<property>
<name>hive.compactor.initiator.on</name>
<value>true</value>
</property>
<property>
<name>hive.compactor.worker.threads</name>
<value>1</value>
</property>
-- Set the SCD effective/expiry boundary dates for this load
use sales_dw;
SET hivevar:cur_date = CURRENT_DATE();
SET hivevar:pre_date = DATE_ADD(${hivevar:cur_date},-1);
-- Far-future sentinel marking a dimension row as "current"
SET hivevar:max_date = CAST('2050-01-01' AS DATE);
-- Advance the CDC window: the previous end_time becomes the new start_time and
-- today becomes the new end_time (single-row table, overwritten in place).
INSERT overwrite TABLE sales_rds.cdc_time
SELECT end_time, ${hivevar:cur_date} FROM sales_rds.cdc_time;
-- Customer dimension, SCD2 expiry step:
-- expire (expiry_date := yesterday) every current row whose customer was deleted
-- from the source or whose street address changed.
UPDATE dim_customer SET expiry_date = ${hivevar:pre_date}
WHERE dim_customer.customer_sk IN(SELECT
a.customer_sk
FROM (SELECT
customer_sk,
customer_number,
customer_street_address
FROM dim_customer
WHERE expiry_date = ${hivevar:max_date}) a
LEFT JOIN sales_rds.customer b ON a.customer_number = b.customer_number
WHERE b.customer_number IS NULL
OR a.customer_street_address <> b.customer_street_address);
-- Product dimension, SCD2 expiry step:
-- expire current rows whose product was deleted from the source or whose
-- name/category changed.
UPDATE dim_product
SET expiry_date = ${hivevar:pre_date}
WHERE dim_product.product_sk IN(SELECT a.product_sk
FROM(SELECT product_sk,
product_code,
product_name,
product_category
FROM dim_product
WHERE expiry_date = ${hivevar:max_date}) a
LEFT JOIN sales_rds.product b ON a.product_code = b.product_code
WHERE b.product_code IS NULL
OR (a.product_name <> b.product_name OR a.product_category <> b.product_category));
-- Customer dimension, SCD2 insert step: for each customer expired above because of
-- an address change, insert a new current row (version+1, effective yesterday,
-- expiring at the sentinel) -- unless an unexpired row for that customer exists.
INSERT INTO dim_customer
SELECT row_number() over (ORDER BY t1.customer_number) + t2.sk_max,
t1.customer_number,
t1.customer_name,
t1.customer_street_address,
t1.customer_zip_code,
t1.customer_city,
t1.customer_state,
t1.version,
t1.effective_date,
t1.expiry_date
FROM(SELECT
t2.customer_number customer_number,
t2.customer_name customer_name,
t2.customer_street_address customer_street_address,
t2.customer_zip_code,
t2.customer_city,
t2.customer_state,
t1.version + 1 `version`,
${hivevar:pre_date} effective_date,
${hivevar:max_date} expiry_date
FROM dim_customer t1
-- t1 = the row just expired by the UPDATE above (expiry_date = yesterday)
INNER JOIN sales_rds.customer t2 ON t1.customer_number = t2.customer_number
AND t1.expiry_date = ${hivevar:pre_date}
-- t3 guards against inserting when a current (unexpired) row already exists
LEFT JOIN dim_customer t3 ON t1.customer_number = t3.customer_number
AND t3.expiry_date = ${hivevar:max_date}
WHERE t1.customer_street_address <> t2.customer_street_address
AND t3.customer_sk IS NULL
) t1
-- New surrogate keys continue from the current maximum
CROSS JOIN(SELECT COALESCE(MAX(customer_sk),0) sk_max FROM dim_customer) t2;
-- SCD1 on customer_name (overwrite in place, no history):
-- 1) copy rows whose name changed into a temp table, taking the NEW name from staging,
-- 2) delete those rows from dim_customer,
-- 3) re-insert them from the temp table.
DROP TABLE IF EXISTS tmp;
CREATE TABLE tmp AS
SELECT a.customer_sk,
a.customer_number,
b.customer_name,
a.customer_street_address,
a.customer_zip_code,
a.customer_city,
a.customer_state,
a.version,
a.effective_date,
a.expiry_date
FROM dim_customer a
JOIN sales_rds.customer b ON a.customer_number = b.customer_number
where a.customer_name != b.customer_name;
-- Delete the stale rows
DELETE FROM
dim_customer WHERE
dim_customer.customer_sk IN (SELECT customer_sk FROM tmp);
-- Re-insert them with the updated name (surrogate keys are preserved)
INSERT INTO dim_customer
SELECT * FROM tmp;
-- Insert brand-new customers (present in the source, absent from the dimension)
-- as version 1, effective yesterday, expiring at the sentinel date.
INSERT INTO dim_customer
SELECT row_number() over (ORDER BY t1.customer_number) + t2.sk_max,
t1.customer_number,
t1.customer_name,
t1.customer_street_address,
t1.customer_zip_code,
t1.customer_city,
t1.customer_state,
1,
${hivevar:pre_date},
${hivevar:max_date}
FROM( SELECT t1.*
FROM sales_rds.customer t1
LEFT JOIN dim_customer t2 ON t1.customer_number = t2.customer_number
WHERE t2.customer_sk IS NULL) t1
-- Surrogate keys continue from the current maximum
CROSS JOIN(SELECT
COALESCE(MAX(customer_sk),0) sk_max
FROM dim_customer) t2;
-- Product dimension, SCD2 insert step: add new current rows (version+1, effective
-- yesterday) for products expired above due to name/category changes, unless an
-- unexpired row for that product already exists.
INSERT INTO dim_product
SELECT row_number() over (ORDER BY t1.product_code) + t2.sk_max,
t1.product_code,
t1.product_name,
t1.product_category,
t1.version,
t1.effective_date,
t1.expiry_date
FROM( SELECT t2.product_code product_code,
t2.product_name product_name,
t2.product_category product_category,
t1.version + 1 `version`,
${hivevar:pre_date} effective_date,
${hivevar:max_date} expiry_date
FROM dim_product t1
-- t1 = the row just expired by the UPDATE above (expiry_date = yesterday)
INNER JOIN sales_rds.product t2 ON t1.product_code = t2.product_code
AND t1.expiry_date = ${hivevar:pre_date}
-- t3 guards against inserting when a current (unexpired) row already exists
LEFT JOIN dim_product t3 ON t1.product_code = t3.product_code
AND t3.expiry_date = ${hivevar:max_date}
WHERE(t1.product_name <> t2.product_name
OR t1.product_category <> t2.product_category)
AND t3.product_sk IS NULL
) t1
CROSS JOIN (SELECT COALESCE(MAX(product_sk),0) sk_max
FROM dim_product) t2;
-- Insert brand-new products (in the source, absent from the dimension) as
-- version 1, effective yesterday, expiring at the sentinel date.
INSERT INTO dim_product
SELECT row_number() over (ORDER BY t1.product_code) + t2.sk_max,
t1.product_code,
t1.product_name,
t1.product_category,
1,
${hivevar:pre_date},
${hivevar:max_date}
FROM( SELECT t1.*
FROM sales_rds.product t1
LEFT JOIN dim_product t2 ON t1.product_code = t2.product_code
WHERE t2.product_sk IS NULL
) t1
CROSS JOIN (SELECT COALESCE(MAX(product_sk),0) sk_max
FROM dim_product) t2;
-- Load the order dimension: one new row per order captured in the CDC window.
-- NOTE(review): the filter "entry_date >= end_time AND entry_date < start_time"
-- reads inverted relative to the column names (a [start, end) window would be
-- ">= start_time AND < end_time"); confirm against how cdc_time is initialized
-- and rolled forward before changing.
INSERT INTO dim_order
SELECT row_number() over (ORDER BY t1.order_number) + t2.sk_max,
t1.order_number,
t1.version,
t1.effective_date,
t1.expiry_date
FROM( SELECT order_number order_number,
1 `version`,
order_date effective_date,
'2050-01-01' expiry_date
FROM sales_rds.sales_order, sales_rds.cdc_time
WHERE entry_date >= end_time AND entry_date < start_time ) t1
-- Surrogate keys continue from the current maximum
CROSS JOIN( SELECT COALESCE(MAX(order_sk),0) sk_max
FROM dim_order) t2;
-- Periodic load of the sales order fact table
-- Enable dynamic partitioning (the month partition value comes from the data)
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=10000;
set hive.exec.max.dynamic.partitions.pernode=10000;
-- Only orders whose entry_date falls in the CDC window are loaded; customer/product
-- dimension rows are matched via their SCD validity range against order_date.
-- NOTE(review): "entry_date >= f.end_time AND entry_date < f.start_time" reads
-- inverted relative to the column names -- confirm against how cdc_time is
-- populated before changing.
from
(
select
b.order_sk,
c.customer_sk,
d.product_sk,
e.date_sk order_date_sk,
a.order_amount,
substr(a.order_date,1,7) order_date
from sales_rds.sales_order a
join sales_dw.dim_order b on a.order_number=b.order_number
join sales_dw.dim_customer c on a.customer_number=c.customer_number
join sales_dw.dim_product d on a.product_code=d.product_code
join sales_dw.dim_date e on date(a.order_date)=e.date,
sales_rds.cdc_time f
where a.order_date >= c.effective_date
AND a.order_date < c.expiry_date
AND a.order_date >= d.effective_date
AND a.order_date < d.expiry_date
AND a.entry_date >= f.end_time
AND a.entry_date < f.start_time
) temp
insert into table sales_dw.fact_sales_order partition(order_date)
select order_sk,customer_sk,product_sk,order_date_sk,order_amount,order_date;
-- Roll the CDC bookkeeping forward: collapse both bounds to start_time so the
-- next run's "SELECT end_time, cur_date" (see the window setup above) advances it
INSERT overwrite TABLE sales_rds.cdc_time
SELECT start_time,start_time
FROM sales_rds.cdc_time;
#!/bin/bash
# Daily schedule: full re-pull of the customer and product tables, incremental pull
# of sales_order, then the periodic DW load.
sqoop import --connect jdbc:mysql://localhost:3306/sales_source --username root \
--password ok --table customer --hive-import --hive-table sales_rds.customer --hive-overwrite --target-dir temp
sleep 2
sqoop import --connect jdbc:mysql://localhost:3306/sales_source --username root --password \
ok --table product --hive-import --hive-table sales_rds.product --hive-overwrite --target-dir temp
# Run the incremental import of sales_order
sqoop job --exec rds_incremental_import_job
# Run the periodic-load SQL file
hive -f schedule_daily_etl.sql
# spark-sql does not support Hive transactions; do not use the line below
#spark-sql --master yarn-client -f schedule_daily_etl.sql
# 使用 crontab -e 添加定时任务,内容如下: