-- 复习
CREATE TABLE IF NOT EXISTS dept_partition
(
deptno int,
dname string,
loc string
)
partitioned BY(month string)
row FORMAT delimited
fields terminated BY '\t'
;
DESC dept_partition;
show partitions dept_partition;
ALTER TABLE dept_partition ADD partition(month='201904');
ALTER TABLE dept_partition ADD partition(month='201905') partition(month='201906');
ALTER TABLE dept_partition drop partition(month='201904');
ALTER TABLE dept_partition drop partition(month='201905'),partition(month='201906');
LOAD data local inpath '/root/data/dept.txt' overwrite INTO TABLE `dept_partition`
partition(`month`='201904',`month`='201905');
-- 静态分区之二级分区
CREATE TABLE IF NOT EXISTS dept_partition2
(
deptno int,
dname string,
loc string
)
partitioned BY(month int,date int)
row FORMAT delimited
fields terminated BY '\t'
;
ALTER TABLE dept_partition2 ADD partition(month=201904,date=01);
ALTER TABLE dept_partition2 ADD partition(month=201904,date=02) partition(month=201904,date=03);
show partitions dept_partition2;
ALTER TABLE dept_partition2 drop partition(month=201904,date=01);
ALTER TABLE dept_partition2 drop partition(month=201904,date=02),partition(month=201904,date=03);
LOAD data local inpath '/root/data/dept.txt' overwrite INTO TABLE `dept_partition2` partition(month=201904,date=01);
SELECT * FROM dept_partition2;
-- 动态分区
-- 准备数据student_male_female.txt
001,tom,22,male
002,jack,23,male
003,rose,20,female
004,mike,25,male
005,lucy,21,female
006,andy,20,female
-- 建表
DROP TABLE IF EXISTS student_info;
CREATE TEMPORARY TABLE IF NOT EXISTS student_info
(
id string,
name string,
age int,
sex string
)
row FORMAT delimited
fields terminated BY ','
;
-- 加载数据
LOAD data local inpath '/root/data/student_male_female.txt' overwrite INTO TABLE `student_info`;
-- 查看数据
SELECT * FROM student_info;
-- 建分区表
DROP TABLE IF EXISTS student_partition;
CREATE TABLE IF NOT EXISTS student_partition
(
id string,
name string,
age int
)
partitioned BY (sex string);
-- 检查是否已开启动态分区模式【默认为true】
set hive.exec.dynamic.partition=true;
-- 关闭严格模式
set hive.exec.dynamic.partition.mode=nonstrict;
-- 插入数据【注意:所有字段顺序对应,分区字段必须放在最后】
INSERT INTO TABLE `student_partition` partition(sex)
SELECT id,name,age,sex FROM student_info;
-- 查看数据
SELECT * FROM `student_partition`;
show partitions `student_partition`;
-- 示例2
-- 准备数据student_partition2.txt
01,tom,22,2019-09-08
02,jack,23,2020-08-09
03,rose,20,2019-09-08
04,smith,21,2020-07-09
05,john,22,2019-09-08
-- 建临时表
CREATE TEMPORARY TABLE IF NOT EXISTS student_info2
(
id string,
name string,
age int,
start_date date
)
row FORMAT delimited
fields terminated BY ','
;
-- 加载数据
LOAD data local inpath '/root/data/student_partition2.txt' overwrite INTO TABLE `student_info2`;
-- 查看数据
SELECT * FROM student_info2;
-- 建分区表
CREATE TABLE IF NOT EXISTS student_partition2
(
id string,
name string,
age int,
start_date date
)
partitioned BY(year string,month string);
-- 查看数据
DESC student_partition2;
-- 插入数据
INSERT INTO TABLE student_partition2 partition(year,month)
SELECT id,name,age,start_date,YEAR(start_date) year,MONTH(start_date) month
FROM student_info2;
-- 分桶表
-- 准备数据bucket.txt
1,aa
2,bb
3,cc
4,dd
5,ee
6,ff
7,gg
8,hh
9,ii
10,jj
11,kk
12,ll
13,mm
14,nn
15,oo
16,pp
-- 建表
CREATE TABLE IF NOT EXISTS bucket_table
(
id int,
name string
)
row FORMAT delimited
fields terminated BY ','
;
-- 加载数据
LOAD data local inpath '/root/data/bucket.txt' overwrite INTO TABLE `bucket_table`;
-- 查看数据
SELECT * FROM bucket_table;
-- 设置分桶功能开关【默认值false】
SET hive.enforce.bucketing=true;
-- 创建分桶表
CREATE TABLE IF NOT EXISTS bucket_table2
(
id int,
name string
)
CLUSTERED BY (id)
INTO 4 buckets
row FORMAT delimited
fields terminated BY ',';
-- 插入数据
INSERT INTO TABLE bucket_table2
SELECT id,name FROM bucket_table;
-- 查看数据
SELECT id,name FROM bucket_table2;
-- 侧视图
-- 建表
CREATE TABLE employee AS
select * from managed_employee;
-- 查看数据
select * from employee;
SELECT work_place FROM employee;
+-------------------------+--+
| work_place |
+-------------------------+--+
| ["Montreal","Toronto"] |
| ["Montreal"] |
| ["New York"] |
| ["Vancouver"] |
+-------------------------+--+
SELECT explode(work_place) FROM employee;
+------------+--+
| col |
+------------+--+
| Montreal |
| Toronto |
| Montreal |
| New York |
| Vancouver |
+------------+--+
SELECT name,work_place FROM employee;
+----------+-------------------------+--+
| name | work_place |
+----------+-------------------------+--+
| Michael | ["Montreal","Toronto"] |
| Will | ["Montreal"] |
| Shelley | ["New York"] |
| Lucy | ["Vancouver"] |
+----------+-------------------------+--+
SELECT name,explode(work_place) FROM employee;
-- 报错
FAILED: SemanticException [Error 10081]: UDTF's are not supported outside the SELECT clause, nor nested
in expressionsError: Error while compiling statement: FAILED: SemanticException [Error 10081]: UDTF's are not support
ed outside the SELECT clause, nor nested in expressions (state=42000,code=10081)
-- 修正
SELECT name,wp
FROM employee lateral VIEW explode(work_place) t1 AS wp;
+----------+------------+--+
| name | wp |
+----------+------------+--+
| Michael | Montreal |
| Michael | Toronto |
| Will | Montreal |
| Shelley | New York |
| Lucy | Vancouver |
+----------+------------+--+
SELECT skills_score FROM employee;
+-----------------------+--+
| skills_score |
+-----------------------+--+
| {"DB":80} |
| {"Perl":85} |
| {"Python":80} |
| {"Sales":89,"HR":94} |
+-----------------------+--+
SELECT explode(skills_score) FROM employee;
+---------+--------+--+
| key | value |
+---------+--------+--+
| DB | 80 |
| Perl | 85 |
| Python | 80 |
| Sales | 89 |
| HR | 94 |
+---------+--------+--+
SELECT name,skills_score FROM employee;
+----------+-----------------------+--+
| name | skills_score |
+----------+-----------------------+--+
| Michael | {"DB":80} |
| Will | {"Perl":85} |
| Shelley | {"Python":80} |
| Lucy | {"Sales":89,"HR":94} |
+----------+-----------------------+--+
SELECT name,explode(skills_score) FROM employee;
-- 报错
FAILED: SemanticException [Error 10081]: UDTF's are not supported outside the SELECT clause, nor nested
in expressionsError: Error while compiling statement: FAILED: SemanticException [Error 10081]: UDTF's are not support
ed outside the SELECT clause, nor nested in expressions (state=42000,code=10081)
-- 修正
SELECT name,skill,score
FROM employee lateral VIEW explode(skills_score) t1 AS skill,score;
+----------+---------+--------+--+
| name | skill | score |
+----------+---------+--------+--+
| Michael | DB | 80 |
| Will | Perl | 85 |
| Shelley | Python | 80 |
| Lucy | Sales | 89 |
| Lucy | HR | 94 |
+----------+---------+--------+--+
SELECT sex_age FROM employee;
+----------------------------+--+
| sex_age |
+----------------------------+--+
| {"sex":"Male","age":30} |
| {"sex":"Male","age":35} |
| {"sex":"Female","age":27} |
| {"sex":"Female","age":57} |
+----------------------------+--+
SELECT explode(sex_age) FROM employee;
-- 报错
FAILED: UDFArgumentException explode() takes an array or a map as a parameter
Error: Error while compiling statement: FAILED: UDFArgumentException explode() takes an array or a map
as a parameter (state=42000,code=40000)
-- 修正
SELECT array(sex_age) FROM employee;
+------------------------------+--+
| _c0 |
+------------------------------+--+
| [{"sex":"Male","age":30}] |
| [{"sex":"Male","age":35}] |
| [{"sex":"Female","age":27}] |
| [{"sex":"Female","age":57}] |
+------------------------------+--+
SELECT inline(array(sex_age)) FROM employee;
+---------+------+--+
| sex | age |
+---------+------+--+
| Male | 30 |
| Male | 35 |
| Female | 27 |
| Female | 57 |
+---------+------+--+
SELECT name,inline(array(sex_age)) FROM employee;
-- 报错
FAILED: SemanticException [Error 10081]: UDTF's are not supported outside the SELECT clause, nor nested
in expressionsError: Error while compiling statement: FAILED: SemanticException [Error 10081]: UDTF's are not support
ed outside the SELECT clause, nor nested in expressions (state=42000,code=10081)
-- 修正
SELECT name,sex,age
FROM employee lateral VIEW inline(array(sex_age)) t1 AS sex,age;
+----------+---------+------+--+
| name | sex | age |
+----------+---------+------+--+
| Michael | Male | 30 |
| Will | Male | 35 |
| Shelley | Female | 27 |
| Lucy | Female | 57 |
+----------+---------+------+--+
SELECT * FROM employee;
+----------------+-------------------------+----------------------------+------------------------+-----
---------------------------------------------+--+| employee.name | employee.work_place | employee.sex_age | employee.skills_score |
employee.dept_title |+----------------+-------------------------+----------------------------+------------------------+-----
---------------------------------------------+--+| Michael | ["Montreal","Toronto"] | {"sex":"Male","age":30} | {"DB":80} | {"Pr
oduct":"Developer","Administration":"Lead"} || Will | ["Montreal"] | {"sex":"Male","age":35} | {"Perl":85} | {"Pr
oduct":"Lead","Test":"Lead"} || Shelley | ["New York"] | {"sex":"Female","age":27} | {"Python":80} | {"Te
st":"Lead","COE":"Architect"} || Lucy | ["Vancouver"] | {"sex":"Female","age":57} | {"Sales":89,"HR":94} | {"Sa
les":"Lead"} |+----------------+-------------------------+----------------------------+------------------------+-----
---------------------------------------------+--+
SELECT name,wp,sex,age,skill,score,dept,title
FROM employee
lateral VIEW explode(work_place) t AS wp
lateral VIEW inline(array(sex_age)) t AS sex,age
lateral VIEW explode(skills_score) t AS skill,score
lateral VIEW explode(dept_title) t AS dept,title;
+----------+------------+---------+------+---------+--------+-----------------+------------+--+
| name | wp | sex | age | skill | score | dept | title |
+----------+------------+---------+------+---------+--------+-----------------+------------+--+
| Michael | Montreal | Male | 30 | DB | 80 | Product | Developer |
| Michael | Montreal | Male | 30 | DB | 80 | Administration | Lead |
| Michael | Toronto | Male | 30 | DB | 80 | Product | Developer |
| Michael | Toronto | Male | 30 | DB | 80 | Administration | Lead |
| Will | Montreal | Male | 35 | Perl | 85 | Product | Lead |
| Will | Montreal | Male | 35 | Perl | 85 | Test | Lead |
| Shelley | New York | Female | 27 | Python | 80 | Test | Lead |
| Shelley | New York | Female | 27 | Python | 80 | COE | Architect |
| Lucy | Vancouver | Female | 57 | Sales | 89 | Sales | Lead |
| Lucy | Vancouver | Female | 57 | HR | 94 | Sales | Lead |
+----------+------------+---------+------+---------+--------+-----------------+------------+--+
SELECT split('aa,bb,cc',',');
+-------------------+--+
| _c0 |
+-------------------+--+
| ["aa","bb","cc"] |
+-------------------+--+
SELECT explode(split('aa,bb,cc',','));
+------+--+
| col |
+------+--+
| aa |
| bb |
| cc |
+------+--+
SELECT explode(split(null,','));
+------+--+
| col |
+------+--+
+------+--+
SELECT name,wp
FROM employee
lateral VIEW explode(split(null,',')) t AS wp;
+-------+-----+--+
| name | wp |
+-------+-----+--+
+-------+-----+--+
SELECT name,wp
FROM employee
lateral VIEW outer explode(split(null,',')) t AS wp;
+----------+-------+--+
| name | wp |
+----------+-------+--+
| Michael | NULL |
| Will | NULL |
| Shelley | NULL |
| Lucy | NULL |
+----------+-------+--+
-- 重点:
建库
建表(内,外)
加载数据
(1)load data ...
(2)INSERT INTO TABLE ...
分区(静,动)
侧视图(数组,映射,结构)