-- Prerequisites: the next two commands are shell commands, not HiveQL, so
-- they are kept here as comments. They presumably start the cluster services
-- and open the Hive CLI -- confirm against the lab environment.
--   start-all.sh
--   hive

-- Create two databases; empdb1 is dropped again a few statements below.
-- "if not exists" lets the script be rerun without failing here.
create database if not exists empdb;
create database if not exists empdb1;
show databases;

-- Make empdb the current database for the statements that follow.
use empdb;

-- "cascade" drops the database even when it still contains tables.
drop database if exists empdb1 cascade;

-- Print the metadata of empdb.
desc database empdb;
-- External table emp_hr1: text rows whose fields are separated by '|'.
create external table emp_hr1 (
    name        string,
    employee_id string,
    sin_number  string,
    start_date  date,
    sex         string,
    age         int
)
row format delimited
    fields terminated by '|';

-- Read back the whole table.
-- NOTE(review): no "load data" or "location" clause is visible in this
-- script, so this returns rows only if data files were placed under the
-- table's storage directory by some other means -- confirm.
select *
from emp_hr1;
-- Copy the column layout of emp_hr1 into a new table, then show both schemas.
create table emp_hr2
    like emp_hr1;
describe emp_hr1;
describe emp_hr2;

-- Rename the copy; from here on it is called emp_hr3.
alter table emp_hr2
    rename to emp_hr3;

-- Rename column employee_id to employee_ids; the type stays string.
alter table emp_hr3
    change column employee_id employee_ids string;

-- Append a salary column ...
alter table emp_hr3
    add columns (salary int);

-- ... then redefine the complete column list, which leaves salary out again.
alter table emp_hr3
    replace columns (
        name         string,
        employee_ids string,
        sin_number   string,
        start_date   date,
        sex          string,
        age          int
    );
-- Build emp_hr2 as a four-column subset of emp_hr1.

-- Approach 1: declare the table explicitly, then fill it from emp_hr1.
create table emp_hr2 (
    name       string,
    start_date date,
    sex        string,
    age        int
);

insert into emp_hr2
select name, start_date, sex, age
from emp_hr1;

-- Approach 2 (shortcut): create-table-as-select does both steps in one
-- statement. emp_hr2 already exists after approach 1, so it is dropped first;
-- without the drop the CTAS fails because the target table is present.
drop table if exists emp_hr2;

create table emp_hr2 as
select name, start_date, sex, age
from emp_hr1;
-- Create the new table emp_hr4 ('|'-delimited text, same four columns as emp_hr2).
create table emp_hr4 (
    name       string,
    start_date date,
    sex        string,
    age        int
)
row format delimited
    fields terminated by '|';

-- Insert data: copy every row of emp_hr2 into emp_hr4.
insert into table emp_hr4
select
    name,
    start_date,
    sex,
    age
from emp_hr2;
-- Whole-table aggregates over emp_hr2, one statistic per query.

-- Number of rows.
select count(*) as cnt
from emp_hr2;

-- Largest age.
select max(age) as max_age
from emp_hr2;

-- Smallest age.
select min(age) as min_age
from emp_hr2;

-- Average age.
select avg(age) as avg_age
from emp_hr2;
-- Row-level filters over emp_hr2.

-- First five rows (no ordering is requested, so which five is unspecified).
select name, age from emp_hr2 limit 5;

-- Comparison and range predicates on age; "between" includes both bounds.
select name, age from emp_hr2 where age > 50;
select name, age from emp_hr2 where age between 20 and 30;
select name, age from emp_hr2 where age = 55;
select name, age from emp_hr2 where age not between 20 and 55;

-- Pattern matching works on strings, while age is an int. The cast is written
-- out so the comparison does not depend on implicit int-to-string coercion.
-- '5%' matches ages whose text starts with 5; '_2' matches two-character ages
-- ending in 2.
select name, sex from emp_hr2 where cast(age as string) like '5%';
select name, sex from emp_hr2 where cast(age as string) like '_2';

-- Combined predicates.
select * from emp_hr2 where age > 40 and sex = 'Male';
select * from emp_hr2 where sex <> 'Male';
-- Alternative to the previous query.
-- NOTE(review): equivalent only if sex holds no values other than 'Male' and
-- 'Female' -- verify against the data.
select * from emp_hr2 where sex = 'Female';
-- Per-sex aggregates over emp_hr2.

-- Average age for each sex.
select
    avg(age),
    sex
from emp_hr2
group by sex;

-- Oldest and youngest age for each sex.
select
    max(age) as max_age,
    min(age) as min_age,
    sex
from emp_hr2
group by sex;

-- Only the sexes whose average age is above 34.
select sex
from emp_hr2
group by sex
having avg(age) > 34;
-- For rows with age at most 55, compute 55 - age and list the largest
-- differences first. The comparison operator is "<="; "=<" is not valid.
select
    name,
    (55 - age) as new_age
from emp_hr2
where age <= 55
order by new_age desc;
-- Note: the result set is large; only part of the output was captured.

-- "distribute by" sends all rows with the same sex to the same reducer and
-- "sort by" orders rows by age within each reducer (not globally).
select
    name,
    sex,
    age
from emp_hr2
distribute by sex
sort by age;
-- Note: the result set is large; only part of the output was captured.
-- Partitioned table with the same six data columns as emp_hr1, plus the
-- partition column sex_m. Rows are '|'-delimited text.
create table emp_hr1_part1 (
    name        string,
    employee_id string,
    sin_number  string,
    start_date  date,
    sex         string,
    age         int
)
partitioned by (sex_m string)
row format delimited
    fields terminated by '|';
-- Enable dynamic partitioning.
-- NOTE(review): both inserts below name their partition value explicitly
-- (static partitions), so they do not appear to need these settings -- confirm
-- whether later statements outside this view rely on them.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;

-- Load the 'Male' rows of emp_hr1 into partition sex_m='Male'.
-- Columns are listed explicitly instead of "select *", so the insert does not
-- silently misalign if the column order of emp_hr1 ever changes.
insert into emp_hr1_part1 partition (sex_m = 'Male')
select
    name,
    employee_id,
    sin_number,
    start_date,
    sex,
    age
from emp_hr1
where sex = 'Male';

-- Register the 'Female' partition up front; "if not exists" keeps the
-- statement from failing when the partition is already there (e.g. on a rerun).
alter table emp_hr1_part1 add if not exists partition (sex_m = 'Female');

-- Load the 'Female' rows of emp_hr1 into partition sex_m='Female'.
insert into emp_hr1_part1 partition (sex_m = 'Female')
select
    name,
    employee_id,
    sin_number,
    start_date,
    sex,
    age
from emp_hr1
where sex = 'Female';

-- List the partitions, then read back only the 'Male' partition.
show partitions emp_hr1_part1;
select * from emp_hr1_part1 where sex_m = 'Male';
-- Add a partition (no error if it already exists).
alter table emp_hr1_part1
    add if not exists partition (sex_m = 'Middle');

-- Drop the partition (no error if it is absent).
alter table emp_hr1_part1
    drop if exists partition (sex_m = 'Middle');

-- Add the partition a second time; same statement as above.
alter table emp_hr1_part1
    add if not exists partition (sex_m = 'Middle');

-- Drop the partition a second time; same statement as above.
alter table emp_hr1_part1
    drop if exists partition (sex_m = 'Middle');