数据导入
向表中装载数据(Load)
语法
load data [local] inpath '/opt/module/datas/student.txt' [overwrite] into table table_name [partition (partcol1=val1,…)];
- load data:表示加载数据
- local:表示从本地加载数据到hive表(复制);否则从HDFS加载数据到hive表(移动)
- inpath:表示加载数据的路径
- overwrite:表示覆盖表中已有数据,否则表示追加
- into table:表示加载到哪张表
- table_name:表示具体的表名
- partition:表示上传到指定分区
案例
- 准备数据
新建student1.txt和student2.txt,输入以下内容(字段之间必须用制表符\t分隔,与建表语句中指定的分隔符一致)
student1.txt
1 zhao 18
2 jun 19
student2.txt
3 feng 17
4 xiang 16
5 bin 15
将student2.txt 上传到hdfs
hadoop fs -put /opt/module/datas/student2.txt /
- 开始操作
-- Create the student table: tab-delimited text rows, partitioned by year
create table if not exists student (
    id   int,
    name string,
    age  int
)
partitioned by (year string)
row format delimited
    fields terminated by '\t';
-- Load the local file prepared earlier (student1.txt) into the partition
load data local inpath '/opt/module/datas/student1.txt'
into table student partition (year='2017-2018');
-- Load the file that was uploaded to the HDFS root (student2.txt)
load data inpath '/student2.txt'
into table student partition (year='2017-2018');
-- Load the local file again, this time replacing the partition's existing data
load data local inpath '/opt/module/datas/student1.txt'
overwrite into table student partition (year='2017-2018');
通过查询语句向表中插入数据(Insert)
还是上面那张表
-- Insert a single literal row
insert into table student partition (year='2017-2018')
values (11, 'zzz', 10);
-- Insert the result of one query: copy partition 2017-2018 into 2018-2019
insert into table student partition (year='2018-2019')
select
    id,
    name,
    age
from student
where year = '2017-2018';
-- Insert the de-duplicated union of several query results
insert into table student partition (year='2019-2020')
select id, name, age
from student
where year = '2017-2018'
union
select id, name, age
from student
where year = '2018-2019';
-- The same kind of insert written in Hive's FROM-first form
from student
insert into table student partition (year='2020-2021')
select
    id,
    name,
    age
where year in ('2017-2018', '2018-2019', '2019-2020');
查询语句中创建表并加载数据(As Select)
根据查询结果创建表(查询的结果会添加到新创建的表中)
-- Create student1 and fill it with the rows of the four listed partitions
create table if not exists student1 as
select
    id,
    name,
    age
from student
where year in ('2017-2018', '2018-2019', '2019-2020', '2020-2021');
创建表的时候通过Location指定加载数据路径
-- Create student2 on top of an explicitly named HDFS directory
create table if not exists student2 (
    id   int,
    name string,
    age  int
)
row format delimited
    fields terminated by '\t'
location '/user/hive/warehouse/student2';
-- Put the prepared local file (student1.txt) straight into the table's directory
dfs -put /opt/module/datas/student1.txt /user/hive/warehouse/student2/;
-- Query the table to check that the uploaded rows are visible
select * from student2;
Import数据到指定hive表中
先使用export导出后,再将数据导入(导入路径必须是export导出的目录),例如:
import table student3 from '/output/student3/';
数据导出
insert导出
-- Export the query result to a local directory
insert overwrite local directory '/opt/module/datas/output/student1'
select *
from student;
-- Export the query result to a local directory as tab-delimited text
insert overwrite local directory '/opt/module/datas/output/student2'
row format delimited
    fields terminated by '\t'
select *
from student;
-- Export the query result to an HDFS directory as tab-delimited text
insert overwrite directory '/output/student1'
row format delimited
    fields terminated by '\t'
select *
from student;
hadoop命令导出到本地
-- Copy a data file of student1 from the warehouse directory to the local disk
dfs -get /user/hive/warehouse/student1/000000_0 /opt/module/datas/output/student.txt;
hive shell命令导出
基本语法:(hive -f/-e 执行语句或者脚本 > file)
# Run the query from the shell and redirect its output into a local file
hive -e "select * from default.student" > student1.txt
export导出到hdfs上
-- Export the student table to an HDFS directory
export table student
to '/output/student3/';
sqoop导出
这个后面会写文章详细讲述
清除表中数据(Truncate)
Truncate只能清空管理表中的数据(表本身保留),不能清空外部表中的数据
-- Remove all rows from the student table
truncate table
    student;