本文根据实际使用情况,对Hive中使用load data导入数据的几种方式做一些总结,并针对不同的情况分别进行简单测试。
下面是具体的建表语句以及测试过程。
先新建两张测试表test_01和test_02,其中test_01为普通表,test_02为分区表。
-- Plain (non-partitioned) test table stored as comma-delimited text.
CREATE TABLE IF NOT EXISTS `test_01` (
    `id`    STRING,
    `name`  STRING,
    `age`   INT,
    `score` FLOAT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
-- Partitioned test table stored as comma-delimited text.
-- Same four data columns as test_01; `dataday` is the partition column,
-- so it is declared in PARTITIONED BY rather than in the column list.
CREATE TABLE IF NOT EXISTS `test_02` (
    `id`    STRING,
    `name`  STRING,
    `age`   INT,
    `score` FLOAT
)
PARTITIONED BY (
    `dataday` STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
下面分情况分别进行测试。首先是从本地文件导入(load data local inpath),先查看本地测试文件a.txt的内容:
[root@nd2 wh]# cat a.txt
1,lucy,20,90
2,Marry,21,95
3,Tom,22,100
--使用overwrite覆盖操作
hive> load data local inpath '/usr/wh/a.txt' overwrite into table test_01;
hive> select * from test_01;
1 lucy 20 90.0
2 Marry 21 95.0
3 Tom 22 100.0
--这里我们不使用overwrite,数据会被直接追加
hive> load data local inpath '/usr/wh/a.txt' into table test_01;
hive> select * from test_01;
1 lucy 20 90.0
2 Marry 21 95.0
3 Tom 22 100.0
1 lucy 20 90.0
2 Marry 21 95.0
3 Tom 22 100.0
--再次使用overwrite,验证数据是否被覆盖
hive> load data local inpath '/usr/wh/a.txt' overwrite into table test_01;
hive> select * from test_01;
1 lucy 20 90.0
2 Marry 21 95.0
3 Tom 22 100.0
--导入分区表需要指定分区
hive> load data local inpath '/usr/wh/a.txt' overwrite into table test_02
partition (dataday='20190501');
hive> select * from test_02;
1 lucy 20 90.0 20190501
2 Marry 21 95.0 20190501
3 Tom 22 100.0 20190501
[root@nd2 wh]# hadoop fs -cat /wh/test/a.txt
1,lucy,20,90
2,Marry,21,95
3,Tom,22,100
hive> load data inpath 'hdfs://nameservice1/wh/test/a.txt' overwrite into table test_01;
hive> select * from test_01;
OK
1 lucy 20 90.0
2 Marry 21 95.0
3 Tom 22 100.0
--需要注意的是:从HDFS上导入数据到Hive表时,源文件是被移动(而不是复制)的,导入完成后原路径下的文件就不存在了,所以这里需要重新上传一份文件到HDFS上
--[root@nd2 wh]# hadoop fs -ls /wh/test/
--[root@nd2 wh]# hadoop fs -put a.txt /wh/test/
hive> load data inpath 'hdfs://nameservice1/wh/test/a.txt' overwrite into table test_02 partition (dataday='20190501');
hive> select * from test_02;
1 lucy 20 90.0 20190501
2 Marry 21 95.0 20190501
3 Tom 22 100.0 20190501
--分区使用错误的两种情况:向非分区表导入时指定了分区,或者向分区表导入时没有指定分区,都会报错
hive> load data local inpath '/usr/wh/a.txt' overwrite into table test_01 partition (dataday='20190501');
FAILED: ValidationFailureSemanticException table is not partitioned but partition spec exists: {dataday=20190501}
hive> load data local inpath '/usr/wh/a.txt' overwrite into table test_02;
FAILED: SemanticException [Error 10062]: Need to specify partition columns because the destination table is partitioned
--文件中的字段数少于表的字段数时,缺少的字段为NULL
[root@nd2 wh]# cat b.txt
1,lucy,20
2,Marry,21
3,Tom,22
hive> load data local inpath '/usr/wh/b.txt' overwrite into table test_01;
hive> select * from test_01;
1 lucy 20 NULL
2 Marry 21 NULL
3 Tom 22 NULL
--文件中的字段数多于表的字段数时,多出的字段会被丢弃
[root@nd2 wh]# cat c.txt
1,lucy,20,90,aa
2,Marry,21,95,bb
3,Tom,22,100,cc
hive> load data local inpath '/usr/wh/c.txt' overwrite into table test_01;
hive> select * from test_01;
1 lucy 20 90.0
2 Marry 21 95.0
3 Tom 22 100.0
--文件中的字段值与表字段类型不匹配(无法转换)时,对应字段为NULL
[root@nd2 wh]# cat d.txt
1,lucy,aa,bb
2,Marry,cc,dd
3,Tom,xx,yy
hive> load data local inpath '/usr/wh/d.txt' overwrite into table test_01;
hive> select * from test_01;
1 lucy NULL NULL
2 Marry NULL NULL
3 Tom NULL NULL