外部表分区添加数据方式代码示例

tips
1.
create external table ext_user(id int, mail String, count int) partitioned by (ds string) row format delimited fields terminated by '\t' stored as textfile location '/dsap/rawdata/user';  hdfs location 路径需为文件夹,或不存在(执行后会自动创建文件夹),另 date为保留关键字

2.
hadoop fs -put /work/user.txt /dsap/rawdata/user/20120412/user.txt
hadoop fs -put /work/user.txt /dsap/rawdata/user/20120413/user.txt

hadoop fs -ls /dsap/rawdata/user
drwxr-xr-x   - root supergroup          0 2012-04-13 16:46 /dsap/rawdata/user/20120412
drwxr-xr-x   - root supergroup          0 2012-04-13 16:46 /dsap/rawdata/user/20120413

3.
load data inpath '/dsap/rawdata/user/20120412' overwrite into table ext_user partition (ds='20120412');
load data inpath '/dsap/rawdata/user/20120413' overwrite into table ext_user partition (ds='20120413');


hadoop fs -ls /dsap/rawdata/user
drwxr-xr-x   - root supergroup          0 2012-04-13 16:46 /dsap/rawdata/user/ds=20120412
drwxr-xr-x   - root supergroup          0 2012-04-13 16:46 /dsap/rawdata/user/ds=20120413

alter table ext_user add partition (ds='20120411') location '/dsap/rawdata/user/20120411';

hadoop fs -ls /dsap/rawdata/user
drwxr-xr-x   - root supergroup          0 2012-04-13 17:06 /dsap/rawdata/user/20120411
drwxr-xr-x   - root supergroup          0 2012-04-13 16:46 /dsap/rawdata/user/ds=20120412
drwxr-xr-x   - root supergroup          0 2012-04-13 16:46 /dsap/rawdata/user/ds=20120413

4.
select count(*) from ext_user where ds='20120413';

5.
show partitions ext_user;




only test:

hadoop fs -put /work/user.txt /dsap/rawdata/user/20120415/user.txt
hadoop fs -put /work/user.txt /dsap/rawdata/user/20120416/user.txt
fs -ls
drwxr-xr-x   - root supergroup          0 2012-04-13 17:59 /dsap/rawdata/user/20120415
drwxr-xr-x   - root supergroup          0 2012-04-13 17:59 /dsap/rawdata/user/20120416

then
load data inpath '/dsap/rawdata/user/20120415' overwrite into table ext_user1 partition (ds='20120415');
fs -ls
drwxr-xr-x   - root supergroup          0 2012-04-13 18:02 /dsap/rawdata/user/20120416
drwxr-xr-x   - root supergroup          0 2012-04-13 17:59 /dsap/rawdata/user/ds=20120415   (20120415->ds=20120415)

do a tricky test:
load data inpath '/dsap/rawdata/user/ds=20120415' overwrite into table ext_user1 partition (ds='ds=20120415');
fs -ls
drwxr-xr-x   - root supergroup          0 2012-04-13 18:02 /dsap/rawdata/user/20120416
drwxr-xr-x   - root supergroup          0 2012-04-13 17:59 /dsap/rawdata/user/ds=ds%3D20120415 (ds=20120415->ds=ds%3D20120415)

hadoop fs -mv /dsap/rawdata/user/ds=ds%3D20120415 /dsap/rawdata/user/20120415
load data inpath '/dsap/rawdata/user/20120415' overwrite into table ext_user1 partition (ds='test');
drwxr-xr-x   - root supergroup          0 2012-04-13 17:59 /dsap/rawdata/user/ds=test
conclusion:  load data inpath 分区时候会使hdfs路径修改, mv 原路径 partition(分区=值)的形式

alter table ext_user1 add partition (ds=20120416) location '/dsap/rawdata/user/20120416';
drwxr-xr-x   - root supergroup          0 2012-04-13 18:02 /dsap/rawdata/user/20120416 路径不变
tricky one
hadoop fs -put /work/user.txt /dsap/rawdata/user/20120417/user.txt
alter table ext_user1 add partition (ds='ok') location '/dsap/rawdata/user/20120417';
drwxr-xr-x   - root supergroup          0 2012-04-13 18:19 /dsap/rawdata/user/20120417  路径不变

conclusion alter table add partition(分区=值) location ''; 不改变hdfs路径

你可能感兴趣的:(分布式)