-- Describe a single column; the output line lists its name, type, and comment.
-- The statement must end with a semicolon for the CLI to execute it.
hive> describe financial.employee.salary;
salary float employee salary
-- External table over comma-delimited stock files stored under /data/stocks.
-- "external" marks the table as external; "location" names the directory
-- that holds the data.
create external table if not exists stocks (
    `exchange`      string,  -- back-quoted: EXCHANGE is a reserved keyword in Hive 1.2.0+
    symbol          string,
    ymd             string,
    price_open      float,
    price_high      float,
    price_low       float,
    price_close     float,
    volume          int,
    price_adj_close float
)
row format delimited fields terminated by ','
location '/data/stocks';
关键字external告诉hive这个表是外部的,而后面的location子句则用于告诉hive数据位于哪个路径下。
-- Create an external table that copies the schema (not the data) of
-- financial.employee and points at an explicit storage path.
create external table if not exists financial.employee1
like financial.employee
location '/path/data';
-- Employee table partitioned by country and province.
-- Complex types need explicit element/field types; bare "array" and "struct"
-- do not parse.
-- NOTE(review): the element and field types below are assumed (the originals
-- appear to have been lost from the text) -- confirm against the real data.
create table if not exists employee (
    name         string,
    age          tinyint,
    salary       float,
    subordinates array<string>,
    address      struct<street:string, city:string, state:string, zip:int>
)
partitioned by (country string, province string);
分区改变了hive对数据存储的组织方式。如果我们是在financial数据库中创建的这个表,那么对于这个表只会有一个employee目录与之对应,而hive会在该目录下为每个分区创建对应的子目录(例如country=CN/province=HEBEI)。
-- Switch to strict mode, which rejects queries on partitioned tables that
-- have no partition filter.
hive> set hive.mapred.mode=strict;
...
-- Switch back; the accepted values are "strict" and "nonstrict".
hive> set hive.mapred.mode=nonstrict;
-- List every partition of employee; each output line is one
-- country/province pair, using the partition column names from the table.
hive> show partitions employee;
...
country=CN/province=SHANNXI
country=CN/province=HEBEI
...
-- Restrict the listing to partitions matching the given partition spec.
hive> show partitions employee partition(country='CN',province='HEBEI');
country=CN/province=HEBEI
-- Print the extended table metadata; the partition columns (country,
-- province) are reported under partitionKeys.
hive> describe extended employee;
...
partitionKeys:[FieldSchema(name:country, type:string,comment:null),FieldSchema(name:province, type:string,comment:null)],
...
-- Load a local file, located under the HOME environment variable, into the
-- country='CN', province='HEBEI' partition of employee.
-- Hive variable substitution is written ${namespace:name}; $(...) is not expanded.
load data local inpath '${env:HOME}/hebei-employee'
into table employee
partition (country = 'CN', province = 'HEBEI');
-- External, tab-delimited log table partitioned by calendar year/month/day.
-- NOTE(review): "serverity" looks like a misspelling of "severity"; the name
-- is kept because other statements refer to the column by this name.
create external table if not exists log_message (
    hms        int,
    serverity  string,
    server     string,
    process_id int,
    message    string
)
partitioned by (
    year  int,
    month int,
    day   int
)
row format delimited
    fields terminated by '\t';
-- List the partitions registered for log_message.
hive> show partitions log_message;
...
year=2014/month=11/day=16
...
-- Show the extended metadata of one partition; the location field in the
-- output gives that partition's storage path.
hive> describe extended log_message partition (year=2014, month=11, day=17);
...
location:hdfs://master_server/data/log_message/2014/11/17
...
-- Remove the employee table; "if exists" avoids an error when it is absent.
drop table if exists
    employee;
-- Rename the table log_message to logmsg.
alter table log_message rename to logmsg;
-- Register several partitions in one statement, each with its own location.
-- Further "partition (...) location '...'" clauses may be appended before
-- the terminating semicolon.
alter table log_message add if not exists
    partition (year = 2014, month = 1, day = 1) location '/logs/2014/01/01'
    partition (year = 2014, month = 1, day = 2) location '/logs/2014/01/02';
还可以高效地移动位置来修改某个分区的路径:
-- Point the 2014-01-01 partition at a new storage path.
alter table log_message
    partition (year = 2014, month = 1, day = 1)
    set location 'hdfs://master_server/data/log_message/logs/2014/01/01';
用户还可以删除某个分区:
-- Drop the 2014-01-01 partition; "if exists" avoids an error when it is absent.
alter table log_message
    drop if exists
    partition (year = 2014, month = 1, day = 1);
3)修改列信息
-- Rename column hms, attach a comment, and move it after the serverity column.
-- The old name, the new name, and the type must all be spelled out even when
-- some of them are unchanged; "column" and the comment clause are optional.
alter table log_message
    change column hms hours_minutes_seconds int
    comment 'The hours, minutes, and seconds part of the timestamp'
    after serverity;
即使字段名或字段类型没有改变,用户也需要完全指定旧的字段名,并给出新的字段名及新的字段类型。关键字column和comment子句都是可选的。上面的例子将字段转移到了serverity字段之后。如果用户想将这个字段移动到第一个位置,那么只需要使用first关键字替代after other_column子句即可。
-- Append two new columns to the end of the existing column list.
alter table log_message add columns (
    app_name   string comment 'Application name',
    session_id bigint comment 'The current session id'  -- Hive has no "long" type; bigint is its 64-bit integer
);
5)删除或替换列
-- Replace the whole column list: the first column (formerly hms) gets a new
-- name, server and process_id are dropped, and serverity and message are kept.
-- Only the table metadata changes.
-- NOTE(review): the original text fused the second and third column
-- definitions into one line; they are separated here to match the intent of
-- removing only server and process_id -- confirm.
alter table log_message replace columns (
    hours_mins_secs int    comment 'hour, minute, seconds from timestamp',
    serverity       string comment 'The message severity',
    message         string comment 'The rest of the message'
);
这个语句实际上重命名了之前的hms字段并且从之前的表定义的模式中移除了字段server和process_id。因为是alter语句,所以只有表的元数据信息改变了。
-- Attach a free-form "notes" property to the table metadata.
alter table log_message
    set tblproperties ('notes' = 'The process id is no longer captured; this column is always NULL');
8)修改存储属性
-- Change the storage format of a single partition to SequenceFile.
-- A partition clause is required because the table is partitioned.
alter table log_message
    partition (year = 2014, month = 1, day = 1)
    set fileformat sequencefile;
如果表是分区表,那么需要使用partition子句。
转载请注明出处:http://blog.csdn.net/iAm333