hive的基本使用01

数据类型:
普通:tinyint smallint int bigint boolean float double string timestamp binary
集合:struct map array

建表:

create table employees(
    name string,
    salary float,
    sub array<string>,
    deductions map<string,float>,
    address struct<street:string,city:string,state:string,zip:int>
)

hive默认分隔符:
\n 分割行(每行一条记录)
^A 分割字段列
^B 分割array、struct中的元素,以及map中的键值对
^C map的key和value的分隔符

数据定义:
创建:
show databases;
create database test_database;
create database test location '/my/test/store'; 修改默认位置
create database test comment 'zhushi'; 添加描述信息
create database test with dbproperties('creator'='lijie','date'='2016-1-1');
详情:
describe database extended test; 显示详细信息
use test 使用那个库
drop database if exists test 删除database
drop database if exists test cascade 如果库里面有表直接删除 如果是restrict有表不能删除(默认)
修改:
alter database test set dbproperties ('creator'='lijie1');

查看表的详细信息:
describe extended test.table_name (也会显示分区表信息)
describe formatted test.table_name 

外部表:关键字 external (外部表,hive不认为自己完全拥有这份数据,所以删除该表并不会删除数据,只是删除描述表的元数据)
create external table if not exists table_name(
    .....
)

复制表:(如果有external 表示复制表为外部表,若没有则和被复制的表一样)(外部表和管理表)
create table if not exists copy_table like table_name location '/path/data';


分区(如果表中的数据和分区个数非常大,执行一个包含所有分区的查询可能会触发一个巨大的mapreduce,
        1.建议使用strict模式(set hive.mapred.mode=strict;),这样查询没有对分区字段做where过滤的话不允许提交
        2.设置成nonstrict模式(set hive.mapred.mode=nonstrict;),可以提交
        可以通过 show partitions table_name; 查看所有分区
                 show partitions table_name partition (country = 'US'); 查看部分分区
    )
create external table if not exists log_message(
    hms int,
    severity string,
    server string,
    process_id int,
    message string
) 
partitioned by (year int,month int,day int)
row format delimited fields terminated by '\t';

管理表中用户通过载入数据创建分区:
load data local inpath '${env:HOME}/california-employees'
into table employees
partition (country = 'US',state='CA');

自定义存储格式
stored as textfile;

删除表:
drop table if exists table_name;

表重命名
alter table log_message rename to logmsg;

修改添加删除表分区
alter table log_message add if not exists
partition (year = 2016,month=1,day=1) location '/logs/2016/1/1'
partition (year = 2016,month=1,day=2) location '/logs/2016/1/2'
partition (year = 2016,month=1,day=3) location '/logs/2016/1/3';

修改分区路径
alter table log_message partition (year = 2016,month=1,day=3) set location 's3n://ourbucket/logs/2016/01/03';

删除分区
alter table log_message drop if exists partition (year = 2016,month=1,day=3);

修改列
增加列
删除替换列
修改表属性
修改存储属性

数据操作
装载数据 inpath下不能包含任何文件夹
load data local inpath '${env:HOME}/california-employees'
overwrite into table employees
partition (country='US',state='CA');

select 插入
insert into table employees
partition(country='US',state='OR')
select * from old_table ot
where ot.cnty='US' and ot.st='OR';  (每个分区各执行一条这样的语句,需要扫描表多次)
----------------------
from old_table ot
insert overwrite table employees
    partition(country='US',state='OR')
    select * where ot.cnty='US' and ot.st='OR'
insert overwrite table employees
    partition(country='US',state='ORR')
    select * where ot.cnty='US' and ot.st='ORR'
insert overwrite table employees
    partition(country='US',state='ORRRR')
    select * where ot.cnty='US' and ot.st='ORRRR';  扫描表一次

    动态分区插入  (根据位置)
    insert overwrite table employees
    partition (country,state)
    select ...,ot.cnty,ot.st
    from old_table ot;  

    (动态静态结合)
    insert overwrite table employees
    partition (country='US',state)
    select ...,ot.cnty,ot.st
    from old_table ot
    where ot.cnty='US'; 

导出数据:
insert overwrite local directory '/temp/employees'
select name,salary,address from employees where state = 'CA';

多个文件输出
from old_table ot
insert overwrite directory '/tmp/california-employees'
    select * where ot.cnty='US' and ot.st='ca'
insert overwrite directory '/tmp/ccalifornia-employees'
    select * where ot.cnty='US' and ot.st='cca'
insert overwrite directory '/tmp/cccalifornia-employees'
    select * where ot.cnty='US' and ot.st='ccca';

你可能感兴趣的:(hive)