#建表
create table sign_in (uri string , koudaiToken string) row format delimited fields terminated by '|';
#如果不存在表则创建
create table if not exists mytable (id bigint,name string);
#创建外部表
hive> create external table lss_sign_in (uri string,token string) row format delimited fields terminated by '\t' ;
#从HDFS加载数据到表
hive> load data inpath 'hdfs:/user/root/input/sign_in/sign.uri' overwrite into table lss_sign_in;
#本地文件加载到hive 仓库:
hive> LOAD DATA LOCAL INPATH '/luanshoushen/test.txt' OVERWRITE INTO TABLE lss;
#从HDFS加载到HIVE 仓库(注意这里加载完成后会删除掉HDFS上的源文件)
hive> LOAD DATA INPATH 'hdfs:/user/root/input/test.txt' OVERWRITE INTO TABLE kevin;
Loading data to table default.kevin
Deleted hdfs://idc01-vm-test-124:9019/user/hive/warehouse/kevin
虽然源文件表面上看是在HDFS删除了,其实文件被移到hive HDFS数据仓库中去了。
默认数据库仓库的文件保存在: /user/hive/warehouse/
其中kevin对应HIVE元数据中的表名称。
test.txt就是源文件,从本地加载文件和HDFS加载文件类似。
./hadoop fs -text /user/hive/warehouse/kevin/test.txt
#查看表结构
describe tableName;
id int
name string
age int
也可以使用:desc tableName;
#显示所有函数
show functions;
#查看函数使用方法
hive> describe function substr;
substr(str, pos[, len]) – returns the substring of str that starts at pos and is of length len
#根据URI分组,count(uri) 可以统计URI的PV
hive> select uri,count(uri) from sign_in group by uri;
#统计UV
hive> select uri, count(distinct koudaitoken) from sign_in group by uri;
#也同样支持 limit
#修改表结构
hive> alter table sign_in_uri replace columns(uri string);
#把Select结果插入到表中
insert overwrite table sign_in_uri select uri from sign_in group by uri;
#连接查询
#左连接
select sign_in.*,sign_in_uri.* from sign_in_uri left outer join sign_in on(sign_in_uri.uri = sign_in.uri);
#右连接
select sign_in.*,sign_in_uri.* from sign_in_uri right outer join sign_in on(sign_in_uri.uri = sign_in.uri) ;
#全连接
hive> select sign_in.*,sign_in_uri.* from sign_in_uri full outer join sign_in on(sign_in_uri.uri = sign_in.uri) limit 100;
#in 查询(Hive 0.13 之前的版本不支持 IN/EXISTS 子查询),可使用 left semi join 达到相似效果
hive> select sign_in_uri.* from sign_in_uri left semi join sign_in on(sign_in_uri.uri = sign_in.uri) limit 10;
#使用正则
hive> select regexp_extract(koudaitoken,'\\[.*\\]',0) from sign_in limit 10;
#查看函数使用方法
hive> describe function regexp_extract;
regexp_extract(str, regexp[, idx]) – extracts a group that matches regexp
#注意写入HDFS或本地目录时会删除掉目录下的内容。
#将Hive数据导出到本地目录
hive> insert overwrite local directory '/luanshoushen/hive' select * from sign_in_uri;
#将Hive数据导出到HDFS
hive> insert overwrite directory '/user/root/input/hive' select * from sign_in;
#使用一个查询将结果写入HDFS目录和本地目录
hive> from sign_in
> insert overwrite local directory '/luanshoushen/hive' select *
> insert overwrite directory '/user/root/input/' select *
> ;
http://mojijs.com/2014/04/134869/index.html