Hive 的启动方式:
1、hive 命令行模式,直接输入/hive/bin/hive的执行程序,或者输入 hive - -service cli
2、hive web界面的启动方式,hive - -service hwi
3、hive 远程服务 (端口号10000) 启动方式,nohup hive - -service hiveserver &
创建表cite,并指定列的分隔符为“,”:
create table cite(citing INT, cited INT) row format delimited fields terminated by ',';
创建表page_view,并指定分区字段为dt,同时把列的分隔符设置为“,”:
create table page_view (pid INT, url string) partitioned by (dt string) row format delimited fields terminated by ',';
创建表S_USER_ERROR,并指定分区字段为YEAR、MONTH和DAY,同时把列的分隔符设置为“,”,最后指定表的存储路径为/data/stg/s_user_error/:
CREATE TABLE S_USER_ERROR
(
bar INT,
foo string
)
PARTITIONED BY (YEAR STRING,MONTH STRING,DAY STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\,' STORED AS TEXTFILE LOCATION '/data/stg/s_user_error/';
注意:如果不是外表部,drop table的时候会将HDFS上文件删除。
创建外部表:
CREATE EXTERNAL TABLE lhttest (
id int,
name string
)
PARTITIONED BY (pt string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\,'
STORED AS TEXTFILE
LOCATION '/data/stg/lhttest';
注意:在删除外部表的时候,不会删除HDFS上的关联文件。
把一个本地系统中的文件加载到visiter表的2010-06-08分区下:
load data local inpath '/root/user/myHiveTestFile/02' overwrite into table visiter PARTITION (dates='2010-06-08');
查询cite表的前3行记录:
select * from cite limit 3;
描述表pv的结构:
Describe pv;
desc pv;
删除表:
drop table lhttest;
把表page_view重命名为pv:
Alter table page_view rename to pv;
给表pv添加一个名为IP的列:
Alter table pv add columns (ip string);
添加一个分区:
Alter table pv add partition (dt='2012-06-26');
把已经存在的一个目录设置为表的一个分区:
alter table pv add partition (dt= '2012-06-27') location '/hive/warehouse/pv/2012-06-27';
显示一个表的分区信息:
show partitions lhtstudent;
删除一个分区:
Alter table pv drop partition (dt='2012-06-26');
从lhtstudent的分区2012-06-26中查出数据,插入到studentnum的名为2012-06-26的分区中:
insert overwrite table studentnum partition (dt='2012-06-26') select count(1) from lhtstudent where dt='2012-06-26'
取lhtstudent中字段name的前3个字符:
Select substr(name,0,3) as subname from lhtstudent;
对字段id进行去重操作:
Select distinct(id) from lhtstudent;
通过Join关联查询(相当于inner join):
select distinct i.id,i.name,s.score from lhtstudent i join studentscore s on i.id = s.stuid;
通过left outer join关联查询(相当于sql的left join):
select i.id,i.name,s.score from lhtstudent i left outer join studentscore s on i.id = s.stuid;
通过right outer join关联查询(相当于sql的right join):
select i.id,i.name,s.score from lhtstudent i right outer join studentscore s on i.id = s.stuid;
通过order by 进行排序:
select * from lhtstudent order by id desc;
通过like进行模糊查询:
select * from lhtstudent where name like '%liu%';
通过group by 进行排序:
select id from lhtstudent group by id;
通过case语句进行转换:
select case dt when '2012-06-25' then '今天' when '2012-06-26' then '明天' end from lhtstudent;
select score , ( case when score >= 60 then '及格' when score < 60 then '不及格' end) from lhtstudent;
通过concat连接两个字符串:
select concat(id,name) from lhtstudent;
upper()字符串转大写:
select upper(name) from lhtstudent;
lower()字符串小写:
select lower(name) from lhtstudent;
length()取到字符串长度:
select name,length(name) from lhtstudent;
trim()去空格:
select trim(name) from lhtstudent;
year(string date)返回年:
select year('2012-07-11 12:30:50') from lhttest; //返回2012
month(string date) 返回月:
select month('2012-07-11 12:30:50') from lhttest;//返回7
day(string date) 返回日:
select day('2012-07-11 12:30:50') from lhttest;//返回11
dayofmonth(date)返回一个月中的第几天:
select dayofmonth('2012-07-11 12:30:50') from lhttest;//返回11
hour(string date) 返回小时:
select hour('2012-07-11 12:30:50') from lhttest;//返回12
minute(string date) 返回分钟:
select minute('2012-07-11 12:30:50') from lhttest;//返回30
second(string date) 返回秒:
select second('2012-07-11 12:30:50') from lhttest;//返回50
weekofyear(string date) 返回一年中的第几周:
select weekofyear('2012-07-11 12:30:50') from lhttest;//返回28
datediff(string enddate, string startdate) 返回enddate和startdate的天数的差:
select datediff('2012-07-11','2012-07-09') from lhttest;//返回2
select datediff('2012-07-11','2012-07-12') from lhttest;//返回-1
date_add(string startdate, int days) 加days天数到startdate:
select date_add('2012-07-11',1) from lhttest;//返回2012-07-12
date_sub(string startdate, int days) 从startdate减去days天:
select date_sub('2012-07-11',1) from lhttest;//返回2012-07-10
to_date(string timestamp) 返回日期字符串:
select to_date('2012-07-12 10:30') from lhttest; //返回2012-07-12
unix_timestamp() 获得当前时间戳:
select unix_timestamp() from lhttest;//返回当前时间的秒数
from_unixtime(int unixtime) 将时间戳(unix epoch秒数)转换为日期时间字符串:
select from_unixtime(1341910120) from lhttest;//返回2012-07-10 16:48:40
select from_unixtime(0) from lhttest;//返回1970-01-01 00:00:00