show databases;
(2) Create a database
create database [if not exists] database_name [location path];
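For example, a quick sketch assuming a hypothetical database named test_db and an HDFS location of your choosing:
create database if not exists test_db location '/user/hive/warehouse/test_db.db';
show databases;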
(3) View database structure
desc database [extended] database_name;
(4) Add extra descriptive properties
alter database database_name set dbproperties ('key'='value');
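For example, with the hypothetical test_db and an arbitrary key/value:
alter database test_db set dbproperties ('createtime'='20190501');
desc database extended test_db;
# the property appears only in the extended output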
(5) Drop a database
drop database [if exists] database_name [cascade];
(6) Create a table
create [external] table [if not exists] table_name(field) [partitioned by (field)] [clustered by (field)] [sorted by (field)] row format delimited fields terminated by 'split';
# external: dropping an external table removes only the metadata; the data in HDFS is not deleted
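For example, a sketch of a partitioned external table using a hypothetical student table:
create external table if not exists student(
id int,
name string
)
partitioned by (month string)
row format delimited fields terminated by '\t';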
(7) View table type
desc formatted table_name;
(8) Run Hive commands without entering the Hive CLI
hive -e 'SQL'
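For example, assuming the hypothetical student table (the quotes keep the shell from splitting the statement):
hive -e "select * from student limit 10;"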
(9) Run Hive against a SQL file
hive -f filePath
(10) View HDFS files from inside the Hive CLI
dfs -ls path;
dfs -cat filePath;
(11) View command history
cat ~/.hivehistory
(12) Query a single partition
select field from table_name where field = value;
(13) Union query
select field from table_name where field = value union select field from table_name where field = value;
(14) Add a single partition
alter table table_name add partition(field = value);
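For example, assuming the hypothetical student table partitioned by month; several partitions can be added in one statement, separated by spaces:
alter table student add partition(month='201905');
alter table student add partition(month='201906') partition(month='201907');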
(15) Show partitions
show partitions table_name;
(16) Drop a partition
alter table table_name drop partition(field = value);
(17) Repair a partitioned table whose partition directories were created directly in HDFS
msck repair table table_name;
(1) Load data with load
load data [local] inpath 'path' into table table_name;
# local: load from the local Linux filesystem; without local the path refers to HDFS
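For example, with hypothetical file paths and the student table from above:
load data local inpath '/root/data/student.txt' into table student partition(month='201905');
load data inpath '/user/root/student.txt' overwrite into table student partition(month='201906');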
(2) Insert data
insert into table table_name [partition(field = value)] values();
insert overwrite table table_name partition(field = value) select * from table_name;
create table [if not exists] table_name as select * from table_name;
create table table_name(field) row format delimited fields terminated by 'split' location 'hdfs_path';
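For example, a sketch assuming hypothetical tables student, student_bak (same schema, already created), and student_copy:
insert into table student partition(month='201905') values(1,'zhangsan');
insert overwrite table student_bak partition(month='201905') select id,name from student where month='201905';
create table if not exists student_copy as select id,name from student;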
(3) Export query results to the local Linux filesystem
insert overwrite local directory linux_path select * from table_name;
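For example, adding a row format so the exported file is readable, under a hypothetical local path:
insert overwrite local directory '/root/export/student'
row format delimited fields terminated by '\t'
select * from student;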
(4) Export a table to HDFS
export table table_name to path;
(5) Import from HDFS into Hive
import table table_name from path;
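For example, export then import under a hypothetical HDFS path and table name:
export table student to '/user/hive/export/student';
import table student_imported from '/user/hive/export/student';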
(6) Truncate table data
truncate table table_name;
(7) Count rows
select count(field) from table_name;
(8) Maximum / minimum
select max/min(field) from table_name;
(9) Sum / average
select sum/avg(field) from table_name;
(10) where clause
select * from table_name where condition;
(11) is null and is not null
select * from table_name where field is not null;
(12) like clause
select * from table_name where field like 'pattern';
'field%' matches values starting with field
'_field%' matches values with field starting at the second character
'%field%' matches values containing field
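For example, assuming the hypothetical student table has a name column:
select * from student where name like 'A%';
select * from student where name like '_A%';
select * from student where name like '%A%';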
(13) and / or
select * from table_name where condition1 and [or] condition2;
(14) not in
select * from table_name where field1 not in (value1,value2);
(15) having clause (used together with group by)
select field, count(field) from table_name group by field having count(field) > 1;
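For example, counting duplicate names in the hypothetical student table:
select name, count(id) from student group by name having count(id) > 1;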
(16) join clauses
select a.field,b.field from table_name a [left] join table_name b on a.field = b.field; # keep every row of the left table; rows with no match in the right table show null
select a.field,b.field from table_name a right join table_name b on a.field = b.field; # keep every row of the right table; rows with no match in the left table show null
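For example, a sketch with hypothetical emp and dept tables joined on deptno:
select e.ename, d.dname from emp e left join dept d on e.deptno = d.deptno;
select e.ename, d.dname from emp e right join dept d on e.deptno = d.deptno;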
(17) group by clause
select field from table_name group by field;
(18) order by clause (global ordering)
select * from table_name order by field;
(19) sort by clause (sorts within each reducer)
select * from table_name sort by field;
Set the number of reducers: set mapreduce.job.reduces = value;
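For example, with the hypothetical emp table and three reducers:
set mapreduce.job.reduces = 3;
select * from emp sort by deptno desc;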
(20) distribute by clause (partition the rows, then sort within each partition)
select * from table_name distribute by field1 sort by field2;
(21) cluster by clause (distribute and sort on the same field)
select * from table_name cluster by field;
Enable bucketing: set hive.enforce.bucketing = true;
Set the number of reducers: set mapreduce.job.reduces = -1;
create table table_name(field) clustered by(field) into value buckets row format delimited fields terminated by 'split';
select * from table_name tablesample(bucket 1 out of 2 on id);
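For example, a full bucketing sketch with a hypothetical stu_buck table filled from student:
set hive.enforce.bucketing = true;
set mapreduce.job.reduces = -1;
create table stu_buck(id int, name string)
clustered by(id) into 4 buckets
row format delimited fields terminated by '\t';
insert into table stu_buck select id, name from student;
select * from stu_buck tablesample(bucket 1 out of 4 on id);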
(1) Write a class that extends UDF
(2) Package it as a jar and place the jar in Hive's lib directory
(3) create temporary function function_name as "package.ClassName"; (temporary, valid only for the current session)
Register the jar via hive.aux.jars.path so Hive loads it automatically:
file:///root/hd/hive/lib/<jar name>
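For example, registering and calling a UDF with hypothetical jar, class, and function names:
add jar /root/hd/hive/lib/my_udf.jar;
create temporary function my_lower as "com.example.udf.MyLower";
select my_lower(name) from student;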