Big Data: Hive DDL and DML Operations

1. DDL Operations

(1) Show databases
show databases;

(2) Create a database

create database [if not exists] database_name [location 'hdfs_path'];

(3) Show database details

desc database [extended] database_name;

(4) Add extra description information

alter database database_name set dbproperties ('key' = 'value');

(5) Drop a database

drop database [if exists] database_name [cascade];
# cascade is required when the database is not empty

(6) Create a table

create [external] table [if not exists] table_name(fields) [partitioned by (field)] [clustered by (field) [sorted by (field)] into n buckets] row format delimited fields terminated by 'delimiter';
# external: dropping an external table removes only the metadata; the data on HDFS is kept
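
For example, a sketch of a partitioned external table over tab-separated files; logs, id, msg, and the location path are hypothetical names:

create external table if not exists logs(
    id  int,
    msg string
)
partitioned by (month string)
row format delimited fields terminated by '\t'
location '/user/hive/warehouse/logs';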

(7) Show the table type (and other table details)

desc formatted table_name;

(8) Run Hive statements from the shell without entering the Hive CLI

hive -e 'SQL statement'

(9) Run a SQL script file against Hive

hive -f file_path

(10) View HDFS files from inside the Hive CLI

dfs -ls path;
dfs -cat file_path;

(11) View the command history

cat ~/.hivehistory

(12) Query a single partition

select field from table_name where partition_field = value;

(13) Union query

select field from table_name where partition_field = value1 union select field from table_name where partition_field = value2;
# union removes duplicate rows; use union all to keep them

(14) Add a single partition

alter table table_name add partition(partition_field = value);

(15) Show partitions

show partitions table_name;

(16) Drop a partition

alter table table_name drop partition(partition_field = value);
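
A sketch of the partition lifecycle from (14) to (16), with a hypothetical logs table; note that add takes space-separated partitions while drop takes comma-separated ones:

alter table logs add partition(month = '202001') partition(month = '202002');
show partitions logs;
alter table logs drop partition(month = '202001'), partition(month = '202002');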

(17) Repair partition metadata after placing data directly into HDFS partition directories

msck repair table table_name;
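
For example, assuming a partition directory for a hypothetical logs table was created directly on HDFS (outside of Hive), the metastore can be synchronized like this:

-- /user/hive/warehouse/logs/month=202003 exists on HDFS but is
-- unknown to the metastore
msck repair table logs;
show partitions logs;  -- month=202003 is now listed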

2. DML Operations

(1) Load data

load data [local] inpath 'path' into table table_name;
# local: the path is on the local Linux filesystem; without local, the path is on HDFS

(2) Insert data

insert into table table_name [partition(partition_field = value)] values(...);
insert overwrite table table_name partition(partition_field = value) select * from source_table;
create table [if not exists] table_name as select * from source_table;
create table table_name(fields) row format delimited fields terminated by 'delimiter' location 'hdfs_path';
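
A concrete sketch of the first two forms, assuming a hypothetical student(id, name) table partitioned by month:

insert into table student partition(month = '202001') values(1, 'zhangsan');
insert overwrite table student partition(month = '202002')
select id, name from student where month = '202001';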

(3) Export query results to the local Linux filesystem

insert overwrite local directory 'linux_path' select * from table_name;

(4) Export a table to HDFS

export table table_name to 'hdfs_path';

(5) Import from HDFS into Hive

import table table_name from 'hdfs_path';
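
export and import pair up; a sketch with hypothetical table names (import creates the target table if it does not exist):

export table student to '/user/hive/export/student';
import table student2 from '/user/hive/export/student';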

(6) Truncate a table

truncate table table_name;
# works only on managed (internal) tables

(7) Count rows

select count(field) from table_name;

(8) Max / min

select max/min(field) from table_name;

(9) Sum / average

select sum/avg(field) from table_name;

(10) where clause

select * from table_name where field = value;

(11) is null and is not null

select * from table_name where field is not null;

(12) like clause

select * from table_name where field like 'pattern';
'field%'  matches values starting with field
'_field%' matches values with field starting at the second character
'%field%' matches values containing field
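
For instance, against a hypothetical emp table with an ename column:

select * from emp where ename like 'A%';    -- names starting with A
select * from emp where ename like '_A%';   -- A as the second character
select * from emp where ename like '%A%';   -- names containing A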

(13) and / or

select * from table_name where condition1 and [or] condition2;

(14) not

select * from table_name where field not in (value1, value2);

(15) having clause

select field from table_name group by field having count(field) > 1;
# having filters groups, so it must follow group by
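
For example, finding departments whose average salary exceeds 2000 in a hypothetical emp table:

select deptno, avg(sal) avg_sal
from emp
group by deptno
having avg(sal) > 2000;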

(16) join clause

select a.field, b.field from table_name a [left] join table_name b on a.field = b.field;  # left join keeps every row of the left table; right-table columns are null where there is no match
select a.field, b.field from table_name a right join table_name b on a.field = b.field;  # right join keeps every row of the right table; left-table columns are null where there is no match
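
A sketch joining hypothetical emp and dept tables on the department key:

select e.ename, d.dname
from emp e
left join dept d on e.deptno = d.deptno;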

(17) group by clause

select field from table_name group by field;

(18) order by clause (global sort, runs in a single reducer)

select * from table_name order by field;

(19) sort by clause (sorts within each reducer)

select * from table_name sort by field;
Set the number of reducers: set mapreduce.job.reduces = value;
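
Writing the result to files makes the per-reducer ordering visible; a sketch with a hypothetical emp table:

set mapreduce.job.reduces = 3;
-- each of the 3 output files is sorted by deptno on its own;
-- there is no total order across files
insert overwrite local directory '/opt/module/data/sortby-result'
select * from emp sort by deptno desc;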

(20) distribute by clause (partitions rows across reducers, usually combined with sort by)

select * from table_name distribute by field1 sort by field2;

(21) cluster by clause (distribute and sort on the same field)

select * from table_name cluster by field;
# equivalent to distribute by field sort by field, ascending only
Enable bucketing: set hive.enforce.bucketing = true;
Set the number of reducers: set mapreduce.job.reduces = -1;
create table table_name(fields) clustered by(field) into n buckets row format delimited fields terminated by 'delimiter';
select * from table_name tablesample(bucket 1 out of 2 on id);
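
An end-to-end bucketing sketch; student_bucket, student, and the column names are hypothetical:

set hive.enforce.bucketing = true;
set mapreduce.job.reduces = -1;
create table student_bucket(id int, name string)
clustered by(id) into 4 buckets
row format delimited fields terminated by '\t';
insert into table student_bucket select id, name from student;
-- sample the 1st of every 2 buckets, i.e. buckets 1 and 3 of 4
select * from student_bucket tablesample(bucket 1 out of 2 on id);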

3. UDFs (User-Defined Functions)

(1) Write a class that extends Hive's UDF class

(2) Package it as a jar and put it in Hive's lib directory

(3) create temporary function function_name as "package.ClassName"; (temporary: valid only for the current session)


(4) To make the jar available permanently, register it in hive-site.xml:

<property>
    <name>hive.aux.jars.path</name>
    <value>file:///root/hd/hive/lib/jar-file-name</value>
</property>
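
Registering and calling the function then looks like this; my_lower, the package path, and the student table are hypothetical:

create temporary function my_lower as "com.example.hive.udf.MyLower";
select my_lower(name) from student;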
