HiveQL: Data Definition

-- List databases

show databases; 

-- Create a database

create database human_resources;

-- List all databases whose names start with h

show databases like 'h.*';

-- Create a database at a specified location

create database financials
location '/my/dir';

-- Create a database with a comment

create database financials
comment 'Holds all financials tables';

-- View the description of the financials database

describe database financials;

-- Create a database with key-value properties

create database financials
with DBPROPERTIES ('creator' = 'Hxh', 'date' = '2010-10-10');
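
-- Supplementary example: the key-value properties set above can be inspected with
-- DESCRIBE DATABASE EXTENDED:

describe database extended financials;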

-- Use the financials database

use financials;

-- Show the current database name in the CLI prompt

set hive.cli.print.current.db=true;
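
-- Supplementary note: with this setting enabled the CLI prompt includes the current
-- database, e.g. something like hive (financials)> ; it can be switched off again with:

set hive.cli.print.current.db=false;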

-- Drop the database if it exists (by default the drop fails if the database still contains tables)

drop database if exists financials;

-- Drop the tables in the database first, then drop the database itself (CASCADE)

drop database if exists financials CASCADE;

-- Modify the database properties (the database name and its directory location cannot be changed)

alter database financials set DBPROPERTIES('edited-by'='hugh');

-- Create an employees table
-- (/user/hive/warehouse/ is the default data warehouse path)
-- Hive automatically adds two table properties: last_modified_by and last_modified_time

create table if not exists mydb.employees(
	name		 string comment 'Employee name',
	salary		 float comment 'Employee salary',
	subordinates array<string> comment 'Names of subordinates',
	deductions	 map<string, float>
				 comment 'Keys are deductions names, values are percentages',
	address		 struct<street:string, city:string, state:string, zip:int>
				 comment 'Home address')
comment 'Description of the table'
tblproperties ('creator'='me','created_at'='2010-10-10')
location '/user/hive/warehouse/mydb.db/employees';
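
-- Supplementary example: the table properties, including the ones Hive adds
-- automatically, can be listed with SHOW TBLPROPERTIES (available in newer Hive versions):

show tblproperties mydb.employees;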

-- Copy a table schema

create table if not exists mydb.employees2
like mydb.employees;

-- Switch to a database and list its tables

use mydb;
show tables;
show tables in mydb;

-- Show detailed table structure information

describe extended mydb.employees;

-- Show information about a single column

describe mydb.employees.salary;

-- Create an external table

create external table if not exists stocks(
	exchange	string,
	symbol		string,
	ymd			string,
	price_open	float,
	price_high	float,
	price_low	float,
	price_close float,
	volume		int,
	price_adj_close float)
row format delimited fields terminated by ','
location '/data/stocks';

-- Copy the schema of an external table (the data is not copied)

create external table if not exists mydb.employees3
like mydb.employees
location '/path/to/data';

-- Create a partitioned table (partitioning makes queries faster)

create table employees(
	name		 string comment 'Employee name',
	salary		 float comment 'Employee salary',
	subordinates array<string> comment 'Names of subordinates',
	deductions	 map<string, float>
				 comment 'Keys are deductions names, values are percentages',
	address		 struct<street:string, city:string, state:string, zip:int>
)
partitioned by (country string, state string);

-- Subdirectories reflecting the partition structure

hdfs://master_server/user/hive/warehouse/mydb.db/employees/
.../employees/country=CA/state=AB
.../employees/country=CA/state=BC
...
.../employees/country=US/state=LA
.../employees/country=US/state=AK
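
-- Supplementary example (the local file path below is hypothetical): loading data into a
-- specific partition is what creates the corresponding country=/state= subdirectory:

load data local inpath '/path/to/ca_employees'
overwrite into table employees
partition (country = 'CA', state = 'AB');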

-- Query the partitioned table for all employees in the US state of Illinois

select * from employees
where country = 'US' and state = 'IL';

-- Enable strict mode (a query against a partitioned table is rejected unless the WHERE clause contains a partition filter)

set hive.mapred.mode=strict;
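
-- Supplementary example: in strict mode a query like the following, which has no
-- partition filter on the partitioned employees table, is rejected with an error about
-- the missing partition predicate:

select * from employees;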

-- Enable nonstrict mode

set hive.mapred.mode=nonstrict;

-- Show the partitions that exist in a table

show partitions employees;

-- Show specific partitions of a table (filtered by a partition spec)

show partitions employees partition(country='US');

-- Show the partition keys

describe extended employees;

-- Create an external partitioned table (shared data, optimized queries; well suited to log analysis;
-- the data is not deleted even if the table is dropped)

create external table if not exists log_messages(
	hms			int,
	severity	string,
	server		string,
	process_id	int,
	message		string)
partitioned by (year int,month int,day int)
row format delimited fields terminated by '\t';
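
-- Supplementary note: no LOCATION clause is given at creation time; each partition of an
-- external partitioned table is registered separately with its own path, for example
-- (the HDFS path below is hypothetical):

alter table log_messages add partition(year = 2012, month = 1, day = 2)
location 'hdfs://master_server/data/log_messages/2012/01/02';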

-- Show the partitions of the external table

show partitions log_messages;

-- Describe the table (the partition keys are also shown)

describe extended log_messages;

-- Show the actual path where a partition's data is stored

describe extended log_messages partition (year=2012, month=1, day=2);

-- Customize the table storage format: stored as textfile (and specify each delimiter explicitly)

create table employees(
	name		 string,
	salary		 float,
	subordinates array<string>,
	deductions	 map<string, float>,
	address		 struct<street:string, city:string, state:string, zip:int>
)
row format delimited
fields terminated by '\001'
collection items terminated by '\002'
map keys terminated by '\003'
lines terminated by '\n'
stored as textfile;

-- Specify the input and output formats (stored as inputformat ... outputformat ...)

create table kst
partitioned by (ds string)
row format serde 'com.linkedin.haivvreo.AvroSerDe'
with serdeproperties ('schema.url'='http://schema_provider/kst.avsc')
stored as 
inputformat 'com.linkedin.haivvreo.AvroContainerInputFormat'
outputformat 'com.linkedin.haivvreo.AvroContainerOutputFormat';

-- Show the table's input and output formats

describe extended kst;

-- Drop a table (for an external table, the metadata is deleted but the data is not)

drop table if exists employees;

-- Rename a table

alter table log_messages rename to logmsgs;

-- Add partitions to a table (since v0.8.0, multiple partitions can be added in a single statement)

alter table log_messages add if not exists
partition (year = 2011, month = 1,day = 1) location 'logs/2011/01/01'
partition (year = 2011, month = 1,day = 2) location 'logs/2011/01/02'
partition (year = 2011, month = 1,day = 3) location 'logs/2011/01/03'
...;

-- Change a partition's location (the partition now points to the new path; existing data is not moved or deleted)

alter table log_messages partition(year = 2011, month = 12, day = 2)
set location 's3n://ourbucket/logs/2011/01/02';

-- Drop a partition (drop)

alter table log_messages drop if exists partition(year = 2011, month = 12, day = 2);

-- Modify column information (change)

alter table log_messages
change column hms hours_minutes_seconds INT
comment 'The hours, minutes, and seconds part of the timestamp'
after severity;

-- Add columns

alter table log_messages add columns (
	app_name   string comment 'Application name',
	session_id bigint comment 'The current session id');

-- Delete or replace columns (replace)

alter table log_messages replace columns (
	hours_mins_secs int 
);

-- Modify table properties (properties can be added or changed, but not deleted)

alter table log_messages set tblproperties (
	'notes' = 'The process id is no longer captured ; this column is always NULL'
);

-- Change the storage properties (to sequencefile)

alter table log_messages
partition(year = 2012, month = 1, day = 1)
set fileformat sequencefile;

-- Touch a partition (touch triggers any registered execution hooks, e.g. after files are modified outside Hive)

alter table log_messages touch
partition(year = 2012, month = 1, day = 1);

-- Reference: <>
