CREATE DATABASE [IF NOT EXISTS] database_name
[COMMENT database_comment]
[LOCATION hdfs_path]
[WITH DBPROPERTIES (property_name=property_value, ...)];
默认地址:/user/hive/warehouse/db_name.db/table_name/partition_name/…
USE database_name;
注意:只有use时不用写DATABASE关键字
DROP DATABASE [IF EXISTS] database_name [RESTRICT|CASCADE];
ALTER DATABASE database_name SET DBPROPERTIES (property_name=property_value, …);
ALTER DATABASE database_name SET OWNER [USER|ROLE] user_or_role;
# 手动建表
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name -- (Note: TEMPORARY available in Hive 0.14.0 and later)
[(col_name data_type [COMMENT col_comment], ...)]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
[CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
[SKEWED BY (col_name, col_name, ...) -- (Note: Available in Hive 0.10.0 and later)]
ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)
[STORED AS DIRECTORIES]
[
[ROW FORMAT row_format]
[STORED AS file_format]
| STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)] -- (Note: Available in Hive 0.6.0 and later)
]
[LOCATION hdfs_path]
[TBLPROPERTIES (property_name=property_value, ...)] -- (Note: Available in Hive 0.6.0 and later)
[AS select_statement]; -- (Note: Available in Hive 0.5.0 and later; not supported for external tables)
# 复制表结构
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
LIKE existing_table_or_view_name
[LOCATION hdfs_path];
数据类型
data_type
: primitive_type
| array_type
| map_type
| struct_type
| union_type -- (Note: Available in Hive 0.7.0 and later)
primitive_type
: TINYINT
| SMALLINT
| INT
| BIGINT
| BOOLEAN
| FLOAT
| DOUBLE
| STRING
| BINARY -- (Note: Available in Hive 0.8.0 and later)
| TIMESTAMP -- (Note: Available in Hive 0.8.0 and later)
| DECIMAL -- (Note: Available in Hive 0.11.0 and later)
| DECIMAL(precision, scale) -- (Note: Available in Hive 0.13.0 and later)
| DATE -- (Note: Available in Hive 0.12.0 and later)
| VARCHAR -- (Note: Available in Hive 0.12.0 and later)
| CHAR -- (Note: Available in Hive 0.13.0 and later)
array_type
: ARRAY < data_type >
map_type
: MAP < primitive_type, data_type >
struct_type
: STRUCT < col_name : data_type [COMMENT col_comment], ...>
union_type
: UNIONTYPE < data_type, data_type, ... > -- (Note: Available in Hive 0.7.0 and later)
# 行列分隔符
row_format
: DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char]
[MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
[NULL DEFINED AS char] -- (Note: Available in Hive 0.13 and later)
| SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)]
# 文件存储格式
file_format:
: SEQUENCEFILE
| TEXTFILE -- (Default, depending on hive.default.fileformat configuration)
| RCFILE -- (Note: Available in Hive 0.6.0 and later)
| ORC -- (Note: Available in Hive 0.11.0 and later)
| PARQUET -- (Note: Available in Hive 0.13.0 and later)
| AVRO -- (Note: Available in Hive 0.14.0 and later)
| INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname
ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘\001’ STORED AS SEQUENCEFILE;
ROW FORMAT SERDE … STORED AS SEQUENCEFILE;
RegEx SerDe
CREATE TABLE apachelog (
host STRING,
identity STRING,
user STRING,
time STRING,
request STRING,
status STRING,
size STRING,
referer STRING,
agent STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
"input.regex" = "([^]*) ([^]*) ([^]*) (-|\\[^\\]*\\]) ([^ \"]*|\"[^\"]*\") (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\".*\") ([^ \"]*|\".*\"))?"
)
STORED AS TEXTFILE;
CREATE TABLE page_view(viewTime INT, userid BIGINT,
page_url STRING, referrer_url STRING,
ip STRING COMMENT 'IP Address of the User')
COMMENT 'This is the page view table'
PARTITIONED BY(dt STRING, country STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001'
STORED AS SEQUENCEFILE;
可以自定义HDFS存储地址,drop表时数据不删除,还是要指定分隔符的
CREATE EXTERNAL TABLE page_view(viewTime INT, userid BIGINT,
page_url STRING, referrer_url STRING,
ip STRING COMMENT 'IP Address of the User',
country STRING COMMENT 'country of origination')
COMMENT 'This is the staging page view table'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\054'
STORED AS TEXTFILE
LOCATION '' ;
根据查询结果的列和列类型建表,可以自己指定列分隔符和文件存储格式
CREATE TABLE new_key_value_store
ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"
STORED AS RCFile
AS
SELECT (key % 1024) new_key, concat(key, value) key_value_pair
FROM key_value_store
SORT BY new_key, key_value_pair;
复制已有表的表结构,不复制数据(属性一样,仅表名不同)
CREATE TABLE empty_key_value_store
LIKE key_value_store;
CREATE TABLE page_view(viewTime INT, userid BIGINT,
page_url STRING, referrer_url STRING,
ip STRING COMMENT 'IP Address of the User')
COMMENT 'This is the page view table'
PARTITIONED BY(dt STRING, country STRING)
CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001'
COLLECTION ITEMS TERMINATED BY '\002'
MAP KEYS TERMINATED BY '\003'
STORED AS SEQUENCEFILE;
例如:建表语句
CREATE TABLE user_info_bucketed(user_id BIGINT, firstname STRING, lastname STRING)
COMMENT 'A bucketed copy of user_info'
PARTITIONED BY(ds STRING)
CLUSTERED BY(user_id) INTO 256 BUCKETS;
插入语句
set hive.enforce.bucketing = true;
FROM user_id
INSERT OVERWRITE TABLE user_info_bucketed
PARTITION (ds='2009-02-25')
SELECT userid, firstname, lastname WHERE ds='2009-02-25';
某列的几个值出现频率非常高,所以相比于其它值他们的运算非常慢,造成倾斜,Hive可以指定将特定的值单独存储到独立文件中来提高性能。
单列举例
CREATE TABLE list_bucket_single (key STRING, value STRING)
SKEWED BY (key) ON (1,5,6) [STORED AS DIRECTORIES];
多列举例
CREATE TABLE list_bucket_multiple (col1 STRING, col2 int, col3 STRING)
SKEWED BY (col1, col2) ON (('s1',1), ('s3',3), ('s13',13), ('s78',78)) [STORED AS DIRECTORIES];
DROP TABLE [IF EXISTS] table_name [PURGE];
清空表数据,指定partition时只清空特定partition的数据。
TRUNCATE TABLE table_name [PARTITION (partition_column = partition_col_value, partition_column = partition_col_value, ...)];
ALTER TABLE table_name RENAME TO new_table_name;
ALTER TABLE table_name SET TBLPROPERTIES (property_name = property_value, property_name = property_value, ... );
ALTER TABLE table_name SET TBLPROPERTIES ('comment' = new_comment);
ALTER TABLE table_name [PARTITION partition_spec] SET SERDE serde_class_name [WITH SERDEPROPERTIES (property_name = property_value, property_name = property_value, ... )];
ALTER TABLE table_name [PARTITION partition_spec] SET SERDEPROPERTIES (property_name = property_value, property_name = property_value, ... );
属性的名和值都需要加引号
ALTER TABLE table_name SET SERDEPROPERTIES ('field.delim' = ',');
ALTER TABLE table_name CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name, ...)]
INTO num_buckets BUCKETS;
只修改metadata,用户需要自己确保实际数据的格式与改完后的metadata相符
改为倾斜表
ALTER TABLE table_name SKEWED BY (col_name1, col_name2, ...)
ON ([(col_name1_value, col_name2_value, ...) [, (col_name1_value, col_name2_value), ...]
[STORED AS DIRECTORIES];
改为非倾斜表
ALTER TABLE table_name NOT SKEWED;
改为不单独存储倾斜列
ALTER TABLE table_name NOT STORED AS DIRECTORIES;
修改倾斜表位置
ALTER TABLE table_name SET SKEWED LOCATION (col_name1="location1" [, col_name2="location2", ...] );
ALTER TABLE table_name ADD [IF NOT EXISTS] PARTITION partition_spec
[LOCATION 'location1'] partition_spec [LOCATION 'location2'] ...;
partition_spec:
: (partition_column = partition_col_value, partition_column = partition_col_value, ...)
修改分区仅修改metadata,实际数据需要用户自己修改
ALTER TABLE table_name PARTITION partition_spec RENAME TO PARTITION partition_spec;
相当于更改了分区对应的那个列的值
ALTER TABLE table_name_1 EXCHANGE PARTITION (partition_spec) WITH TABLE table_name_2;
-- multiple partitions
ALTER TABLE table_name_1 EXCHANGE PARTITION (partition_spec, partition_spec2, ...) WITH TABLE table_name_2;
将分区从一个表迁移到另一个表,要求两个表结构一致且目标表没有这个分区
当我们手动传数据到hdfs作为一个分区时需要在metadata进行设置以便能够识别
MSCK REPAIR TABLE table_name;
ALTER TABLE table_name DROP [IF EXISTS] PARTITION partition_spec[, PARTITION partition_spec, ...]
[IGNORE PROTECTION] [PURGE];
删除分区时同时删除metadata和data,删除的data到用户垃圾箱
ALTER TABLE table_name [PARTITION partition_spec] SET FILEFORMAT file_format;
ALTER TABLE table_name [PARTITION partition_spec] SET LOCATION "new location";
列名是大小写敏感的
ALTER TABLE table_name [PARTITION partition_spec] CHANGE [COLUMN] col_old_name col_new_name column_type
[COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT];
例子
CREATE TABLE test_change (a int, b int, c int);
# First change column a's name to a1.
ALTER TABLE test_change CHANGE a a1 INT;
# Next change column a1's name to a2, its data type to string, and put it after column b.
ALTER TABLE test_change CHANGE a1 a2 STRING AFTER b;
# The new table's structure is: b int, a2 string, c int.
# Then change column c's name to c1, and put it as the first column.
ALTER TABLE test_change CHANGE c c1 INT FIRST;
# The new table's structure is: c1 int, b int, a2 string.
# Add a comment to column a1
ALTER TABLE test_change CHANGE a1 a1 INT COMMENT 'this is column a1';
ALTER TABLE table_name
[PARTITION partition_spec]
ADD COLUMNS (col_name data_type [COMMENT col_comment], ...)
[CASCADE|RESTRICT]
添加的列位于普通列最后位置,在分区列之前
CREATE VIEW [IF NOT EXISTS] [db_name.]view_name [(column_name [COMMENT column_comment], ...) ]
[COMMENT view_comment]
[TBLPROPERTIES (property_name = property_value, ...)]
AS SELECT ...;
例子
CREATE VIEW onion_referrers(url COMMENT 'URL of Referring page')
COMMENT 'Referrers to The Onion website'
AS
SELECT DISTINCT referrer_url
FROM page_view
WHERE page_url='http://www.theonion.com';
DROP VIEW [IF EXISTS] view_name;
例子
DROP VIEW onion_referrers;
ALTER VIEW view_name SET TBLPROPERTIES table_properties;
table_properties:
: (property_name = property_value, property_name = property_value, ...)
ALTER VIEW view_name AS select_statement;
只能用于没有分区的视图
CREATE INDEX index_name
ON TABLE base_table_name (col_name, ...)
AS index_type
[WITH DEFERRED REBUILD]
[IDXPROPERTIES (property_name=property_value, ...)]
[IN TABLE index_table_name]
[
[ ROW FORMAT ...] STORED AS ...
| STORED BY ...
]
[LOCATION hdfs_path]
[TBLPROPERTIES (...)]
[COMMENT "index comment"];
DROP INDEX [IF EXISTS] index_name ON table_name;
ALTER INDEX index_name ON table_name [PARTITION partition_spec] REBUILD;
CREATE TEMPORARY FUNCTION function_name AS class_name;
利用类名创建临时方法,在当前session有效,类名可以先add jar
DROP TEMPORARY FUNCTION [IF EXISTS] function_name;
CREATE FUNCTION [db_name.]function_name AS class_name
[USING JAR|FILE|ARCHIVE 'file_uri' [, JAR|FILE|ARCHIVE 'file_uri'] ];
需要限制到数据库,如果没有写数据库,默认当前数据库
需要先添加进去并用using访问
DROP FUNCTION [IF EXISTS] function_name;
RELOAD FUNCTION;
SHOW (DATABASES|SCHEMAS) [LIKE 'identifier_with_wildcards'];
用”*”表示任意个字符,”|”表示或
SHOW TABLES [IN database_name] [like 'identifier_with_wildcards'];
in语句表示数据库,不写默认当前数据库
用”*”表示任意个字符,”|”表示或
SHOW PARTITIONS table_name;
也可以指定分区中的一个或几个字段
SHOW PARTITIONS table_name PARTITION(ds='2010-03-03');
SHOW PARTITIONS table_name PARTITION(hr='12');
SHOW PARTITIONS table_name PARTITION(ds='2010-03-03', hr='12');
SHOW TABLE EXTENDED [IN|FROM database_name] LIKE 'identifier_with_wildcards' [PARTITION(partition_spec)];
extended会给出表的文件信息和文件大小、修改时间等信息
如果指定了partition就不能用表的正则匹配了,只能指定具体的表
SHOW TBLPROPERTIES tblname;
SHOW TBLPROPERTIES tblname("foo");
第一种方式给出表的所有属性值
第二种方式给出表的指定的属性值
SHOW CREATE TABLE ([db_name.]table_name|view_name);
既能看table,也能看view
SHOW [FORMATTED] (INDEX|INDEXES) ON table_with_index [(FROM|IN) db_name];
SHOW COLUMNS (FROM|IN) table_name [(FROM|IN) db_name];
SHOW FUNCTIONS "a.*";
查看所有方法时用”.*”
SHOW LOCKS ;
SHOW LOCKS EXTENDED;
SHOW LOCKS PARTITION ();
SHOW LOCKS PARTITION () EXTENDED;
SHOW LOCKS <DATABASE> database_name;
SHOW CONF ;
不会列出当前配置的值,如果需要请使用set命令
SHOW TRANSACTIONS;
SHOW COMPACTIONS;
DESCRIBE DATABASE [EXTENDED] db_name;
extended给出数据库的属性信息
没有指定数据库时
DESCRIBE [EXTENDED|FORMATTED]
table_name[.col_name ( [.field_name] | [.'$elem$'] | [.'$key$'] | [.'$value$'] )* ];
指定数据库时
DESCRIBE [EXTENDED|FORMATTED]
[db_name.]table_name[ col_name ( [.field_name] | [.'$elem$'] | [.'$key$'] | [.'$value$'] )* ];
DESCRIBE FORMATTED [db_name.]table_name column_name;
DESCRIBE FORMATTED [db_name.]table_name column_name PARTITION (partition_spec);
没有指定数据库时
DESCRIBE [EXTENDED|FORMATTED] table_name[.column_name] PARTITION partition_spec;
指定数据库时
DESCRIBE [EXTENDED|FORMATTED] [db_name.]table_name [column_name] PARTITION partition_spec;
例子
DESCRIBE page_view PARTITION (ds='2008-08-08');