CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name
[COMMENT database_comment]
[LOCATION hdfs_path] -- 默认在仓库根目录 (defaults to the warehouse root directory)
[WITH DBPROPERTIES (property_name=property_value, ...)];
DROP (DATABASE|SCHEMA) [IF EXISTS] database_name [RESTRICT|CASCADE]; -- 数据库中有表存在需要使用CASCADE (use CASCADE if the database still contains tables)
ALTER (DATABASE|SCHEMA) database_name SET DBPROPERTIES (property_name=property_value, ...); -- (Note: SCHEMA added in Hive 0.14.0)
ALTER (DATABASE|SCHEMA) database_name SET OWNER [USER|ROLE] user_or_role; -- (Note: Hive 0.13.0 and later; SCHEMA added in Hive 0.14.0)
ALTER (DATABASE|SCHEMA) database_name SET LOCATION hdfs_path; -- (Note: Hive 2.2.1, 2.4.0 and later)
show databases;
show databases like 'db_hive*';
use db_hive;
desc database db_hive_03;
参考: Hive wiki — LanguageManual DDL
1. CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
[(col_name data_type [COMMENT col_comment], ... [constraint_specification])]
[COMMENT table_comment]
[
[ROW FORMAT row_format] -- 每一行的分隔符、格式 (per-row delimiter / format)
[STORED AS file_format]
| STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)] -- (Note: Available in Hive 0.6.0 and later)
]
[LOCATION hdfs_path]
[TBLPROPERTIES (property_name=property_value, ...)] -- (Note: Available in Hive 0.6.0 and later)
-- Example: managed table with per-column comments, one space as the field
-- delimiter, stored as plain text files.
-- (Fixed: the 'eg:' note was fused onto the statement, making it invalid.)
create table IF NOT EXISTS default.log_20150913(
ip string COMMENT 'remote ip address',
users string COMMENT 'users',
req_url string COMMENT 'user request url')
COMMENT 'beifeng web access logs'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ' '
STORED AS TEXTFILE;
-- 分表,提高分析速率 (split data into tables/partitions to speed up analysis)
2. CREATE TABLE [IF NOT EXISTS] [db_name.]table_name [AS select_statement]; -- (Note: Available in Hive 0.5.0 and later; not supported for external tables)
-- Example: CTAS — create a new table populated from a query result.
-- (Fixed: dropped the 'eg:' prose prefix that made the statement invalid.)
create table IF NOT EXISTS default.log_20150913_sa AS
select ip, req_url from default.log_20150913;
3. CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
LIKE existing_table_or_view_name
[LOCATION hdfs_path];
-- Example: copy only the schema (no data) of an existing table.
-- (Fixed: dropped the 'eg:' prose prefix that made the statement invalid.)
create table IF NOT EXISTS default.log_20150914 like default.log_20150913;
EXTERNAL:
在hive中表的类型:
load data local inpath '/opt/datas/-log.txt' into table default.log_20150913;
DROP TABLE [IF EXISTS] table_name [PURGE]; -- (Note: PURGE available in Hive 0.14.0 and later)
ALTER TABLE table_name RENAME TO new_table_name;
ALTER TABLE table_name SET TBLPROPERTIES table_properties;
table_properties:
: (property_name = property_value, property_name = property_value, ... )
ALTER TABLE table_name SET TBLPROPERTIES ('comment' = new_comment);
ALTER TABLE table_name [PARTITION partition_spec] SET SERDE serde_class_name [WITH SERDEPROPERTIES serde_properties];
ALTER TABLE table_name [PARTITION partition_spec] SET SERDEPROPERTIES serde_properties;
serde_properties:
: (property_name = property_value, property_name = property_value, ... )
分区表其实就是对应一个HDFS文件系统上的独立文件夹,该文件夹下是该分区所有的数据文件。Hive中的分区就是分目录,把一个大的数据集根据业务需求分割成更小的数据集
在查询时,通过where子句中的表达式来选择查询所需要的指定的分区,这样查询的效率会提高很多。
-- Example: partitioned table. Each partition maps to a subdirectory under
-- the table's HDFS directory; a WHERE filter on the partition column lets
-- Hive prune directories and speeds up queries.
-- (Fixed: '//' is not a valid Hive comment delimiter — Hive uses '--'.)
create table IF NOT EXISTS default.dept_partition(
deptno int,
dname string,
loc string)
partitioned by (month string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
-- 加载数据 (load data into one specific partition, overwriting it)
load data local inpath '/opt/datas/dept.txt' overwrite into table default.dept_partition
partition (month='201509');
-- 查询分区内容 (query a single partition via the partition column)
select * from dept_partition where month='201509';
添加分区的两种方式(如果没有自动生成分区目录):
-- 1. Manually create the partition directory on HDFS and upload the data
--    file, then run MSCK REPAIR to register the partition in the metastore.
dfs -mkdir -p /user/hive/warehouse/dept_part/day=20150913;
dfs -put /opt/datas/dept.txt /user/hive/warehouse/dept_part/day=20150913;
msck repair table dept_part; -- 修复 (fixed: '-' is not valid in an identifier; directory name is dept_part)
-- 2. Upload the data file, then register the partition explicitly.
dfs -mkdir -p /user/hive/warehouse/dept_part/day=20150914;
dfs -put /opt/datas/dept.txt /user/hive/warehouse/dept_part/day=20150914;
alter table dept_part add partition(day='20150914');
参考: Hive wiki — LanguageManual DDL