只涉及简单的建表,不涉及分区等复杂操作。
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name -- (Note: TEMPORARY available in Hive 0.14.0 and later)
[(col_name data_type [COMMENT col_comment], ... [constraint_specification])]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
[CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
[SKEWED BY (col_name, col_name, ...) -- (Note: Available in Hive 0.10.0 and later)]
ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)
[STORED AS DIRECTORIES]
[
[ROW FORMAT row_format]
[STORED AS file_format]
| STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)] -- (Note: Available in Hive 0.6.0 and later)
]
[LOCATION hdfs_path]
[TBLPROPERTIES (property_name=property_value, ...)] -- (Note: Available in Hive 0.6.0 and later)
[AS select_statement]; -- (Note: Available in Hive 0.5.0 and later; not supported for external tables)
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
LIKE existing_table_or_view_name
[LOCATION hdfs_path];
data_type
: primitive_type
| array_type
| map_type
| struct_type
| union_type -- (Note: Available in Hive 0.7.0 and later)
primitive_type
: TINYINT
| SMALLINT
| INT
| BIGINT
| BOOLEAN
| FLOAT
| DOUBLE
| DOUBLE PRECISION -- (Note: Available in Hive 2.2.0 and later)
| STRING
| BINARY -- (Note: Available in Hive 0.8.0 and later)
| TIMESTAMP -- (Note: Available in Hive 0.8.0 and later)
| DECIMAL -- (Note: Available in Hive 0.11.0 and later)
| DECIMAL(precision, scale) -- (Note: Available in Hive 0.13.0 and later)
| DATE -- (Note: Available in Hive 0.12.0 and later)
| VARCHAR -- (Note: Available in Hive 0.12.0 and later)
| CHAR -- (Note: Available in Hive 0.13.0 and later)
array_type
: ARRAY < data_type >
map_type
: MAP < primitive_type, data_type >
struct_type
: STRUCT < col_name : data_type [COMMENT col_comment], ...>
union_type
: UNIONTYPE < data_type, data_type, ... > -- (Note: Available in Hive 0.7.0 and later)
row_format
: DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char]
[MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
[NULL DEFINED AS char] -- (Note: Available in Hive 0.13 and later)
| SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)]
file_format:
: SEQUENCEFILE
| TEXTFILE -- (Default, depending on hive.default.fileformat configuration)
| RCFILE -- (Note: Available in Hive 0.6.0 and later)
| ORC -- (Note: Available in Hive 0.11.0 and later)
| PARQUET -- (Note: Available in Hive 0.13.0 and later)
| AVRO -- (Note: Available in Hive 0.14.0 and later)
| JSONFILE -- (Note: Available in Hive 4.0.0 and later)
| INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname
constraint_specification:
: [, PRIMARY KEY (col_name, ...) DISABLE NOVALIDATE ]
[, CONSTRAINT constraint_name FOREIGN KEY (col_name, ...) REFERENCES table_name(col_name, ...) DISABLE NOVALIDATE
FIELDS TERMINATED BY '$' /*使用$作为字段分隔符*/
MAP KEYS TERMINATED BY '$' /*使用$作为map解析数据时的key value分隔符*/
LINES TERMINATED BY '$' /*使用$作为行与行分隔符*/
COLLECTION ITEMS TERMINATED BY '$' /*使用$作为 Array 中的各元素、 Struct 中的各元素、各个map之间的分隔符*/
EXTERNAL
声明一个表为外部表CREATE EXTERNAL TABLE rowtest(row int);
DROP TABLE [IF EXISTS] table_name [PURGE];
PURGE: 直接删除,不移入回收站
假定HDFS文件/zpy/test/hive/i_row.txt文件格式如下:
abc,zhao-py
qwe,zh-wei
# 以下可用
create table zhaopy.irowtest(
word string,
love array
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
COLLECTION ITEMS TERMINATED BY '-'
LOCATION '/zpy/hive/i';
load命令会将文件移动到rowtest指定的存储位置.
load data inpath '/zpy/test/hive/i_row.txt' into table zhaopy.irowtest;
hive> select * from zhaopy.irowtest;
OK
abc ["zhao","py"]
qwe ["zh","wei"]
Time taken: 0.125 seconds, Fetched: 2 row(s)
假定HDFS文件/zpy/test/hive/f_row.txt文件格式如下:
abc,zhao-py
qwe,zh-wei
# 以下可用
create EXTERNAL table zhaopy.frowtest(
word string,
love array
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
COLLECTION ITEMS TERMINATED BY '-'
LOCATION '/zpy/hive/f';
load命令会将文件移动到rowtest指定的存储位置.
load data inpath '/zpy/test/hive/f_row.txt' into table zhaopy.frowtest;
hive> select * from zhaopy.frowtest;
OK
abc ["zhao","py"]
qwe ["zh","wei"]
Time taken: 0.125 seconds, Fetched: 2 row(s)
[ochadoop@server7 hivetest]$ hadoop fs -ls /zpy/hive/i
Found 1 items
-rwxrwxrwx 3 ochadoop ochadoop 23 2018-09-04 16:44 /zpy/hive/i/i_row.txt
[ochadoop@server7 hivetest]$ hadoop fs -ls /zpy/hive/f
Found 1 items
-rwxrwxrwx 3 ochadoop ochadoop 23 2018-09-04 16:44 /zpy/hive/f/f_row.txt
drop table zhaopy.irowtest;
drop table zhaopy.frowtest;
[ochadoop@server7 hivetest]$ hadoop fs -ls /zpy/hive/i
ls: `/zpy/hive/i': No such file or directory
[ochadoop@server7 hivetest]$ hadoop fs -ls /zpy/hive/f
Found 1 items
-rwxrwxrwx 3 ochadoop ochadoop 23 2018-09-04 16:44 /zpy/hive/f/f_row.txt
此处存在一个问题,collection与map keys同时定义出错,原因未知
ALTER TABLE table_name RENAME TO new_table_name;
ALTER TABLE table_name SET TBLPROPERTIES table_properties;
table_properties:
: (property_name = property_value, property_name = property_value, ... )
元数据分为用户自定义和hive预定义两种,系统预定义见:系统预定义。Hive元数据见:Hive元数据,hive元数据的概念及其查看见:Hive元数据概念及其查看
ALTER TABLE table_name [PARTITION partition_spec] SET SERDE serde_class_name [WITH SERDEPROPERTIES serde_properties];
ALTER TABLE table_name [PARTITION partition_spec] SET SERDEPROPERTIES serde_properties;
serde_properties:
: (property_name = property_value, property_name = property_value, ... )
// 使用举例:更改数据库表的字段分隔符
ALTER TABLE table_name SET SERDEPROPERTIES ('field.delim' = ',');
ALTER TABLE table_name CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name, ...)]
INTO num_buckets BUCKETS;
ALTER TABLE table_name [PARTITION partition_spec] CHANGE [COLUMN] col_old_name col_new_name column_type
[COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT];
// example
CREATE TABLE test_change (a int, b int, c int);
// First change column a's name to a1.
ALTER TABLE test_change CHANGE a a1 INT;
// Next change column a1's name to a2, its data type to string, and put it after column b.
ALTER TABLE test_change CHANGE a1 a2 STRING AFTER b;
// The new table's structure is: b int, a2 string, c int.
// Then change column c's name to c1, and put it as the first column.
ALTER TABLE test_change CHANGE c c1 INT FIRST;
// The new table's structure is: c1 int, b int, a2 string.
// Add a comment to column a1
ALTER TABLE test_change CHANGE a1 a1 INT COMMENT 'this is column a1';
ALTER TABLE table_name
[PARTITION partition_spec] -- (Note: Hive 0.14.0 and later)
ADD|REPLACE COLUMNS (col_name data_type [COMMENT col_comment], ...)
[CASCADE|RESTRICT] -- (Note: Hive 1.1.0 and later)
使用示例
// 给student表添加一个新列class
ALTER TABLE student ADD COLUMNs (class string COMMENT 'class');
// 由于hive无法针对列进行删除,因此我们采用replace替换。
// 假若原始表有三个字段,按顺序为id-name-class,假设我们要删除name,并替换id与class位置,可以使用如下SQL
ALTER TABLE student REPLACE COLUMNS (class string, id int);