学习了尚学堂的hive视频,做如下笔记。主要是hive中的建表,插入,函数相关的知识。
一、表数据
以下为人员信息表的示例数据,每行包含四个字段,分别为 id、name、likes、address(字段之间用 "," 分隔,likes 的各个元素用 "-" 分隔,address 的键和值用 ":" 分隔):
- 1,xiaoming,book-tv-football,beijing:haidian-tianjin:wuqing
- 2,sunjian,tv-football,xian:gaoxin-tianjin:wuqing
- 3,liuyang,book-code-football,henan:xinxiang-liaoning:dalian
二、DDL 几种方式创建表
-- psn1: person table with an array column (likes) and a map column (address).
-- Delimiters match the sample rows: ',' between fields, '-' between
-- collection items, ':' between a map key and its value.
CREATE TABLE psn1 (
    id      INT,
    name    STRING,
    likes   ARRAY<STRING>,
    address MAP<STRING, STRING>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '-'
    MAP KEYS TERMINATED BY ':';
-- psn2: same columns and delimiters as psn1, but with an explicit LOCATION
-- for the table's data directory.
CREATE TABLE psn2 (
    id      INT,
    name    STRING,
    likes   ARRAY<STRING>,
    address MAP<STRING, STRING>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '-'
    MAP KEYS TERMINATED BY ':'
LOCATION '/usr/psn2';
-- psn3: CREATE TABLE AS SELECT -- the table is created from the result
-- of the query on psn1.
CREATE TABLE psn3 AS
SELECT
    id,
    name,
    likes,
    address
FROM psn1;
-- psn4: CREATE TABLE ... LIKE -- reuses the table definition of psn1.
CREATE TABLE psn4
LIKE psn1;
-- psn5: same columns and delimiters as psn1, partitioned by sex.
-- The partition column is declared in PARTITIONED BY, not in the column list.
CREATE TABLE psn5 (
    id      INT,
    name    STRING,
    likes   ARRAY<STRING>,
    address MAP<STRING, STRING>
)
PARTITIONED BY (sex STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '-'
    MAP KEYS TERMINATED BY ':';
加载数据时指定分区字段的值
-- Load the same local path into two partitions of psn5; the partition
-- value is given explicitly in the PARTITION clause.
LOAD DATA LOCAL INPATH '/root/data'
INTO TABLE psn5
PARTITION (sex='boy');

LOAD DATA LOCAL INPATH '/root/data'
INTO TABLE psn5
PARTITION (sex='girl');
按分区查询
-- Filter on the partition column sex.
SELECT *
FROM psn5
WHERE sex='boy';
添加分区
-- Add a new partition to psn5 (the keyword is ALTER, and ADD is required).
ALTER TABLE psn5 ADD PARTITION (sex='qita');
删除分区(会删除分区的数据)
-- Drop the partition sex='qita' from psn5 (the keyword is ALTER).
ALTER TABLE psn5 DROP PARTITION (sex='qita');
当定义了多个分区字段时(分区字段在定义时有先后顺序,有层次结构),载入数据时必须把每个分区字段的值都写清楚。
-- apachelog: parses Apache access-log lines with RegexSerDe. Each capture
-- group of input.regex maps, in order, to one column of the table.
-- `user` and `time` are back-quoted because they may collide with reserved
-- keywords depending on the Hive version.
CREATE TABLE apachelog (
    host     STRING,
    identity STRING,
    `user`   STRING,
    `time`   STRING,
    request  STRING,
    status   STRING,
    size     STRING,
    referer  STRING,
    agent    STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    -- The first three groups are space-delimited tokens: [^ ]* (the space
    -- inside the class is required). The fourth group is "-" or a bracketed
    -- timestamp: \[ [^\]]* \].
    "input.regex" = "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\".*\") ([^ \"]*|\".*\"))?"
)
STORED AS TEXTFILE;
三、DML
-- LOCAL: the path refers to the local filesystem.
LOAD DATA LOCAL INPATH '本地路径'
INTO TABLE psn1;
加载hdfs数据(移动hdfs的文件到表的文件夹)
-- No LOCAL keyword: the path is an HDFS path.
LOAD DATA INPATH 'hdfs路径'
INTO TABLE psn1;
-- Hive FROM-first insert: read psn1 under the alias pvs and write the
-- selected columns into psn2. Every column reference must use the declared
-- alias pvs.
FROM psn1 pvs
INSERT INTO TABLE psn2
SELECT
    pvs.id,
    pvs.name,
    pvs.likes,
    pvs.address;
四、hive中的函数
4、内置表生成函数(UDTF)(太多了,列出几个链接)
官网:
https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF
博客园(中文):
https://www.cnblogs.com/MOBIN/p/5618747.html
易佰教程:
http://www.yiibai.com/hive/hive_built_in_functions.html
5、也可以自己写函数,上传到服务器。