Hive高级语法

-- Loading data through Hive
-- Create the data table
USE myhivebook;

-- Delimited text table whose fields are separated by commas.
CREATE TABLE IF NOT EXISTS student (
    id   INT,
    name STRING
)
COMMENT '学生表'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Inspect the table definition in formatted form.
DESCRIBE FORMATTED student;

-- 创建数据
vi /root/data/student.dat
1001,tom
1002,jack
1003,rose

-- Load data
-- Copy a local file up into HDFS (OVERWRITE replaces the table's current contents)
LOAD DATA LOCAL INPATH '/root/data/student.dat'
OVERWRITE INTO TABLE student;

-- Cut (move) an HDFS file into another HDFS directory:
-- stage the file on HDFS first, then load it from its HDFS path
dfs -mkdir -p /hadoop/mydata;
dfs -put /root/data/student.dat /hadoop/mydata;
dfs -ls /hadoop/mydata;
LOAD DATA INPATH '/hadoop/mydata/student.dat'
OVERWRITE INTO TABLE student;

-- Append a local file to the table (no OVERWRITE keyword)
LOAD DATA LOCAL INPATH '/root/data/student.dat'
INTO TABLE student;

-- Append an HDFS file to the table:
-- empty the table, upload the file to HDFS again, then load it without OVERWRITE
TRUNCATE TABLE student;
dfs -put /root/data/student.dat /hadoop/mydata;
LOAD DATA INPATH '/hadoop/mydata/student.dat'
INTO TABLE student;

-- Query the data
SELECT * FROM student;

-- 装载csv数据
-- 逗号分隔值(Comma-Separated Values,CSV,有时也称为字符分隔值,因为分隔字符也可以不是逗号)
-- 获取数据格式
id,name,age,mark
1,tom,22,90
2,jack,23,95
3,rose,21,99
-- Create a table matching the sample data above.
-- NOTE: OpenCSVSerde exposes every column as STRING no matter which type is
-- declared, so the columns are declared STRING here to match what DESCRIBE
-- actually reports. Cast in queries when a numeric value is needed,
-- e.g. CAST(age AS INT) or CAST(mark AS DOUBLE).
-- 'skip.header.line.count'='1' skips the header line of the CSV file.
CREATE TABLE IF NOT EXISTS csv_student
(
    id   STRING,
    name STRING,
    age  STRING,
    mark STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
TBLPROPERTIES ('skip.header.line.count'='1')
;
-- Inspect the table structure
DESCRIBE FORMATTED csv_student;
-- 上传数据到linux /root/data
cat student.csv
-- Load the local CSV file, replacing any data already in the table
LOAD DATA LOCAL INPATH '/root/data/student.csv'
OVERWRITE INTO TABLE csv_student;
-- Query the loaded rows
SELECT * FROM csv_student;

-- 示例2:
-- 数据(数据行各列之间以制表符 \t 分隔,字符串以单引号界定;表头行会被跳过,下方列间的空白代表制表符)
id,name,age,mark
1    'tom'    22    90
2    'jack'    23    95
3    'rose'    21    99
-- Create the table (dropped first so the definition below always takes effect).
-- NOTE: OpenCSVSerde exposes every column as STRING no matter which type is
-- declared, so the columns are declared STRING here to match what DESCRIBE
-- actually reports. Cast in queries when a numeric value is needed,
-- e.g. CAST(age AS INT) or CAST(mark AS DOUBLE).
-- separatorChar: fields are separated by a tab character.
-- quoteChar: string values are enclosed in single quotes.
-- 'skip.header.line.count'='1' skips the header line of the data file.
DROP TABLE IF EXISTS csv_student2;
CREATE TABLE IF NOT EXISTS csv_student2
(
    id   STRING,
    name STRING,
    age  STRING,
    mark STRING
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
  'separatorChar'='\t',
  'quoteChar'="'"
)
TBLPROPERTIES ('skip.header.line.count'='1')
;
-- Load the local data file, replacing any data already in the table
LOAD DATA LOCAL INPATH '/root/data/student2.csv'
OVERWRITE INTO TABLE csv_student2;
-- Query the loaded rows
SELECT * FROM csv_student2;

-- 作业:数据通过分号分隔,字符用双引号界定


-- JSON,JSON(JavaScript Object Notation, JS对象简谱)是一种轻量级的数据交换格式。
{} 表示对象(一组键值对)
[] 表示数组(一组有序的值)
id,name,age,mark
1,tom,22,90
{
  "id": 1,
  "name": "tom",
  "age": 22,
  "mark": 90
}
[
  {
    "id": 1,
    "name": "tom",
    "age": 22,
    "mark": 90
  },
  {
    "id": 1,
    "name": "tom",
    "age": 22,
    "mark": 90
  }
]
-- 获取数据样例
{"name":"Michael"}
{"name":"Andy","Age":30}
{"name":"Justin","Age":19}
-- Download the third-party jar used to parse JSON data and upload it to
-- /root/data on the Linux host.
-- Register the jar for this session. It has to stay registered while
-- json_users is created, loaded and queried, because the table's SerDe class
-- lives in this jar.
add jar /root/data/json-serde-1.3.8-jar-with-dependencies.jar;
list jar;
-- Create the table
CREATE TABLE IF NOT EXISTS json_users
(
    name string,
    age int
)
ROW FORMAT
SERDE "org.openx.data.jsonserde.JsonSerDe";
-- Upload the data to /root/data/ and load it
LOAD DATA LOCAL INPATH '/root/data/users.json' OVERWRITE INTO TABLE json_users;
-- Query the data
SELECT * FROM json_users;
-- Unregister the jar only once all work on json_users is finished.
-- (This statement used to sit directly after "add jar", which removed the
-- SerDe class before the table could use it.)
delete jar /root/data/json-serde-1.3.8-jar-with-dependencies.jar;

-- Partitioning
-- Static partitions and dynamic partitions
-- Create the table for the static-partition example; sex is the partition column.
CREATE TABLE IF NOT EXISTS student_static_partition (
    id   INT,
    name STRING,
    age  INT
)
PARTITIONED BY (sex STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';
-- Inspect the table structure
DESCRIBE student_static_partition;
-- Add partitions.
-- IF NOT EXISTS lets the alternative form below run after the single-partition
-- statements without failing because the partitions already exist.
ALTER TABLE student_static_partition ADD IF NOT EXISTS PARTITION (sex='male');
ALTER TABLE student_static_partition ADD IF NOT EXISTS PARTITION (sex='female');
-- Or add both partitions in one statement
ALTER TABLE student_static_partition ADD IF NOT EXISTS PARTITION (sex='male') PARTITION (sex='female');

-- Drop partitions.
-- IF EXISTS makes it explicit that dropping an already-removed partition is fine.
ALTER TABLE student_static_partition DROP IF EXISTS PARTITION (sex='male');
ALTER TABLE student_static_partition DROP IF EXISTS PARTITION (sex='female');
-- Or drop both partitions in one statement
ALTER TABLE student_static_partition DROP IF EXISTS PARTITION (sex='male'), PARTITION (sex='female');

-- The drop demonstration removed both partitions; add them back so the
-- listing below shows sex=female and sex=male.
ALTER TABLE student_static_partition ADD IF NOT EXISTS PARTITION (sex='male') PARTITION (sex='female');

-- List the partitions
SHOW PARTITIONS student_static_partition;
+-------------+--+
|  partition  |
+-------------+--+
| sex=female  |
| sex=male    |
+-------------+--+

-- 创建数据
vi /root/data/student_male.dat
1,tom,22
2,jack,23

vi /root/data/student_female.dat
3,rose,20
4,marry,21

-- Load each local data file into its own partition (OVERWRITE replaces that partition's data)
LOAD DATA LOCAL INPATH '/root/data/student_male.dat'
OVERWRITE INTO TABLE student_static_partition PARTITION (sex='male');

LOAD DATA LOCAL INPATH '/root/data/student_female.dat'
OVERWRITE INTO TABLE student_static_partition PARTITION (sex='female');

-- View the data
SELECT * FROM student_static_partition;

你可能感兴趣的:(json,hive,hadoop,hdfs,大数据)