Hive可以使用get_json_object函数解析json字符串(Presto原生不提供该函数,对应的函数是json_extract_scalar)。
准备数据
-- Rebuild the demo table from scratch so the example can be re-run.
DROP TABLE IF EXISTS test.test_zw;

-- One row per student; `info` holds a flat JSON object stored as a string.
CREATE TABLE IF NOT EXISTS test.test_zw (
    stu_id INT    COMMENT 'id',
    info   STRING COMMENT '学生信息'
)
COMMENT '测试表'
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'SNAPPY');

-- Load two sample students, each with "name" and "grade" keys.
INSERT INTO test.test_zw VALUES
    (1, '{"name":"小明","grade":60}'),
    (2, '{"name":"小红","grade":100}');
简单查看数据
-- Preview every row and column of the demo table.
-- SELECT * is intentional here: this is an ad-hoc look at the raw data.
SELECT *
FROM test.test_zw;
解析json
-- Pull the top-level "name" and "grade" values out of the JSON string in `info`.
SELECT
    stu_id,
    get_json_object(info, '$.name')  AS name,
    get_json_object(info, '$.grade') AS grade
FROM test.test_zw;
有时候我们需要解析嵌套的json字符串,取数时只需在路径中用“.”连接各层的键即可。
准备数据
-- Rebuild the demo table from scratch so the example can be re-run.
DROP TABLE IF EXISTS test.test_zw;

-- One row per student; `info` holds a JSON object with a nested "course" object.
CREATE TABLE IF NOT EXISTS test.test_zw (
    stu_id INT    COMMENT 'id',
    info   STRING COMMENT '学生信息'
)
COMMENT '测试表'
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'SNAPPY');

-- Load two sample students whose scores live under the nested "course" key.
INSERT INTO test.test_zw VALUES
    (1, '{"name":"小明","course":{"math":60,"yuwen":70}}'),
    (2, '{"name":"小红","course":{"math":100,"yuwen":80}}');
简单查询
-- Preview every row and column of the demo table.
-- SELECT * is intentional here: this is an ad-hoc look at the raw data.
SELECT *
FROM test.test_zw;
解析json
-- Extract the top-level "name" plus the two scores nested under "course".
-- Nested keys are reached by chaining them with "." in the JSON path.
SELECT
    stu_id,
    get_json_object(info, '$.name')         AS name,
    get_json_object(info, '$.course.math')  AS math,
    get_json_object(info, '$.course.yuwen') AS yuwen  -- alias now matches the JSON key (was misspelled "yunwen")
FROM test.test_zw;
有时候json的键本身就是某个id,并且与该行对应id列的值一致,这时候可以使用concat把“$.”和该id列拼接成json路径,来提取指定的值。
准备数据
-- Rebuild the demo table from scratch so the example can be re-run.
DROP TABLE IF EXISTS test.test_zw;

-- One row per student/course pair; the JSON keys in `info` are the row's own
-- stu_id and course_id values rather than fixed names.
CREATE TABLE IF NOT EXISTS test.test_zw (
    stu_id    INT    COMMENT '学生id',
    course_id INT    COMMENT '课程id',
    info      STRING COMMENT '学生信息'
)
COMMENT '测试表'
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression' = 'SNAPPY');

-- Load two sample rows: key <stu_id> maps to the name, key <course_id> to the grade.
INSERT INTO test.test_zw VALUES
    (1, 101, '{"1":"小明","101":60}'),
    (2, 102, '{"2":"小红","102":100}');
简单查询
-- Preview every row and column of the demo table.
-- SELECT * is intentional here: this is an ad-hoc look at the raw data.
SELECT *
FROM test.test_zw;
解析json
-- Build each JSON path from the row's own ids: '$.<stu_id>' looks up the name
-- and '$.<course_id>' looks up the grade.
-- The ids are INT columns, so they are cast to STRING explicitly instead of
-- relying on implicit coercion inside concat().
SELECT
    stu_id,
    course_id,
    get_json_object(info, concat('$.', CAST(stu_id AS STRING)))    AS name,
    get_json_object(info, concat('$.', CAST(course_id AS STRING))) AS grade
FROM test.test_zw;