配置 Java、Maven 等环境变量
java:
export JAVA_HOME=/usr/local/java
export PATH=$JAVA_HOME/bin:$PATH
export CLASSPATH=$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$CLASSPATH
maven:
export MVN_HOME=/usr/local/maven
export PATH=$MVN_HOME/bin:$PATH
下载 hive-json-schema
[root@cdh01 cloudera]# git clone https://github.com/quux00/hive-json-schema.git
正克隆到 'hive-json-schema'...
remote: Enumerating objects: 155, done.
remote: Total 155 (delta 0), reused 0 (delta 0), pack-reused 155
接收对象中: 100% (155/155), 142.85 KiB | 34.00 KiB/s, done.
处理 delta 中: 100% (35/35), done.
[root@cdh01 cloudera]# cd hive-json-schema/
[root@cdh01 hive-json-schema]# ll
总用量 8
-rw-r--r-- 1 root root 1610 6月 27 23:25 pom.xml
-rw-r--r-- 1 root root 3873 6月 27 23:25 README.md
drwxr-xr-x 3 root root 18 6月 27 23:25 src
maven 打包
[root@cdh01 hive-json-schema]# mvn package
[INFO] Scanning for projects...
[WARNING]
[WARNING] Some problems were encountered while building the effective model for net.thornydev:json-hive-schema:jar:1.0
[WARNING] 'build.plugins.plugin.version' for org.apache.maven.plugins:maven-compiler-plugin is missing. @ line 13, column 15
[WARNING]
[WARNING] It is highly recommended to fix these problems because they threaten the stability of your build.
[WARNING]
[WARNING] For this reason, future Maven versions might no longer support building such malformed projects.
[WARNING]
[INFO]
[INFO] ------------------------------------------------------------------------
[INFO] Building json-hive-schema 1.0
[INFO] ------------------------------------------------------------------------
[INFO]
[INFO] --- maven-resources-plugin:2.5:resources (default-resources) @ json-hive-schema ---
[debug] execute contextualize
[WARNING] Using platform encoding (UTF-8 actually) to copy filtered resources, i.e. build is platform dependent!
[INFO] skip non existing resourceDirectory /root/github/cloudera/hive-json-schema/src/main/resources
[INFO]
[INFO] --- maven-compiler-plugin:2.3.2:compile (default-compile) @ json-hive-schema ---
[WARNING] File encoding has not been set, using platform encoding UTF-8, i.e. build is platform dependent!
[INFO] Compiling 8 source files to /root/github/cloudera/hive-json-schema/target/classes
[INFO]
[INFO] --- maven-resources-plugin:2.5:testResources (default-testResources) @ json-hive-schema ---
[debug] execute contextualize
[WARNING] Using platform encoding (UTF-8 actually) to copy filtered resources, i.e. build is platform dependent!
[INFO] skip non existing resourceDirectory /root/github/cloudera/hive-json-schema/src/test/resources
[INFO]
[INFO] --- maven-compiler-plugin:2.3.2:testCompile (default-testCompile) @ json-hive-schema ---
[INFO] No sources to compile
[INFO]
[INFO] --- maven-surefire-plugin:2.10:test (default-test) @ json-hive-schema ---
-------------------------------------------------------
T E S T S
-------------------------------------------------------
Results :
Tests run: 0, Failures: 0, Errors: 0, Skipped: 0
[INFO]
[INFO] --- maven-jar-plugin:2.4:jar (default-jar) @ json-hive-schema ---
[INFO] Building jar: /root/github/cloudera/hive-json-schema/target/json-hive-schema-1.0.jar
[INFO]
[INFO] --- maven-assembly-plugin:2.4:single (default) @ json-hive-schema ---
[INFO] Building jar: /root/github/cloudera/hive-json-schema/target/json-hive-schema-1.0-jar-with-dependencies.jar
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 29.893s
[INFO] Finished at: Thu Jun 27 23:26:07 CST 2019
[INFO] Final Memory: 19M/206M
[INFO] ------------------------------------------------------------------------
[root@cdh01 hive-json-schema]# ll target/
总用量 64
drwxr-xr-x 2 root root 6 6月 27 23:26 archive-tmp
drwxr-xr-x 4 root root 28 6月 27 23:25 classes
drwxr-xr-x 3 root root 25 6月 27 23:25 generated-sources
-rw-r--r-- 1 root root 29091 6月 27 23:25 json-hive-schema-1.0.jar
-rw-r--r-- 1 root root 29124 6月 27 23:26 json-hive-schema-1.0-jar-with-dependencies.jar
drwxr-xr-x 2 root root 28 6月 27 23:25 maven-archiver
drwxr-xr-x 2 root root 6 6月 27 23:26 surefire
生成 Hive 建表语句
测试用 JSON 数据（保存为 file.json，供下面的命令使用）
{
"description": "my doc",
"foo": {
"bar": "baz",
"quux": "revlos",
"level1" : {
"l2string": "l2val",
"l2struct": {
"level3": "l3val"
}
}
},
"wibble": "123",
"wobble": [
{
"entry": 1,
"EntryDetails": {
"details1": "lazybones",
"details2": 414
}
},
{
"entry": 2,
"EntryDetails": {
"details1": "entry 123"
}
}
]
}
# 方式一：使用普通 jar（不可直接执行，需通过 -cp 指定 jar 并写明主类）
java -cp target/json-hive-schema-1.0.jar net.thornydev.JsonHiveSchema file.json
java -cp target/json-hive-schema-1.0.jar net.thornydev.JsonHiveSchema file.json table_name
# 方式二：使用可执行的 jar（jar-with-dependencies，可直接通过 java -jar 运行）
java -jar target/json-hive-schema-1.0-jar-with-dependencies.jar file.json
java -jar target/json-hive-schema-1.0-jar-with-dependencies.jar file.json table_name
生成建表语句
[root@cdh01 hive-json-schema]# java -cp target/json-hive-schema-1.0.jar net.thornydev.JsonHiveSchema file.json
CREATE TABLE x (
description string,
foo struct<bar:string, level1:struct<l2string:string, l2struct:struct<level3:string>>, quux:string>,
wibble string,
wobble array<struct<entry:int, entrydetails:struct<details1:string, details2:int>>>)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';
[root@cdh01 hive-json-schema]# java -cp target/json-hive-schema-1.0.jar net.thornydev.JsonHiveSchema file.json table_name
CREATE TABLE table_name (
description string,
foo struct<bar:string, level1:struct<l2string:string, l2struct:struct<level3:string>>, quux:string>,
wibble string,
wobble array<struct<entry:int, entrydetails:struct<details1:string, details2:int>>>)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';
[root@cdh01 hive-json-schema]# java -jar target/json-hive-schema-1.0-jar-with-dependencies.jar file.json
CREATE TABLE x (
description string,
foo struct<bar:string, level1:struct<l2string:string, l2struct:struct<level3:string>>, quux:string>,
wibble string,
wobble array<struct<entry:int, entrydetails:struct<details1:string, details2:int>>>)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';
[root@cdh01 hive-json-schema]# java -jar target/json-hive-schema-1.0-jar-with-dependencies.jar file.json table_name
CREATE TABLE table_name (
description string,
foo struct<bar:string, level1:struct<l2string:string, l2struct:struct<level3:string>>, quux:string>,
wibble string,
wobble array<struct<entry:int, entrydetails:struct<details1:string, details2:int>>>)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';
验证 Hive 表结构
测试数据 file.json（注意：JsonSerDe 按行解析，每条 JSON 记录必须写在同一行内，不能换行格式化）
{"description":"mydoc","foo":{"bar":"baz","quux":"revlos","level1":{"l2string":"l2val","l2struct":{"level3":"l3val"}}},"wibble":"123","wobble":[{"entry":1,"EntryDetails":{"details1":"lazybones","details2":414}},{"entry":2,"EntryDetails":{"details1":"entry123"}}]}
{"description":"mytxt","foo":{"bar":"sas","quux":"revlos","level1":{"l2string":"l2val","l2struct":{"level3":"l3val"}}},"wibble":"123","wobble":[{"entry":1,"EntryDetails":{"details1":"lazybones","details2":414}}]}
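创建表（建表前需确保 Hive 已加载包含 org.openx.data.jsonserde.JsonSerDe 的 jar，例如通过 ADD JAR 添加 Hive-JSON-Serde 的 jar 包）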
hive> CREATE TABLE table_name (
> description string,
> foo struct<bar:string, level1:struct<l2string:string, l2struct:struct<level3:string>>, quux:string>,
> wibble string,
> wobble array<struct<entry:int, entrydetails:struct<details1:string, details2:int>>>)
> ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';
OK
Time taken: 0.135 seconds
加载数据
LOAD DATA LOCAL INPATH '/root/tmp/file.json' OVERWRITE INTO TABLE table_name ;
查询数据
SELECT * FROM table_name;
SELECT wobble.entry, wobble.EntryDetails.details1, wobble.EntryDetails[0].details2 FROM table_name;