利用 JsonSerDe 为复杂 JSON 格式数据创建 Hive 表结构

  1. 配置 Java、Maven 等环境变量

    java:

    export JAVA_HOME=/usr/local/java
    export PATH=$JAVA_HOME/bin:$PATH
    export CLASSPATH=$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$CLASSPATH
    

    maven:

    export MVN_HOME=/usr/local/maven
    export PATH=$MVN_HOME/bin:$PATH
    
  2. 下载 hive-json-schema

    [root@cdh01 cloudera]# git clone https://github.com/quux00/hive-json-schema.git
    正克隆到 'hive-json-schema'...
    remote: Enumerating objects: 155, done.
    remote: Total 155 (delta 0), reused 0 (delta 0), pack-reused 155
    接收对象中: 100% (155/155), 142.85 KiB | 34.00 KiB/s, done.
    处理 delta 中: 100% (35/35), done.
    [root@cdh01 cloudera]# cd hive-json-schema/
    [root@cdh01 hive-json-schema]# ll
    总用量 8
    -rw-r--r-- 1 root root 1610 6月  27 23:25 pom.xml
    -rw-r--r-- 1 root root 3873 6月  27 23:25 README.md
    drwxr-xr-x 3 root root   18 6月  27 23:25 src
    
  3. maven 打包

    [root@cdh01 hive-json-schema]# mvn package
    [INFO] Scanning for projects...
    [WARNING] 
    [WARNING] Some problems were encountered while building the effective model for net.thornydev:json-hive-schema:jar:1.0
    [WARNING] 'build.plugins.plugin.version' for org.apache.maven.plugins:maven-compiler-plugin is missing. @ line 13, column 15
    [WARNING] 
    [WARNING] It is highly recommended to fix these problems because they threaten the stability of your build.
    [WARNING] 
    [WARNING] For this reason, future Maven versions might no longer support building such malformed projects.
    [WARNING] 
    [INFO]                                                                         
    [INFO] ------------------------------------------------------------------------
    [INFO] Building json-hive-schema 1.0
    [INFO] ------------------------------------------------------------------------
    [INFO] 
    [INFO] --- maven-resources-plugin:2.5:resources (default-resources) @ json-hive-schema ---
    [debug] execute contextualize
    [WARNING] Using platform encoding (UTF-8 actually) to copy filtered resources, i.e. build is platform dependent!
    [INFO] skip non existing resourceDirectory /root/github/cloudera/hive-json-schema/src/main/resources
    [INFO] 
    [INFO] --- maven-compiler-plugin:2.3.2:compile (default-compile) @ json-hive-schema ---
    [WARNING] File encoding has not been set, using platform encoding UTF-8, i.e. build is platform dependent!
    [INFO] Compiling 8 source files to /root/github/cloudera/hive-json-schema/target/classes
    [INFO] 
    [INFO] --- maven-resources-plugin:2.5:testResources (default-testResources) @ json-hive-schema ---
    [debug] execute contextualize
    [WARNING] Using platform encoding (UTF-8 actually) to copy filtered resources, i.e. build is platform dependent!
    [INFO] skip non existing resourceDirectory /root/github/cloudera/hive-json-schema/src/test/resources
    [INFO] 
    [INFO] --- maven-compiler-plugin:2.3.2:testCompile (default-testCompile) @ json-hive-schema ---
    [INFO] No sources to compile
    [INFO] 
    [INFO] --- maven-surefire-plugin:2.10:test (default-test) @ json-hive-schema ---
    
    -------------------------------------------------------
     T E S T S
    -------------------------------------------------------
    
    Results :
    
    Tests run: 0, Failures: 0, Errors: 0, Skipped: 0
    
    [INFO] 
    [INFO] --- maven-jar-plugin:2.4:jar (default-jar) @ json-hive-schema ---
    [INFO] Building jar: /root/github/cloudera/hive-json-schema/target/json-hive-schema-1.0.jar
    [INFO] 
    [INFO] --- maven-assembly-plugin:2.4:single (default) @ json-hive-schema ---
    [INFO] Building jar: /root/github/cloudera/hive-json-schema/target/json-hive-schema-1.0-jar-with-dependencies.jar
    [INFO] ------------------------------------------------------------------------
    [INFO] BUILD SUCCESS
    [INFO] ------------------------------------------------------------------------
    [INFO] Total time: 29.893s
    [INFO] Finished at: Thu Jun 27 23:26:07 CST 2019
    [INFO] Final Memory: 19M/206M
    [INFO] ------------------------------------------------------------------------
    [root@cdh01 hive-json-schema]# ll target/
    总用量 64
    drwxr-xr-x 2 root root     6 6月  27 23:26 archive-tmp
    drwxr-xr-x 4 root root    28 6月  27 23:25 classes
    drwxr-xr-x 3 root root    25 6月  27 23:25 generated-sources
    -rw-r--r-- 1 root root 29091 6月  27 23:25 json-hive-schema-1.0.jar
    -rw-r--r-- 1 root root 29124 6月  27 23:26 json-hive-schema-1.0-jar-with-dependencies.jar
    drwxr-xr-x 2 root root    28 6月  27 23:25 maven-archiver
    drwxr-xr-x 2 root root     6 6月  27 23:26 surefire
    
    
  4. 生成 Hive 建表语句

    测试用 json 数据

    {
      "description": "my doc",
      "foo": {
        "bar": "baz",
        "quux": "revlos",
        "level1" : {
          "l2string": "l2val",
          "l2struct": {
            "level3": "l3val"
          }
        }
      },
      "wibble": "123",
      "wobble": [
        {
          "entry": 1,
          "EntryDetails": {
            "details1": "lazybones",
            "details2": 414
          }
        },
        {
          "entry": 2,
          "EntryDetails": {
            "details1": "entry 123"
          }
        }
      ]
    }
    
    # 方式一 不可执行的jar
    java -cp target/json-hive-schema-1.0.jar net.thornydev.JsonHiveSchema file.json
    java -cp target/json-hive-schema-1.0.jar net.thornydev.JsonHiveSchema file.json table_name
    # 方式二 可执行的jar
    java -jar target/json-hive-schema-1.0-jar-with-dependencies.jar file.json
    java -jar target/json-hive-schema-1.0-jar-with-dependencies.jar file.json table_name
    

    生成建表语句

    [root@cdh01 hive-json-schema]# java -cp target/json-hive-schema-1.0.jar net.thornydev.JsonHiveSchema file.json
    CREATE TABLE x (
      description string,
      foo struct<bar:string, level1:struct<l2string:string, l2struct:struct<level3:string>>, quux:string>,
      wibble string,
      wobble array<struct<entry:int, entrydetails:struct<details1:string, details2:int>>>)
    ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';
    
    [root@cdh01 hive-json-schema]# java -cp target/json-hive-schema-1.0.jar net.thornydev.JsonHiveSchema file.json table_name
    CREATE TABLE table_name (
      description string,
      foo struct<bar:string, level1:struct<l2string:string, l2struct:struct<level3:string>>, quux:string>,
      wibble string,
      wobble array<struct<entry:int, entrydetails:struct<details1:string, details2:int>>>)
    ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';
    
    [root@cdh01 hive-json-schema]# java -jar target/json-hive-schema-1.0-jar-with-dependencies.jar file.json
    CREATE TABLE x (
      description string,
      foo struct<bar:string, level1:struct<l2string:string, l2struct:struct<level3:string>>, quux:string>,
      wibble string,
      wobble array<struct<entry:int, entrydetails:struct<details1:string, details2:int>>>)
    ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';
    
    [root@cdh01 hive-json-schema]# java -jar target/json-hive-schema-1.0-jar-with-dependencies.jar file.json table_name
    CREATE TABLE table_name (
      description string,
      foo struct<bar:string, level1:struct<l2string:string, l2struct:struct<level3:string>>, quux:string>,
      wibble string,
      wobble array<struct<entry:int, entrydetails:struct<details1:string, details2:int>>>)
    ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';
    
  5. 验证 Hive 表结构

    测试数据 file.json

    {"description":"mydoc","foo":{"bar":"baz","quux":"revlos","level1":{"l2string":"l2val","l2struct":{"level3":"l3val"}}},"wibble":"123","wobble":[{"entry":1,"EntryDetails":{"details1":"lazybones","details2":414}},{"entry":2,"EntryDetails":{"details1":"entry123"}}]}
    {"description":"mytxt","foo":{"bar":"sas","quux":"revlos","level1":{"l2string":"l2val","l2struct":{"level3":"l3val"}}},"wibble":"123","wobble":[{"entry":1,"EntryDetails":{"details1":"lazybones","details2":414}}]}
    

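    创建表(org.openx.data.jsonserde.JsonSerDe 并非 Hive 自带;若尚未加入 Hive 的 classpath,建表前需先用 ADD JAR 加载 Hive-JSON-Serde 的 jar 包,否则会因找不到 SerDe 类而建表失败)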

    hive> CREATE TABLE table_name (
    >   description string,
    >   foo struct<bar:string, level1:struct<l2string:string, l2struct:struct<level3:string>>, quux:string>,
    >   wibble string,
    >   wobble array<struct<entry:int, entrydetails:struct<details1:string, details2:int>>>)
    > ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';
    OK
    Time taken: 0.135 seconds
    

    加载数据

    LOAD DATA LOCAL INPATH '/root/tmp/file.json' OVERWRITE INTO TABLE table_name ;
    

    查询数据

    SELECT * FROM table_name;
    SELECT wobble.entry, wobble.EntryDetails.details1, wobble.EntryDetails[0].details2 FROM table_name;
    

    利用 JsonSerde 为 复杂 JSON 格式数据创建 Hive 表结构_第1张图片
    利用 JsonSerde 为 复杂 JSON 格式数据创建 Hive 表结构_第2张图片

你可能感兴趣的:(Hive)