hive 文本解析实例

下面是一个表中的三条数据

hive> select * from redis.ods_allhash_activity_0518_quest_receive limit 3;
OK
key     value   pt
activity.0518.quest.receive.191800873463        {"{1={\"index\":1,\"days\":50,\"name\":\"5元红包\",\"totalInvest\":0.00,\"daysLimit\":0.00,\"rewardAmount\":5.00,\"singleInvest\":2000.00,\"rewardType\":3}, 2={\"index\":2,\"days\":0,\"name\":\"1次抽奖\",\"totalInvest\":5000.00,\"daysLimit\":0.00,\"rewardAmount\":1.00,\"singleInvest\":0.00,\"rewardType\":1}, 3={\"index\":3,\"days\":50,\"name\":\"8元红包\",\"totalInvest\":0.00,\"daysLimit\":50.00,\"rewardAmount\":8.00,\"singleInvest\":5000.00,\"rewardType\":3}}":null}     2017-05-15
activity.0518.quest.receive.191803789914        {"{1={\"index\":1,\"days\":50,\"name\":\"5元红包\",\"totalInvest\":0.00,\"daysLimit\":0.00,\"rewardAmount\":5.00,\"singleInvest\":2000.00,\"rewardType\":3}, 2={\"index\":2,\"days\":0,\"name\":\"1次抽奖\",\"totalInvest\":5000.00,\"daysLimit\":0.00,\"rewardAmount\":1.00,\"singleInvest\":0.00,\"rewardType\":1}, 3={\"index\":3,\"days\":50,\"name\":\"8元红包\",\"totalInvest\":0.00,\"daysLimit\":50.00,\"rewardAmount\":8.00,\"singleInvest\":5000.00,\"rewardType\":3}}":null}     2017-05-15
activity.0518.quest.receive.191806742784        {"{1={\"index\":1,\"days\":50,\"name\":\"5元红包\",\"totalInvest\":0.00,\"daysLimit\":0.00,\"rewardAmount\":5.00,\"singleInvest\":2000.00,\"rewardType\":3}, 2={\"index\":2,\"days\":0,\"name\":\"1次抽奖\",\"totalInvest\":5000.00,\"daysLimit\":0.00,\"rewardAmount\":1.00,\"singleInvest\":0.00,\"rewardType\":1}}":null}    2017-05-15
Time taken: 0.081 seconds, Fetched: 3 row(s)

下面对value进行解析:
1、value整体是一个字典型,我们需要取出其的键值。

select   map_keys(value),
map_keys(value)[0]
,split(map_keys(value)[0],'},')
from redis.ods_allhash_activity_0518_quest_receive
where pt='2017-05-15'
limit 1

得到:

[
    "{1={\"index\":1,\"days\":50,\"name\":\"5元红包\",\"totalInvest\":0.00,\"daysLimit\":0.00,\"rewardAmount\":5.00,\"singleInvest\":2000.00,\"rewardType\":3}, 2={\"index\":2,\"days\":0,\"name\":\"1次抽奖\",\"totalInvest\":5000.00,\"daysLimit\":0.00,\"rewardAmount\":1.00,\"singleInvest\":0.00,\"rewardType\":1}}"
]

{
    1={
        "index": 1,
        "days": 50,
        "name": "5元红包",
        "totalInvest": 0.00,
        "daysLimit": 0.00,
        "rewardAmount": 5.00,
        "singleInvest": 2000.00,
        "rewardType": 3
    },
    2={
        "index": 2,
        "days": 0,
        "name": "1次抽奖",
        "totalInvest": 5000.00,
        "daysLimit": 0.00,
        "rewardAmount": 1.00,
        "singleInvest": 0.00,
        "rewardType": 1
    }
}
[
    "{1={\"index\":1,\"days\":50,\"name\":\"5元红包\",\"totalInvest\":0.00,\"daysLimit\":0.00,\"rewardAmount\":5.00,\"singleInvest\":2000.00,\"rewardType\":3",
    " 2={\"index\":2,\"days\":0,\"name\":\"1次抽奖\",\"totalInvest\":5000.00,\"daysLimit\":0.00,\"rewardAmount\":1.00,\"singleInvest\":0.00,\"rewardType\":1}}"
]

3、

select *
from
(
select  substr(key,29,12) user_id,
split(map_keys(value)[0],'},') value
from redis.ods_allhash_activity_0518_quest_receive
where pt='2017-05-15'
)a LATERAL VIEW explode(value) b AS value
limit 2

得到:

191804375471
[
    "{1={\"index\":1,\"days\":50,\"name\":\"5元红包\",\"totalInvest\":0.00,\"daysLimit\":0.00,\"rewardAmount\":5.00,\"singleInvest\":2000.00,\"rewardType\":3",
    " 2={\"index\":2,\"days\":0,\"name\":\"1次抽奖\",\"totalInvest\":5000.00,\"daysLimit\":0.00,\"rewardAmount\":1.00,\"singleInvest\":0.00,\"rewardType\":1}}"
]
{
    1={
        "index": 1,
        "days": 50,
        "name": "5元红包",
        "totalInvest": 0.00,
        "daysLimit": 0.00,
        "rewardAmount": 5.00,
        "singleInvest": 2000.00,
        "rewardType": 3
      191804375471
        [
            "{1={\"index\":1,\"days\":50,\"name\":\"5元红包\",\"totalInvest\":0.00,\"daysLimit\":0.00,\"rewardAmount\":5.00,\"singleInvest\":2000.00,\"rewardType\":3",
            " 2={\"index\":2,\"days\":0,\"name\":\"1次抽奖\",\"totalInvest\":5000.00,\"daysLimit\":0.00,\"rewardAmount\":1.00,\"singleInvest\":0.00,\"rewardType\":1}}"
        ]
    2={
            "index": 2,
            "days": 0,
            "name": "1次抽奖",
            "totalInvest": 5000.00,
            "daysLimit": 0.00,
            "rewardAmount": 1.00,
            "singleInvest": 0.00,
            "rewardType": 1
      }
}

4、

select user_id,
regexp_replace(b.value,'^\\{|\\}\\}$|\\s','') value
from
(
select  substr(key,29,12) user_id,
split(map_keys(value)[0],'},') value
from redis.ods_allhash_activity_0518_quest_receive
where pt='2017-05-15'
)a LATERAL VIEW explode(value) b AS value
limit 1

得到

191804375471    1={"index":1,"days":50,"name":"5元红包","totalInvest":0.00,"daysLimit":0.00,"rewardAmount":5.00,"singleInvest":2000.00,"rewardType":3

5、

select
  user_id,
  split(value,'=')[0] value_0
  ,concat(split(value,'=')[1],'}') value_1

from
(
  select user_id,
  regexp_replace(b.value,'^\\{|\\}\\}$|\\s','') value
  from
  (
  select  substr(key,29,12) user_id,
  split(map_keys(value)[0],'},') value
  from redis.ods_allhash_activity_0518_quest_receive
  where pt='2017-05-15'
  )a LATERAL VIEW explode(value) b AS value
)a
limit 1

得到

191804375471    1       {"index":1,"days":50,"name":"5元红包","totalInvest":0.00,"daysLimit":0.00,"rewardAmount":5.00,"singleInvest":2000.00,"rewardType":3}

6、

  select user_id,
    value_0
    ,get_json_object(value_1,'$.index') index
    ,get_json_object(value_1,'$.days') days
    ,get_json_object(value_1,'$.name') name
    ,get_json_object(value_1,'$.totalInvest') totalInvest
    ,get_json_object(value_1,'$.daysLimit') daysLimit
    ,get_json_object(value_1,'$.rewardAmount') rewardAmount
    ,get_json_object(value_1,'$.singleInvest') singleInvest
    ,get_json_object(value_1,'$.rewardType') rewardType
  from
  (
    select
      user_id,
      split(value,'=')[0] value_0
      ,concat(split(value,'=')[1],'}') value_1

    from
    (
      select user_id,
      regexp_replace(b.value,'^\\{|\\}\\}$|\\s','') value
      from
      (
      select  substr(key,29,12) user_id,
      split(map_keys(value)[0],'},') value
      from redis.ods_allhash_activity_0518_quest_receive
      where pt='2017-05-15'
      )a LATERAL VIEW explode(value) b AS value
    )a
  )a
limit 10

得到:

user_id value_0 index   days    name    totalinvest     dayslimit       rewardamount    singleinvest    rewardtype
191804375471    1       1       50      5元红包 0.0     0.0     5.0     2000.0  3
191804375471    2       2       0       1次抽奖 5000.0  0.0     1.0     0.0     1
191803456717    1       1       50      5元红包 0.0     0.0     5.0     2000.0  3
191803456717    2       2       0       1次抽奖 5000.0  0.0     1.0     0.0     1
191804479258    1       1       50      5元红包 0.0     0.0     5.0     2000.0  3
191804479258    2       2       0       1次抽奖 5000.0  0.0     1.0     0.0     1
191807738946    1       1       50      5元红包 0.0     0.0     5.0     2000.0  3
191801320811    1       1       50      5元红包 0.0     0.0     5.0     2000.0  3
191801320811    2       2       0       1次抽奖 5000.0  0.0     1.0     0.0     1
191804392803    1       1       50      5元红包 0.0     0.0     5.0     2000.0  3

方法二:

  select
        map_keys(value)[0]
        ,map_values(str_to_map(map_values(value)[0],',',':'))[0]
        ,map_values(str_to_map(map_values(value)[0],',',':'))[1]
        ,map_values(str_to_map(map_values(value)[0],',',':'))[2]
        ,map_values(str_to_map(map_values(value)[0],',',':'))[3]
        ,map_values(str_to_map(map_values(value)[0],',',':'))[4]
        ,map_values(str_to_map(map_values(value)[0],',',':'))[5]
        ,map_values(str_to_map(map_values(value)[0],',',':'))[6]
        ,map_values(str_to_map(map_values(value)[0],',',':'))[7]
    from
    (
        select
            str_to_map(concat(value,'&'),'&','\\=\\{') value
        from
        (
            select
            regexp_replace(b.value,'^\\{|\\}\\}$|\\s','') value
            from
            (
            select
                split(map_keys(value)[0],'},') value
            from redis.ods_allhash_activity_0518_quest_receive
            limit 1
            )a lateral view explode(value) b as value
        )a
    )a;

这种方法没细细研究

你可能感兴趣的:(hive)