Hive SQL 练习

源表:
province,province_id,city,city_id,area,area_id
甘肃省,11,张掖市,21,甘州区,31
甘肃省,11,张掖市,21,山丹县,32
甘肃省,11,张掖市,21,高台县,33
甘肃省,11,兰州市,22,七里河,34
甘肃省,11,兰州市,22,新区,35
北京,12,北京,12,海淀区,36
北京,12,北京,12,昌平区,37

目标表(把省、市、区三级各展开成一行,给出其上层地区id,省级的上层地区id记为0):
+------+-------+---------+--+
|  地区  | 地区id  | 上层地区id  |
+------+-------+---------+--+
| 昌平区  | 37    | 12      |
| 海淀区  | 36    | 12      |
| 山丹县  | 32    | 21      |
| 甘州区  | 31    | 21      |
| 高台县  | 33    | 21      |
| 七里河  | 34    | 22      |
| 新区   | 35    | 22      |
| 兰州市  | 22    | 11      |
| 张掖市  | 21    | 11      |
| 北京   | 12    | 12      |
| 北京   | 12    | 0       |
| 甘肃省  | 11    | 0       |
+------+-------+---------+--+


-- Region table for the exercise: each row describes one area together with the
-- name and id of the city and province it belongs to.
-- Fields in the backing data are separated by commas.
-- Every id column is declared as string, so literals compared with or unioned
-- against them should be strings as well.
-- "if not exists" lets the script be re-run without failing on an existing table.
create table if not exists diqu (
    province    string,
    province_id string,
    city        string,
    city_id     string,
    area        string,
    area_id     string
)
row format delimited
fields terminated by ','
;
解法一(使用 grouping sets 一次聚合出省、市、区三级):
-- Flattens the province / city / area hierarchy stored in diqu into rows of
-- (region name, region id, parent region id).
--
-- The inner query aggregates at three levels in a single pass with grouping sets.
-- Columns that are not part of the active grouping set come back as NULL, and the
-- outer case expressions use those NULLs to tell the levels apart:
--   province_id is null -> area row      (parent is city_id)
--   city_id is null     -> province row  (parent is '0', meaning "no parent")
--   otherwise           -> city row      (parent is province_id)
--
-- NOTE(review): this level detection assumes province_id and city_id are never
-- NULL in the source data; a NULL there would be classified at the wrong level.
--
-- The "no parent" marker is the string '0' so that every branch of the parent-id
-- expression has the same type as the string id columns, rather than depending on
-- implicit numeric-to-string coercion.
select
    case
        when province_id is null then area
        when city_id is null then province
        else city
    end as `地区`,
    case
        when province_id is null then area_id
        when city_id is null then province_id
        else city_id
    end as `地区id`,
    case
        when province_id is null then city_id
        when city_id is null then '0'
        else province_id
    end as `上层地区id`
from (
    select
        province,
        province_id,
        city,
        city_id,
        area,
        area_id
    from diqu
    group by
        province,
        province_id,
        city,
        city_id,
        area,
        area_id
    grouping sets (
        (province, province_id),
        (city, city_id, province_id),
        (area, area_id, city_id)
    )
) region_levels
;
+------+-------+---------+--+
|  地区  | 地区id  | 上层地区id  |
+------+-------+---------+--+
| 昌平区  | 37    | 12      |
| 海淀区  | 36    | 12      |
| 山丹县  | 32    | 21      |
| 甘州区  | 31    | 21      |
| 高台县  | 33    | 21      |
| 七里河  | 34    | 22      |
| 新区   | 35    | 22      |
| 兰州市  | 22    | 11      |
| 张掖市  | 21    | 11      |
| 北京   | 12    | 12      |
| 北京   | 12    | 0       |
| 甘肃省  | 11    | 0       |
+------+-------+---------+--+
12 rows selected (19.641 seconds)
解法二(省、市、区三级分别去重后用 union 合并):
-- Builds the same (region name, region id, parent region id) listing by handling
-- each level of the hierarchy separately and merging the three result sets.
--   branch 1: provinces, whose parent id is the fixed string '0'
--   branch 2: cities, whose parent id is the province id
--   branch 3: areas, whose parent id is the city id
-- Each branch removes its own duplicates with "select distinct"; "union" (not
-- "union all") additionally removes rows that are identical across branches.
select distinct
    province    as `地区`,
    province_id as `地区id`,
    '0'         as `上层地区id`
from diqu
union
select distinct
    city        as `地区`,
    city_id     as `地区id`,
    province_id as `上层地区id`
from diqu
union
select distinct
    area    as `地区`,
    area_id as `地区id`,
    city_id as `上层地区id`
from diqu

+---------+-----------+-------------+--+
| _u2.地区  | _u2.地区id  | _u2.上层地区id  |
+---------+-----------+-------------+--+
| 七里河     | 34        | 22          |
| 兰州市     | 22        | 11          |
| 北京      | 12        | 0           |
| 北京      | 12        | 12          |
| 山丹县     | 32        | 21          |
| 张掖市     | 21        | 11          |
| 新区      | 35        | 22          |
| 昌平区     | 37        | 12          |
| 海淀区     | 36        | 12          |
| 甘州区     | 31        | 21          |
| 甘肃省     | 11        | 0           |
| 高台县     | 33        | 21          |
+---------+-----------+-------------+--+
12 rows selected (91.654 seconds)

你可能感兴趣的:(hive,hive,sql)