hive实现树形结构展开

结果样式

hive实现树形结构展开_第1张图片

-- sql原q
SELECT SYS_CONNECT_BY_PATH(T.AREANAME, '-->') AS RESULT,
     T.AREACODE,
     T.AREANAME,
     T.PARENTCODE,
     LEVEL
FROM TAREAINFOTREE T
START WITH T.AREACODE = '0001'
CONNECT BY PRIOR T.AREACODE = T.PARENTCODE
ORDER BY LEVEL, T.AREACODE;

hive 准备

--              --
-- 测试环境准备 --
--              --

-- 创建测试表
drop table if exists pub.test;
create table pub.test(
    areaname string,areacode string,parentcode string
)
row format delimited
fields terminated by ',';
-- 创建测试数据
-- values 不好使 why!
INSERT into TABLE pub.test 
SELECT
    t.*
from(SELECT 0 ) a
lateral view inline(array(
    struct('a','0001' ,'0'),struct('aa','00010001','0001'),
    struct('ab','00010002','0001'),struct('ac','00010003','0001'),
    struct('b','0002','0'),struct('ba','00020001','0002'),
    struct('aaa','000100010001','00010001'),
    struct('aaaa','0001000100010001','000100010001'),
    struct('aaaaa','00010001000100010001','0001000100010001')
)) t as areaname,areacode,parentcode;

执行hql脚本

--              --
-- 查询插入结果 --
--              --

-- 创建临时表
drop table if exists pub.test_temp;
create table pub.test_temp(
	level1 struct<areaname:string,areacode:string,parentcode:string>,
	level2 struct<areaname:string,areacode:string,parentcode:string>,
	level3 struct<areaname:string,areacode:string,parentcode:string>,
	level4 struct<areaname:string,areacode:string,parentcode:string>,
	level5 struct<areaname:string,areacode:string,parentcode:string>
);
-- 1
INSERT overwrite table pub.test_temp
SELECT
	named_struct('areaname',t.areaname,'areacode',t.areacode,'parentcode',t.parentcode),
	named_struct('areaname','','areacode','','parentcode',''),named_struct('areaname','','areacode','','parentcode',''),
	named_struct('areaname','','areacode','','parentcode',''),named_struct('areaname','','areacode','','parentcode','')
from pub.test t
where LENGTH(t.areacode)<=4;
-- 2
INSERT into table pub.test_temp
SELECT
	a.level1,
	named_struct('areaname',t.areaname,'areacode',t.areacode,'parentcode',t.parentcode),
	named_struct('areaname','','areacode','','parentcode',''),
	named_struct('areaname','','areacode','','parentcode',''),named_struct('areaname','','areacode','','parentcode','')
from pub.test t
join pub.test_temp a
on t.parentcode=a.level1.areacode;
-- 3
INSERT into table pub.test_temp
SELECT
	a.level1,a.level2,
	named_struct('areaname',t.areaname,'areacode',t.areacode,'parentcode',t.parentcode),
	named_struct('areaname','','areacode','','parentcode',''),named_struct('areaname','','areacode','','parentcode','')
from pub.test t
join pub.test_temp a
on t.parentcode=a.level2.areacode;
-- 4
INSERT into table pub.test_temp
SELECT
	a.level1,a.level2,a.level3,
	named_struct('areaname',t.areaname,'areacode',t.areacode,'parentcode',t.parentcode),
	named_struct('areaname','','areacode','','parentcode','')
from pub.test t
join pub.test_temp a
on t.parentcode=a.level3.areacode;
-- 5
INSERT into table pub.test_temp
SELECT
	a.level1,a.level2,a.level3,a.level4,
	named_struct('areaname',t.areaname,'areacode',t.areacode,'parentcode',t.parentcode)
from pub.test t
join pub.test_temp a
on t.parentcode=a.level4.areacode;
-- 目标查询
SELECT
	*
FROM(
	SELECT
		CONCAT('-->',t.level1.areaname,if(LENGTH(t.level2.areacode)>0,
			CONCAT('-->',t.level2.areaname,if(LENGTH(t.level3.areacode)>0,
				CONCAT('-->',t.level3.areaname,if(LENGTH(t.level4.areacode)>0,
					CONCAT('-->',t.level4.areaname,if(LENGTH(t.level5.areacode)>0,
						CONCAT('-->',t.level5.areaname),'')
					),'')
				),'')
			),'')
		) as result,
		case 
			WHEN length(t.level5.areacode)>0 THEN t.level5.areacode 
			WHEN length(t.level4.areacode)>0 THEN t.level4.areacode
			WHEN length(t.level3.areacode)>0 THEN t.level3.areacode 
			WHEN length(t.level2.areacode)>0 THEN t.level2.areacode 
			WHEN length(t.level1.areacode)>0 THEN t.level1.areacode 
		END as areacode,
		case 
			WHEN length(t.level5.areaname)>0 THEN t.level5.areaname 
			WHEN length(t.level4.areaname)>0 THEN t.level4.areaname
			WHEN length(t.level3.areaname)>0 THEN t.level3.areaname 
			WHEN length(t.level2.areaname)>0 THEN t.level2.areaname 
			WHEN length(t.level1.areaname)>0 THEN t.level1.areaname 
		END as areaname,
		case 
			WHEN length(t.level5.parentcode)>0 THEN t.level5.parentcode 
			WHEN length(t.level4.parentcode)>0 THEN t.level4.parentcode
			WHEN length(t.level3.parentcode)>0 THEN t.level3.parentcode 
			WHEN length(t.level2.parentcode)>0 THEN t.level2.parentcode 
			WHEN length(t.level1.parentcode)>0 THEN t.level1.parentcode 
		END as parentcode,
		case 
			WHEN length(t.level5.parentcode)>0 THEN 5 
			WHEN length(t.level4.parentcode)>0 THEN 4
			WHEN length(t.level3.parentcode)>0 THEN 3
			WHEN length(t.level2.parentcode)>0 THEN 2
			WHEN length(t.level1.parentcode)>0 THEN 1
		END as level
	from pub.test_temp t
)t
ORDER BY t.level;

检查

--检验流程结果
select * from pub.test;
SELECT * from pub.test_temp t;

你可能感兴趣的:(bigdata,hive)