1.行转列
1.1 问题引入:
如何将
a b 1,2,3
c d 4,5,6
变为:
a b 1
a b 2
a b 3
c d 4
c d 5
c d 6
1.2 原始数据:test.txt
a b 1,2,3
c d 4,5,6
1.3 解决方法
方案1:
-- Remove any earlier copy of the demo table so the walkthrough can be re-run.
-- The if exists guard makes the drop a no-op when the table was never created.
drop table if exists test_jzl_20140701_test;
建表:
-- Demo table for test.txt with three space-separated text fields per line.
-- col3 keeps the comma-separated list (for example 1,2,3) as a plain string.
create table test_jzl_20140701_test (
    col1 string,
    col2 string,
    col3 string
)
row format delimited
    fields terminated by ' '
stored as textfile;
加载数据:
-- Load the sample file from the local filesystem into the demo table.
-- There is no overwrite keyword, so the rows are added to whatever the table holds.
load data local inpath '/home/jiangzl/shell/test.txt'
into table test_jzl_20140701_test;
查看表中所有数据:
-- Show the rows exactly as loaded. col3 is still the unsplit comma-separated list.
select
    col1,
    col2,
    col3
from test_jzl_20140701_test;
a b 1,2,3
c d 4,5,6
遍历数组中的每一个元素:
-- Split col3 on commas and emit one output row per element.
-- col1 and col2 are repeated for every element produced from their source row.
select
    col1,
    col2,
    name
from test_jzl_20140701_test
lateral view explode(split(col3, ',')) exploded_col3 as name;
a b 1
a b 2
a b 3
c d 4
c d 5
c d 6
方案2:
-- Remove any earlier copy of the array-typed demo table so the walkthrough can be re-run.
-- The if exists guard makes the drop a no-op when the table was never created.
drop table if exists test_jzl_20140701_test1;
建表:
-- Variant of the demo table where col3 is declared as array<int>, so the
-- comma-separated list is read as an array instead of a plain string.
-- The collection items clause sets comma as the delimiter between array elements.
create table test_jzl_20140701_test1
(
col1 string,
col2 string,
col3 array<int>
)
row format delimited
fields terminated by ' '
collection items terminated by ','
stored as textfile;
加载数据:
-- Load the same sample file from the local filesystem into the array-typed table.
-- There is no overwrite keyword, so the rows are added to whatever the table holds.
load data local inpath '/home/jiangzl/shell/test.txt'
into table test_jzl_20140701_test1;
查看表中所有数据:
-- Show the loaded rows. col3 is now displayed as an array such as [1,2,3].
select
    col1,
    col2,
    col3
from test_jzl_20140701_test1;
a b [1,2,3]
c d [4,5,6]
遍历数组中的每一个元素:
-- col3 is already an array here, so it can be exploded directly without split.
-- Each array element becomes its own output row next to col1 and col2.
select
    col1,
    col2,
    name
from test_jzl_20140701_test1
lateral view explode(col3) exploded_col3 as name;
a b 1
a b 2
a b 3
c d 4
c d 5
c d 6
1.4补充知识点:
-- Recap of the string-typed demo table used by the following examples.
select
    col1,
    col2,
    col3
from test_jzl_20140701_test;
a b 1,2,3
c d 4,5,6
-- Split col3 once in a subquery, then pick the first three elements by index.
select
    parts.items[0],
    parts.items[1],
    parts.items[2]
from (
    select split(col3, ',') as items
    from test_jzl_20140701_test
) parts;
OK
1 2 3
4 5 6
-- Check the array length, i.e. how many elements each row's col3 list contains.
select size(split(col3, ',')) as list
from test_jzl_20140701_test;
3
3
2.列转行
2.1问题引入:
hive如何将
a b 1
a b 2
a b 3
c d 4
c d 5
c d 6
变为:
a b 1,2,3
c d 4,5,6
2.2 原始数据:
test.txt(字段之间以制表符分隔)
a b 1
a b 2
a b 3
c d 4
c d 5
c d 6
2.3 解决方法:
-- Remove any earlier copy of the demo table so the walkthrough can be re-run.
-- The if exists guard makes the drop a no-op when the table was never created.
drop table if exists tmp_jiangzl_test;
建表:
-- Demo table for the one-value-per-row input with three tab-separated text fields.
create table tmp_jiangzl_test (
    col1 string,
    col2 string,
    col3 string
)
row format delimited
    fields terminated by '\t'
stored as textfile;
加载数据:
-- Load the sample file from the local filesystem into the demo table.
-- There is no overwrite keyword, so the rows are added to whatever the table holds.
load data local inpath '/home/jiangzl/shell/test.txt'
into table tmp_jiangzl_test;
处理:
-- Collapse the rows of each (col1, col2) group into a single row and join the
-- col3 values of the group with commas.
-- NOTE(review): collect_set drops duplicate values, and the order of the joined
-- values is presumably not guaranteed. Verify before relying on either.
select
    col1,
    col2,
    concat_ws(',', collect_set(col3))
from tmp_jiangzl_test
group by
    col1,
    col2;