Hive:连续 n 天登录

Hive:连续 n 天登录

字段说明:日期(dates)、用户(users)、年龄(age)。示例数据如下(同一用户同一天可能有多条登录记录):

dates,users,age
11,test_1,23
11,test_2,19
11,test_3,39
11,test_1,23
11,test_3,39
11,test_1,23
12,test_2,19
13,test_1,23

两个需求:

1. 求出连续登录(至少连续两天)的用户的总数和平均年龄
2. 求出所有用户的总数和平均年龄

-- Login log table: one row per login event.
--   dates : day number of the login (int, so consecutive days differ by 1)
--   users : user identifier
--   age   : age of the user
-- String literals must use straight quotes; typographic quotes (‘ ’) are not
-- string delimiters in Hive and make the statement fail to parse.
create table if not exists da (
    dates int,
    users string,
    age   int
)
row format delimited fields terminated by ',';

-- Replace the table contents with the local CSV file.
-- NOTE(review): if da.txt still contains the header line "dates,users,age",
-- it will be loaded as a row of NULLs -- strip it or set
-- tblproperties ('skip.header.line.count'='1') on the table.
load data local inpath '/hive/da.txt' overwrite into table da;

需求一:求出连续登录的用户的总数和平均年龄

-- Requirement 1: how many users logged in on consecutive days, and what is
-- their average age. Returns exactly one row.
--
-- Method (gaps and islands): after collapsing to one row per user per day,
-- number each user's days in ascending order. Inside a run of consecutive
-- days both `dates` and the row number grow by 1, so `dates - rn` stays
-- constant; a group of (users, dates - rn) with more than one row is a streak.
-- To require n consecutive days, change the `having` threshold to `>= n`.
--
-- Each streak user is counted once, even with several streaks or several
-- logins per day (the previous version counted raw login rows instead).
-- Assumes a user's age is the same on all of their rows; max(age) is only
-- used to pick a single value per user.
with login_days as (
    -- One row per user per day; repeated logins on the same day collapse.
    select
        users,
        dates,
        max(age) as age
    from da
    group by users, dates
),

numbered as (
    -- streak_key is constant within a run of consecutive days.
    select
        users,
        age,
        dates - row_number() over (partition by users order by dates) as streak_key
    from login_days
),

streaks as (
    -- One row per streak of at least two consecutive days.
    select
        users,
        streak_key,
        max(age) as age
    from numbered
    group by users, streak_key
    having count(*) > 1
),

streak_users as (
    -- A user may own several streaks; keep each user once.
    select
        users,
        max(age) as age
    from streaks
    group by users
)

select
    avg(age) as `连续登陆用户的平均年龄`,
    count(*) as `连续登陆人数的总和`
from streak_users
;

求出所有用户的总数和平均年龄

-- Average age over all users, counting every user exactly once.
-- Averaging directly over da would weight each user by their number of
-- login rows, because da holds one row per login event.
-- Assumes a user's age is the same on all of their rows; max(age) only
-- picks a single value per user.
select
    avg(age) as `平均年龄`
from
(
    select
        users,
        max(age) as age
    from da
    group by users
) per_user;


-- Total number of distinct users that appear in da.
-- The inner query keeps one row per user value; the outer query counts them.
select
    count(*) as `所有人数`
from
(
    select users
    from da
    group by users
) uniq_users
;

你可能感兴趣的:(大数据,hive)