[hive] 经典sql题及答案(三)

推荐:

经典sql题及答案(一)
经典sql题及答案(二)

题目部分

22 、使用hive 求出两个数据集的差集?
数据
t1表:
id name
1 zs
2 ls
t2表:
id name
1 zs
3 ww
结果如下:
id name
2 ls
3 ww

23
[hive] 经典sql题及答案(三)_第1张图片

25 、每个用户连续登陆的最大天数?
数据:
login表
uid,date
1,2019-08-01
1,2019-08-02
1,2019-08-03
2,2019-08-01
2,2019-08-02
3,2019-08-01
3,2019-08-03
4,2019-07-28
4,2019-07-29
4,2019-08-01
4,2019-08-02
4,2019-08-03
结果如下:
uid cnt_days
1 3
2 2
3 1
4 3

32 、有如下三张表:
表A(登录表):
ds user_id
2019-08-06 1
2019-08-06 2
2019-08-06 3
2019-08-06 4
表B(阅读表):
ds user_id read_num
2019-08-06 1 2
2019-08-06 2 3
2019-08-06 3 6
表C(付费表):
ds user_id price
2019-08-06 1 55.6
2019-08-06 2 55.8
基于上述三张表,请使用hive的hql语句实现如下需求:
(1)、用户登录并且当天有个阅读的用户数,已经阅读书籍数量
(2)、用户登录并且阅读,但是没有付费的用户数
(3)、用户登录并且付费,付费用户书籍和金额

37 数据如下:
1,zhangsan,数学,80,2015
2,lisi,语文,90,2016
3,wangwu,化学,70,2017
4,zhangsan,语文,80,2015
5,zhangsan,化学,90,2015
6,lisi,语文,70,2015
[hive] 经典sql题及答案(三)_第2张图片

答案部分

22
create table sql022t1
(
id string,
name string
)
row format delimited 
fields terminated by '\t';
create table sql022t2
like sql022t1;
load data local inpath '/root/in/sql022t1' overwrite into table sql022t1;
load data local inpath '/root/in/sql022t2' overwrite into table sql022t2;

交集
select
    t1.id,
    t1.name,
    t2.name
from
    sql022t1 t1
join
    sql022t2 t2
on
    t1.id=t2.id;tt1
并集
select
    id,
    name
from
    sql022t1
union
select
    id,
    name
from
    sql022t2;tt2
差集=并集-交集
select
tt2.id,
tt2.name
from
    ( 
    select
    t1.id id,
    t1.name name,
    t2.name name1
    from
        sql022t1 t1
    join
        sql022t2 t2
    on
    t1.id=t2.id
    )tt1
right join
    (select
    id,
    name
    from
        sql022t1 t1
    union
    select
        id,
        name
    from
    sql022t2)tt2
on
    tt1.id=tt2.id
where
    tt1.id is null;
    
23
123,dasfdasas,3,200,1535945356,2018-08-08
124,dasfadass,1,200,1535945356,2018-08-08
125,dadassfas,3,200,1535945356,2018-08-09
126,dadassfas,2,200,1535945356,2018-08-09
127,dasfdasas,5,200,1535945356,2018-08-09

create table sql023
(
orderid int,
userid string,
productid int,
price int,
`timestamp` int,
dt string
)
row format delimited
fields terminated by ',';
load data local inpath '/root/in/sql023'into table sql023;

得到每个客户今天和昨天每天购买的商品列表(单个商品只出现一次)
select
    userid,
    productid,
    dt
from
    sql023
where
    dt = '2018-08-08'
or
    dt = '2018-08-09'
group by
    userid,productid,dt;t1
筛选出昨天和今天购买过3号商品的用户
select
    userid,
    productid,
    count(dt)
from
    (select
        userid,
        productid,
        dt
    from
        sql023
    where
        dt = '2018-08-08'
    or
        dt = '2018-08-09'
    group by
        userid,productid,dt)t1
group by
    userid,productid
having
    count(dt)>1;
    
25
1,2019-08-01
1,2019-08-02
1,2019-08-03
2,2019-08-01
2,2019-08-02
3,2019-08-01
3,2019-08-03
4,2019-07-28
4,2019-07-29
4,2019-08-01
4,2019-08-02
4,2019-08-03
create table sql025
(
uid int,
dt string
)
row format delimited
fields terminated by ',';
load data local inpath '/root/in/sql025'into table sql025;
按用户分组加一列等差数列
select
    uid,
    dt,
    row_number() over(partition by uid order by dt) `rank`
from
    sql025;t1
日期与等差数列做差
select
    uid,
    dt,
    `rank`,
    date_sub(dt,`rank`) sub
from
    ()t1;t2
按差值汇聚,通过count来计算每次连续登录的天数
select
    uid,
    sub,
    count(uid) amount
from
    ()t2
group by
    uid,sub;t3
取最大值得到每个用户连续登录的最大天数
select
    uid,
    max(amount)
from
    (select
        uid,
        sub,
        count(uid) amount
    from
        (select
            uid,
            dt,
            `rank`,
            date_sub(dt,`rank`) sub
        from
            (select
                uid,
                dt,
                row_number() over(partition by uid order by dt) `rank`
            from
                sql025)t1)t2
    group by
        uid,sub)t3
group by
    uid;
    
32
2019-08-06,1
2019-08-06,2
2019-08-06,3
2019-08-06,4

2019-08-06,1,2
2019-08-06,2,3
2019-08-06,3,6

2019-08-06,1,55.6
2019-08-06,2,55.8

create table sql032t1
(
ds string,
user_id int
)
row format delimited
fields terminated by ',';
create table sql032t2
(
ds string,
user_id int,
read_num int
)
row format delimited
fields terminated by ',';
create table sql032t3
(
ds string,
user_id int,
price int
)
row format delimited
fields terminated by ',';
load data local inpath '/root/in/sql032t1' into table sql032t1;
load data local inpath '/root/in/sql032t2' into table sql032t2;
load data local inpath '/root/in/sql032t3' into table sql032t3;
(1)
select
    t1.user_id,
    t2.read_num
from
    sql032t1 t1
join
    sql032t2 t2
on
    t1.user_id=t2.user_id;

36
1,zhangsan,数学,80,2015
2,lisi,语文,90,2016
3,wangwu,化学,70,2017
4,zhangsan,语文,80,2015
5,zhangsan,化学,90,2015
6,lisi,语文,70,2015
create table sql037
(
id int,
userid string,
course string,
score int,
term int
)
row format delimited
fields terminated by ',';
load data local inpath'/root/in/sql037'into table sql037;
select
    id,
    userid,
    course,
    score,
    term,
    rank() over(partition by course,term order by score desc) `rank`
from
    sql037;t1
select
    id,
    userid,
    course,
    score,
    term
from
        (select
        id,
        userid,
        course,
        score,
        term,
        rank() over(partition by course,term order by score desc) `rank`
    from
        sql037)t1
where
    `rank`=1;

你可能感兴趣的:(hive,SQL练习)