sql求平均日活_n套SQL面试题--行转列、留存、日活等

第一套

面试题1.请写出表一中各地市客户数、总费用(ARPU之和) 的SQL语句

2.请写出表一中各地市ARPU(0,30),[30,50),[50,80),[80以上)客户数分别是多少的SQL语句

3.表二中用户有重复的记录,请写出提取有2条及以上记录的用户的SQL语句

解答1

-- Per-city customer count and total fee (sum of ARPU) from t1.
-- NOTE(review): the counted column is spelled `useid` here while the t2 query
-- uses `userid` -- confirm the actual column name in t1.
select
    city,
    count(useid) as '客户数',
    sum(ARPU)
from t1
group by city;

解答2

-- Per-city customer counts split into the ARPU bands (0,30), [30,50), [50,80)
-- and [80, +inf). Each band is a conditional sum of 1/0 flags.
select
    city,
    sum(case when ARPU < 30 and ARPU > 0   then 1 else 0 end) as 'ARPU(0,30)客户数',
    sum(case when ARPU < 50 and ARPU >= 30 then 1 else 0 end) as 'ARPU[30,50)客户数',
    sum(case when ARPU < 80 and ARPU >= 50 then 1 else 0 end) as 'ARPU[50,80)客户数',
    sum(case when ARPU >= 80               then 1 else 0 end) as 'ARPU[80以上)客户数'
from t1
group by city;

解答3

-- Users that have more than one row in t2.
select userid
from t2
group by userid
having count(userid) > 1;

第二套

思路:第一步--按照用户和场景分组,求每组的最早访问时间(即 min(inttime)),按照用户和访问时间 升序排列,记为表1,目的是为了避免重复场景出现。

第二步-- 使用窗口函数,对表1用户名进行分组,按照访问时间升序排列,记为表2;

第三步-- 筛选表2每组访问时间的前两名,然后对用户进行分组,使用group_concat函数对分组后的场景进行连接,最后使用concat函数将用户名和联结好的场景再次联结。

-- For every user, join the first two distinct scenes they visited into one
-- string of the form "<userid>-<scene1>-<scene2>".
--
-- Changes versus the previous version:
--   * group_concat now has an explicit ORDER BY; without it the order of the
--     concatenated scenes is not guaranteed.
--   * the ORDER BY inside the derived table was removed: the ordering of a
--     derived table is not guaranteed to carry over to the outer query.
--   * changjing is used as a tie-breaker so equal timestamps rank deterministically.
select
    concat(
        ranked.userid,
        '-',
        group_concat(
            ranked.changjing
            order by ranked.inttime, ranked.changjing
            separator '-'
        )
    ) as user_scene_path
from (
    -- Rank each user's scenes by the time the scene was first visited.
    select
        first_visit.userid,
        first_visit.changjing,
        first_visit.inttime,
        row_number() over (
            partition by first_visit.userid
            order by first_visit.inttime, first_visit.changjing
        ) as ranking
    from (
        -- One row per (user, scene) holding the earliest visit, so a scene
        -- visited several times is only considered once.
        select
            userid,
            changjing,
            min(inttime) as inttime
        from datafrog_test1
        group by userid, changjing
    ) as first_visit
) as ranked
where ranked.ranking <= 2
group by ranked.userid;

第三套

1.建数据库和表

-- Database and table used by the active-user and retention exercises below.
create database camera;
use camera;

create table userinfo (
    uid      varchar(10),
    app_name varchar(20),
    duration int(10),
    times    int(10),
    dayno    varchar(30)  -- day kept as text; later queries pass it to datediff()
);

2.导入数据

日期格式处理(不处理也可以)

-- Rewrites dayno for every row (there is no WHERE clause): each value is parsed
-- with the '%Y-%m-%d' format by str_to_date() and the result is stored back.
update userinfo set dayno=str_to_date(dayno,'%Y-%m-%d');

1.每天的活跃用户数

-- Daily active users of the '相机' app: number of distinct uids per day.
select
    dayno               as '日期',
    count(distinct uid) as '活跃用户数'
from userinfo
where app_name = '相机'
group by dayno;

2.次日留存

-- Next-day retention for the '相机' app: for each day, the number of distinct
-- users who were active that day and again on the following day.
--
-- The filter on the joined copy (s) lives in the ON clause. Placing it in WHERE
-- rejects the NULL-extended rows of the LEFT JOIN, which silently turns it into
-- an inner join and drops every day that has no retained user; with the filter
-- in ON those days are reported with a count of 0.
select
    u.dayno               as 日期,
    count(distinct s.uid) as '次日留存'
from userinfo as u
left join userinfo as s
    on  s.uid = u.uid
    and s.app_name = '相机'
    and datediff(s.dayno, u.dayno) = 1
where u.app_name = '相机'
group by u.dayno;

次日留存,三日留存,7日留存等只需要更改日期相差的天数即可。

因为这道题需要一条sql语句得到图中的结果,那么我们在求留存时可以通过另一种方法。

-- Daily actives plus 1/3/7-day retention counts and rates for the '相机' app,
-- computed in a single statement.
-- Every '相机' activity row (a) is paired with every '相机' row (b) of the same
-- uid; the datediff() tests inside the aggregates select the pairs that are
-- exactly N days apart.
select
    a.dayno               as 日期,
    count(distinct a.uid) as 活跃,
    count(distinct case when datediff(b.dayno, a.dayno) = 1 then a.uid end) as 次留,
    count(distinct case when datediff(b.dayno, a.dayno) = 3 then a.uid end) as 三留,
    count(distinct case when datediff(b.dayno, a.dayno) = 7 then a.uid end) as 七留,
    concat(
        count(distinct case when datediff(b.dayno, a.dayno) = 1 then a.uid end)
            / count(distinct a.uid) * 100,
        '%'
    ) as 次日留存率,
    concat(
        count(distinct case when datediff(b.dayno, a.dayno) = 3 then a.uid end)
            / count(distinct a.uid) * 100,
        '%'
    ) as 三日留存率,
    concat(
        count(distinct case when datediff(b.dayno, a.dayno) = 7 then a.uid end)
            / count(distinct a.uid) * 100,
        '%'
    ) as 七日留存率
from userinfo as a
-- Inner join: equivalent to a left join whose right side is filtered in WHERE.
inner join userinfo as b
    on  b.uid = a.uid
    and b.app_name = '相机'
where a.app_name = '相机'
group by a.dayno;

第四套

1.建表

-- Database, table and sample rows for the row-to-column (pivot) exercise.
create database test_school;
use test_school;

create table course (
    id         varchar(20),
    teacher_id varchar(20),
    week_day   varchar(20),
    has_course varchar(20)
);

-- Numeric literals are stored into the varchar columns as written.
insert into course (id, teacher_id, week_day, has_course) values
    (1, 1, 2, "Yes"),
    (2, 1, 3, "Yes"),
    (3, 2, 1, "Yes"),
    (4, 3, 2, "Yes"),
    (5, 1, 2, "Yes");

2.行转列

-- Pivot course rows into one row per teacher with a column per weekday.
--
-- Changes versus the previous version:
--   * max(...) plus GROUP BY collapses each teacher's rows into a single row;
--     without aggregation every course row produced its own mostly-NULL row,
--     so the result was not actually pivoted.
--   * the Wednesday column is named wed (it was mislabelled thi).
--   * week_day is a varchar column, so it is compared with string literals.
select
    teacher_id,
    max(case when week_day = '1' then has_course end) as mon,
    max(case when week_day = '2' then has_course end) as tue,
    max(case when week_day = '3' then has_course end) as wed,
    max(case when week_day = '4' then has_course end) as thu,
    max(case when week_day = '5' then has_course end) as fri
from course
group by teacher_id
order by teacher_id;

第五套

1.建表

-- Table and sample row for the column-to-row (unpivot) exercise.
create table a1 (
    name    varchar(20),
    english int,
    maths   int,
    music   int
);

insert into a1 (name, english, maths, music) values
    ("Jim", 90, 88, 99);

2.列转行

-- Unpivot the three score columns of a1 into (name, subject, score) rows.
-- UNION ALL is used instead of UNION: the three branches can never collide
-- with each other (they carry different subject literals), and UNION would
-- additionally merge two source rows that share the same name and score,
-- silently losing a row.
select name, 'english' as subject, english as score from a1
union all
select name, 'maths'   as subject, maths   as score from a1
union all
select name, 'music'   as subject, music   as score from a1;

第六套

1.建表

-- Table and sample rows for the monthly / running total exercise.
-- The table name is spelled A2 consistently: on servers where table names are
-- case-sensitive, creating A2 and then inserting into a2 fails with
-- "table doesn't exist".
create table A2 (
    FDATE datetime,
    value int
);

insert into A2 (FDATE, value) values
    ("2018/11/23", 10),
    ("2018/12/31", 3),
    ("2019/2/9",   53),
    ("2019/3/31",  23),
    ("2019/7/8",   11),
    ("2019/7/31",  10);

-- Quick look at the loaded rows.
select * from A2;

2.添加索引

-- Index on the date column, then list the table's indexes.
-- The table is referenced as A2, matching the spelling used in its CREATE TABLE,
-- so the statements also work where table names are case-sensitive.
create index id_FDATE on A2 (FDATE);

show index from A2;

3.解答

-- Monthly totals with a year-to-date running sum (ysum) and an overall
-- running sum (sum).
--
-- Changes versus the previous version:
--   * the derived table no longer selects the bare fdate column: it is neither
--     grouped nor aggregated, so the statement is rejected under
--     ONLY_FULL_GROUP_BY and otherwise returns an arbitrary date per month.
--   * the overall running sum is ordered by (fyear, fmonth) instead of that
--     indeterminate fdate.
--   * ordering is done on the outer query; an ORDER BY inside the derived
--     table does not guarantee the order of the final result.
select
    fyear,
    fmonth,
    value,
    sum(value) over (partition by fyear order by fmonth) as ysum,
    sum(value) over (order by fyear, fmonth)             as `sum`
from (
    -- One row per calendar month with the month's total.
    select
        year(fdate)  as fyear,
        month(fdate) as fmonth,
        sum(value)   as value
    from A2
    group by year(fdate), month(fdate)
) as b
order by fyear, fmonth;

第七套

1.建表

-- Logon history table and sample rows for the two userlog questions.
-- lastlogon is stored as text; the queries below format it with date_format().
-- NOTE(review): the e-mail literals look like redaction placeholders left by
-- the page the data was copied from -- confirm against the original data.
create table userlog (
    id           int,
    name         varchar(10),
    EmailAddress varchar(50),
    lastlogon    varchar(50)
);

insert into userlog values
    (100, 'test4', '[email protected]', '2007-11-25 16:31:26'),
    (13,  'test1', '[email protected]', '2007-3-22 16:27:07'),
    (19,  'test1', '[email protected]', '2007-10-25 14:13:46'),
    (42,  'test1', '[email protected]', '2007-10-25 14:20:10'),
    (45,  'test2', '[email protected]', '2007-4-25 14:17:39'),
    (49,  'test2', '[email protected]', '2007-5-25 14:22:36');

2.第一问

-- Per user name: the maximum formatted logon timestamp, the maximum formatted
-- logon day, and the number of distinct logon days.
-- The aggregate expressions are left unaliased on purpose so the result column
-- headers stay exactly as before.
select
    t1.name,
    max(t1.lastlogon),
    max(t1.days),
    count(distinct days)
from (
    -- Zero-padded timestamp and day strings derived from the text column.
    select
        name,
        date_format(lastlogon, '%Y-%m-%d %H:%i:%s') as lastlogon,
        date_format(lastlogon, '%Y-%m-%d')          as days
    from userlog
) as t1
group by t1.name;

3.第二问

-- Build a temporary table that numbers each user's logons:
--   num_logontime: running row number by logon timestamp within a name
--   num_logonday : dense rank of the logon day within a name
drop table if exists tmp_table;

create temporary table tmp_table
select
    name,
    lastlogon,
    row_number() over (
        partition by name
        order by date_format(lastlogon, '%Y-%m-%d %H:%i:%s')
    ) as num_logontime,
    dense_rank() over (
        partition by name
        order by date_format(lastlogon, '%Y-%m-%d')
    ) as num_logonday
from userlog;

select * from tmp_table;

第八套

1.建表

-- Source table for the concatenate / split exercise: one row per (qq, game).
create table tableA (
    qq   int(20),
    game varchar(20)
);

insert into tableA (qq, game) values
    (10000, "a"),
    (10000, "b"),
    (10000, "c"),
    (20000, "c"),
    (20000, "d");

2.解答a

-- Answer a: collapse each qq's games into a single "-"-separated string and
-- keep the result in a temporary table for the split step.
drop table if exists tableB;

create temporary table tableB as
select
    qq,
    group_concat(game separator "-") as game
from tableA
group by qq;

select * from tableB;

2.解答b

-- Answer b: split the "-"-separated game string back into one row per game.
-- mysql.help_topic is joined only for its help_topic_id column, used as a
-- sequence 0..k-1, where k = (number of "-" separators in the string) + 1.
select
    tb.qq,
    substring_index(
        substring_index(tb.game, "-", ht.help_topic_id + 1),  -- first (id+1) items
        "-",
        -1                                                    -- keep the last of them
    ) as game
from tableB as tb
left join mysql.help_topic as ht
    on ht.help_topic_id < (length(tb.game) - length(replace(tb.game, "-", "")) + 1);

第九套

2.建表导入数据

2.1用户活跃模型表

2.2红包参与领取模型表

2.3用户活跃模型表数据导入

2.4红包参与领取模型表数据导入

关注公众号 DataLion ,回复 ‘面试题数据’即可领取。

3.解答

3.1 计算2019年6月1日至今,每日DAU(活跃用户量,即有登陆的用户)

-- 3.1 Daily DAU from 2019-06-01 (inclusive) onwards.
--
-- Changes versus the previous version:
--   * the lower bound is >= '20190601'; the strict > excluded June 1st itself
--     although the task starts on that day.
--   * users are counted with DISTINCT so a user with more than one row on the
--     same day is counted once.
select
    imp_date,
    count(distinct qimei) as dau
from tmp_liujg_dau_based
where imp_date >= '20190601'
group by imp_date
order by imp_date;

3.2 计算20190601至今,每日领取红包的新用户数,老用户数,及人均领取金额,人均领取次数

-- 3.2 From 2019-06-01 onwards, per day: number of new / returning / not-logged-in
-- users who received a red packet, plus average amount and average number of
-- receipts per receiving user.
--
-- The three user counts use COUNT(DISTINCT ...): the per-capita receipt count
-- below divides rows by distinct users, so a user can have several rows per
-- day, and a plain COUNT would count receipts rather than users.
select
    a.imp_date,
    count(distinct case when a.is_new = '新用户'     then a.qimei end) as '新用户数',
    count(distinct case when a.is_new = '老用户'     then a.qimei end) as '老用户数',
    count(distinct case when a.is_new = '未登录用户' then a.qimei end) as '未登录用户',
    round(sum(a.add_money) / count(distinct a.qimei), 2) as '人均领取金额',
    round(count(a.qimei)   / count(distinct a.qimei), 0) as '人均领取次数'
from (
    -- Tag every receipt row with the user's status on that day; receipts with
    -- no matching DAU row for the same day fall into the "not logged in" bucket.
    select
        p.imp_date,
        p.qimei,
        p.add_money,
        case
            when d.is_new = 1 then '新用户'
            when d.is_new = 0 then '老用户'
            else '未登录用户'
        end as is_new
    from tmp_liujg_packed_based as p
    left join tmp_liujg_dau_based as d
        on  p.imp_date = d.imp_date
        and p.qimei = d.qimei
    where p.imp_date >= '20190601'
) as a
group by a.imp_date;

3.3 计算2019年3月至今,每个月按领取红包天数为1、2、3……30、31天区分,计算每个月领取红包的用户数,人均领取金额,人均领取次数

-- 3.3 From 2019-03 onwards, per month and per number of days on which a user
-- received red packets (1, 2, ... 31): number of such users, average amount
-- per user and average number of receipts per user.
--
-- Changes versus the previous version:
--   * rows before 2019-03-01 are filtered out, as the task requires.
--   * the result is broken down by each user's number of receiving days; the
--     previous query only reported the number of distinct dates in the month.
select
    per_user.month,
    per_user.get_money_days,
    count(*)                                          as user_count,
    round(sum(per_user.total_money)   / count(*), 2)  as mean_money_get,
    round(sum(per_user.receive_count) / count(*), 2)  as mean_money_count
from (
    -- One row per (month, user): days with a receipt, total amount, receipts.
    select
        left(imp_date, 6)        as month,
        qimei,
        count(distinct imp_date) as get_money_days,
        sum(add_money)           as total_money,
        count(*)                 as receive_count
    from tmp_liujg_packed_based
    where imp_date >= '20190301'
    group by left(imp_date, 6), qimei
) as per_user
group by per_user.month, per_user.get_money_days
order by per_user.month, per_user.get_money_days;

3.4 计算2019年3月至今,每个月领过红包用户和未领红包用户的数量,平均月活跃天数(即本月平均活跃多少天)

思路:先写出每个月领红包的用户 记为表1,将该表与活跃用户表右联结,根据表1空值与否来区分红包用户还是非红包用户。

-- 3.4 From 2019-03 onwards, per month: number of active users who received a
-- red packet, number who did not, and the average number of active days per
-- active user.
--
-- Changes versus the previous version:
--   * average active days = distinct (day, user) pairs / distinct users. The
--     previous numerator, count(distinct is_packet_user), can only be 1 or 2.
--   * the activity table is filtered to imp_date >= '20190301' as well, so
--     earlier months no longer appear in the result.
--   * the unused helper columns of the derived table were dropped.
select
    left(cc.imp_date, 6) as 月份,
    count(distinct case when cc.is_packet_user = '红包用户'   then cc.qimei end) as '红包用户数',
    count(distinct case when cc.is_packet_user = '非红包用户' then cc.qimei end) as '非红包用户数',
    count(distinct cc.imp_date, cc.qimei) / count(distinct cc.qimei)            as '月活跃天数'
from (
    -- Every activity row, tagged by whether the user received a red packet in
    -- the same month.
    select
        a.imp_date,
        a.qimei,
        case when b.qimei is not null then '红包用户' else '非红包用户' end as is_packet_user
    from tmp_liujg_dau_based as a
    left join (
        -- Distinct (month, user) pairs with at least one receipt.
        select distinct
            left(imp_date, 6) as packet_month,
            qimei
        from tmp_liujg_packed_based
        where imp_date >= '20190301'
    ) as b
        on  left(a.imp_date, 6) = b.packet_month
        and a.qimei = b.qimei
    where a.imp_date >= '20190301'
) as cc
group by left(cc.imp_date, 6);

3.5 计算2019年3月至今,每个月活跃用户的注册日期,2019年3月1日前注册的用户日期填空即可

思路:先写出每个用户的注册日期,也就是每个新用户最小的日期,将该表和活跃用户表右联结。

-- 3.5 From 2019-03 onwards, per month and active user: the user's registration
-- date, left NULL for users registered before 2019-03-01.
--
-- Changes versus the previous version:
--   * both the activity rows and the registration lookup are limited to
--     imp_date >= '20190301'; previously earlier months were listed and
--     earlier registration dates were shown instead of being left empty.
--   * the registration date is wrapped in min() so every selected column is
--     either grouped or aggregated (each user has a single value in b, so the
--     result is the same).
select
    left(a.imp_date, 6) as month,
    a.qimei,
    min(b.imp_date)     as imp_date
from tmp_liujg_dau_based as a
left join (
    -- Registration date = first day on which the user is flagged as new.
    select
        qimei,
        min(imp_date) as imp_date
    from tmp_liujg_dau_based
    where is_new = 1
      and imp_date >= '20190301'
    group by qimei
) as b
    on a.qimei = b.qimei
where a.imp_date >= '20190301'
group by left(a.imp_date, 6), a.qimei
order by left(a.imp_date, 6), a.qimei;

第十套

1 建表

-- Tables for the department income exercise, plus the game reference rows.
create table game (
    department varchar(20),
    game_name  varchar(20),
    game_id    int(10)
);

create table income (
    uid          int(10),
    game_id      int(10),
    income_money int(10),
    income_time  varchar(10)  -- stored as 'YYYY-MM-DD' text in the sample rows
);

insert into game (department, game_name, game_id) values
    ('业务1', '开心消消乐', 1000),
    ('业务2', '阴阳师',     1001),
    ('业务1', '刺激战场',   1005),
    ('业务4', '王者荣耀',   1007),
    ('业务5', '欢乐斗地主', 1002);

-- Sample income rows.
-- Fix: the comma after the second (2338, 1007, 648, '2020-03-08') row was
-- missing, which made the whole INSERT statement a syntax error.
insert into income (uid, game_id, income_money, income_time) values
    (2333, 1000, 168, '2020-04-01'),
    (2333, 1000, 268, '2020-04-02'),
    (2333, 1001, 30,  '2020-03-03'),
    (2334, 1005, 6,   '2020-03-04'),
    (2336, 1005, 1,   '2020-03-05'),
    (2339, 1007, 648, '2020-03-06'),
    (2338, 1007, 648, '2020-03-08'),
    (2338, 1007, 648, '2020-03-08'),
    (2333, 1000, 168, '2020-03-01'),
    (2333, 1000, 60,  '2020-03-01');

2 解答

-- Total income per department and game for income dated 2020-01-01 through
-- 2020-03-31 (both bounds inclusive; income_time is compared as text).
select
    g.department,
    g.game_name,
    sum(i.income_money) as sum_income_money
from income as i
inner join game as g
    on g.game_id = i.game_id
where i.income_time between '2020-01-01' and '2020-03-31'
group by g.department, g.game_name;

题目参考数据分析SQL面试题目9套汇总​www.jianshu.com

会持续不断的更新,欢迎大家批评指正。

我的公众号 DataLion,欢迎大家关注。

你可能感兴趣的:(sql求平均日活)