-- 选出城市在北京、性别为女的十个用户
-- Names of up to 10 users whose city is 'beijing' and whose sex is 'female'.
-- There is no ORDER BY, so which 10 rows are returned is left to the engine.
SELECT
    user_name
FROM user_info
WHERE sex = 'female'
  AND city = 'beijing'
LIMIT 10;
#选出在2019年4月9号,购买的商品品类是food的用户名、购买数量、支付金额
-- User name, purchased quantity (piece) and payment amount for every
-- 'food' trade recorded under dt = '2019-04-09'.
SELECT
    user_name,
    piece,
    pay_amount
FROM user_trade
WHERE goods_category = 'food'
  AND dt = '2019-04-09';
#2019年一月到四月,每个品类有多少人购买,累计金额是多少
-- Per goods category, for dt from 2019-01-01 through 2019-04-30 (inclusive):
--   num          = number of distinct buyer names
--   total_amount = sum of pay_amount
SELECT
    goods_category,
    COUNT(DISTINCT user_name) AS num,
    SUM(pay_amount)           AS total_amount
FROM user_trade
WHERE dt >= '2019-01-01'
  AND dt <= '2019-04-30'
GROUP BY
    goods_category;
-- group by 的作用:分类汇总
-- 常用聚合函数:
-- 1. count():计数;count(distinct ...):去重计数
-- 2. sum():求和
-- 3. avg():平均值
-- 4. max():最大值
-- 5. min():最小值
-- group by ... having:先分组,再用 having 对分组结果进行筛选
#2019年4月,支付金额超过5万元的用户
-- Users whose summed pay_amount for dt in April 2019 (2019-04-01 through
-- 2019-04-30, inclusive) is strictly greater than 50000.
SELECT
    user_name,
    SUM(pay_amount) AS total_amount
FROM user_trade
WHERE dt >= '2019-04-01'
  AND dt <= '2019-04-30'
GROUP BY
    user_name
-- HAVING filters the groups after aggregation; the aggregate is repeated
-- here instead of referring to the total_amount alias.
HAVING SUM(pay_amount) > 50000;
#having :对group by 的对象进行筛选
#2019年4月,支付金额最多的top5用户
-- The 5 users with the largest summed pay_amount for dt in April 2019
-- (2019-04-01 through 2019-04-30, inclusive), largest first.
-- Ties at the cut-off are not broken by any secondary sort key.
SELECT
    user_name,
    SUM(pay_amount) AS total_amount
FROM user_trade
WHERE dt >= '2019-04-01'
  AND dt <= '2019-04-30'
GROUP BY
    user_name
ORDER BY
    total_amount DESC
LIMIT 5;
#常用函数
-- 1、如何把时间戳转化为日期?
-- Show each raw pay_time next to its formatted date-time string for the
-- trades recorded under dt = '2019-04-09'.
-- pay_time is handed to from_unixtime, i.e. it is treated as a Unix epoch
-- value in seconds.
-- NOTE(review): the rendered time follows the session time zone - confirm
-- that this is the zone you expect.
SELECT
    pay_time,
    -- 'HH' is the 24-hour field. The previous 'hh' is the 12-hour field and,
    -- without an AM/PM marker, rendered 15:30 and 03:30 identically.
    from_unixtime(pay_time, 'yyyy-MM-dd HH:mm:ss') AS pay_datetime
FROM user_trade
WHERE dt = '2019-04-09';
-- 2、如何计算日期间隔?
-- 用户的首次激活时间与 2019 年 5 月 1 日之间的日期间隔
-- For up to 10 users: the number of days from the date part of
-- firstactivetime to 2019-05-01 (end date first, start date second).
-- There is no ORDER BY, so which 10 rows are returned is left to the engine.
SELECT
    user_name,
    datediff(
        '2019-05-01',
        to_date(firstactivetime)
    )
FROM user_info
LIMIT 10;
-- 3、条件函数
-- case when
-- 统计以下四个年龄段的用户数:20岁以下、20-30岁、30-40岁、40岁以上
-- Number of distinct users in each age band.
-- Bands are half-open: [..,20), [20,30), [30,40), [40,..).
-- Fixes relative to the previous version:
--   * '2osui' (letter "o" for zero, no "under") is now '20suiyixia';
--   * '40suiyihang' (misspelt) is now '40suiyishang';
--   * the top band is tested explicitly, so a NULL age lands in 'unknown'
--     instead of being silently counted as 40 and above.
SELECT
    COUNT(DISTINCT user_id) AS user_num,
    CASE
        WHEN age < 20               THEN '20suiyixia'
        WHEN age >= 20 AND age < 30 THEN '20-30sui'
        WHEN age >= 30 AND age < 40 THEN '30-40sui'
        WHEN age >= 40              THEN '40suiyishang'
        ELSE 'unknown'
    END AS age_type
FROM user_info
-- The full CASE expression is repeated here rather than referring to the
-- age_type alias; it must stay textually in step with the SELECT list.
GROUP BY
    CASE
        WHEN age < 20               THEN '20suiyixia'
        WHEN age >= 20 AND age < 30 THEN '20-30sui'
        WHEN age >= 30 AND age < 40 THEN '30-40sui'
        WHEN age >= 40              THEN '40suiyishang'
        ELSE 'unknown'
    END;
#if函数
#统计每个性别用户等级高低的分布情况(level大于5为高级)
-- Distinct user count per sex and level class.
-- A level strictly greater than 5 is labelled '高'; everything else is
-- labelled '低', which includes rows where level is NULL.
-- Fix: the stray "from user_info" line that followed the terminating ';'
-- was removed; it was not part of any statement and made the script fail
-- to parse.
SELECT
    sex,
    if(level > 5, '高', '低') AS level_type,
    COUNT(DISTINCT user_id)   AS user_num
FROM user_info
-- The if(...) expression is repeated rather than referring to the
-- level_type alias.
GROUP BY
    sex,
    if(level > 5, '高', '低');
-- 4、字符串函数
#每个月新激活的用户数
-- Distinct user count per activation month.
-- The month key is the first 7 characters of firstactivetime.
-- NOTE(review): this yields 'yyyy-MM' only if firstactivetime starts with
-- a 'yyyy-MM-dd' style date - confirm against the table.
SELECT
    COUNT(DISTINCT user_id)       AS user_num,
    substr(firstactivetime, 1, 7) AS month
FROM user_info
GROUP BY
    substr(firstactivetime, 1, 7);
-- substr(string A, int start, int len)
-- 备注:如果不指定截取长度,则从起始位一直截取到最后
-- Distinct user count per phone brand.
-- The brand is read from the 'phonebrand' key of the JSON text held in
-- extra1.
SELECT
    get_json_object(extra1, '$.phonebrand') AS phone_brand,
    COUNT(DISTINCT user_id)                 AS user_num
FROM user_info
GROUP BY
    get_json_object(extra1, '$.phonebrand');
-- 5、聚合统计函数
#ELLA用户的2018年的平均支付金额,以及2018年最大的支付日期与最小的支付日期的间隔
-- For user 'ELLA' in 2018: the average pay_amount, and the number of days
-- between the earliest and the latest payment date.
SELECT
    AVG(pay_amount) AS avg_amount,
    -- pay_time is converted to a 'yyyy-MM-dd' string before taking max/min,
    -- so the interval is measured in whole calendar days.
    datediff(
        MAX(from_unixtime(pay_time, 'yyyy-MM-dd')),
        MIN(from_unixtime(pay_time, 'yyyy-MM-dd'))
    ) AS pay_date_span_days
FROM user_trade
-- Half-open range on the bare dt column. This replaces year(dt) = '2018',
-- which compared an integer with a string and wrapped the filter column in
-- a function (which can stop the engine from pruning on dt).
-- NOTE(review): assumes dt holds 'yyyy-MM-dd' values - confirm.
WHERE dt >= '2018-01-01'
  AND dt <  '2019-01-01'
  AND user_name = 'ELLA';
#2018年购买的商品品类在两个以上的用户数
-- Number of users who bought from strictly more than two distinct goods
-- categories in 2018.
SELECT
    COUNT(a.user_name) AS user_num
FROM (
    -- One row per qualifying user; the outer query only counts these rows.
    SELECT
        user_name
    FROM user_trade
    -- Half-open range on the bare dt column. This replaces
    -- year(dt) = '2018', which compared an integer with a string and
    -- wrapped the filter column in a function.
    -- NOTE(review): assumes dt holds 'yyyy-MM-dd' values - confirm.
    WHERE dt >= '2018-01-01'
      AND dt <  '2019-01-01'
    GROUP BY
        user_name
    HAVING COUNT(DISTINCT goods_category) > 2
) AS a;
#用户激活时间在2018年,年龄段在20-30岁和30-40岁的婚姻状况分布
-- Marital-status breakdown of users first activated in 2018, limited to
-- the '20-30岁' and '30-40岁' age bands.
-- Output columns: age band, marital label, distinct user count.
SELECT
    a.age_type,
    -- Only a status equal to 1 is labelled '已婚'; any other value,
    -- including a missing one, falls through to '未婚'.
    CASE WHEN a.marraige_status = 1 THEN '已婚' ELSE '未婚' END,
    COUNT(DISTINCT a.user_id)
FROM (
    -- Users activated in 2018, each tagged with an age band and the
    -- 'marraige_status' value read from the extra1 JSON.
    -- NOTE(review): the key spelling is kept exactly as written;
    -- presumably it matches the stored JSON - verify before "correcting" it.
    SELECT
        CASE
            WHEN age < 20               THEN '20岁以下'
            WHEN age >= 20 AND age < 30 THEN '20-30岁'
            WHEN age >= 30 AND age < 40 THEN '30-40岁'
            ELSE '40岁以上'
        END AS age_type,
        get_json_object(extra1, '$.marraige_status') AS marraige_status,
        user_id
    FROM user_info
    WHERE to_date(firstactivetime) BETWEEN '2018-01-01' AND '2018-12-31'
) AS a
WHERE a.age_type IN ('20-30岁', '30-40岁')
GROUP BY
    a.age_type,
    CASE WHEN a.marraige_status = 1 THEN '已婚' ELSE '未婚' END;