1. 如何把时间戳转换成日期
from_unixtime(bigint unixtime, string format)
1. yyyy-MM-dd HH:mm:ss(HH 为24小时制;hh 为12小时制,下午的时间会显示成 01-12)
2. yyyy-MM-dd HH
3. yyyy-MM-dd HH:mm
4. yyyyMMdd
-- Render the payment time of each trade as a readable datetime string,
-- for rows whose dt is '2019-04-09'.
-- 'HH' is the 24-hour field; the 12-hour 'hh' would print 15:00 as 03:00.
-- The column is spelled pay_time, matching the other user_trade queries in this file.
SELECT
    from_unixtime(pay_time, 'yyyy-MM-dd HH:mm:ss')
FROM user_trade
WHERE dt = '2019-04-09';
- 拓展:把日期转换为时间戳
- unix_timestamp(string date):把 yyyy-MM-dd HH:mm:ss 格式的时间字符串转换为时间戳(秒)
-- Convert the first-activation time of every level-5 user into a Unix timestamp.
SELECT
    unix_timestamp(firstactivetime)
FROM
    user_info
WHERE
    level = 5;
2. 如何计算日期间隔
datediff(string enddate, string startdate)
- 举例:用户首次激活时间,与2019年5月1日的日期间隔
-- Number of days between each user's first-activation date and 2019-05-01.
-- datediff takes the end date first, then the start date.
-- Only 10 rows are returned.
SELECT
    user_name,
    datediff('2019-05-01', to_date(firstactivetime))
FROM
    user_info
LIMIT 10;
date_add(string startdate, int days)
date_sub(string startdate, int days)
-- The date 7 days after each user's first activation.
-- Only 10 rows are returned.
SELECT
    user_name,
    date_add(firstactivetime, 7)
FROM
    user_info
LIMIT 10;
3. case when 条件函数
- 统计以下4个年龄段的用户分布:20岁以下、20-30岁、30-40岁、40岁以上
-- User count per age bucket.
-- WHEN branches are tested top to bottom, so each upper bound only sees rows
-- that the earlier branches did not match; everything else falls into ELSE.
-- The GROUP BY repeats the same CASE expression as the select list.
SELECT
    CASE
        WHEN age < 20 THEN '20岁以下'
        WHEN age < 30 THEN '20-30岁'
        WHEN age < 40 THEN '30-40岁'
        ELSE '40岁以上'
    END,
    count(user_id) AS user_num
FROM
    user_info
GROUP BY
    CASE
        WHEN age < 20 THEN '20岁以下'
        WHEN age < 30 THEN '20-30岁'
        WHEN age < 40 THEN '30-40岁'
        ELSE '40岁以上'
    END;
4. if函数
-- User count per sex and level tier.
-- A level above 5 is labelled '高'; any other level (including NULL) is '低'.
-- Written as CASE, which yields the same result as if(level>5,'高','低').
SELECT
    sex,
    CASE WHEN level > 5 THEN '高' ELSE '低' END,
    count(user_id) AS user_num
FROM
    user_info
GROUP BY
    sex,
    CASE WHEN level > 5 THEN '高' ELSE '低' END;
5. 字符串函数
substr(string A, int start, int len)
-- User count per activation month.
-- The first 7 characters of firstactivetime are used as the month key.
SELECT
    substr(firstactivetime, 1, 7) AS month,
    count(user_id) AS user_num
FROM
    user_info
GROUP BY
    substr(firstactivetime, 1, 7);
get_json_object(string json_string, string path)
-- User count per phone brand, where the brand is read from the
-- 'phonebrand' key of the JSON string column extra1.
SELECT
    get_json_object(extra1, '$.phonebrand') AS phone_brand,
    count(user_id) AS user_num
FROM
    user_info
GROUP BY
    get_json_object(extra1, '$.phonebrand');
-- User count per (phone brand, education) pair.
-- Both values are looked up by key in the extra2 column.
SELECT
    extra2['phonebrand'] AS phone_brand,
    extra2['education'] AS education,
    count(user_id) AS user_num
FROM
    user_info
GROUP BY
    extra2['phonebrand'],
    extra2['education'];
6. 聚合统计函数
- ELLA用户2018年平均支付金额,以及2018年最大支付日期和最小支付日期的时间间隔
-- For user ELLA in 2018: average payment amount, and the number of days
-- between the latest and earliest payment dates.
-- Each pay_time is formatted as a date first, then max/min are taken over the strings.
-- The pattern must be 'yyyy-MM-dd': datediff expects four-digit-year dates,
-- and 'yy-MM-dd' would hand it strings such as '18-03-05'.
SELECT
    avg(pay_amount) AS avg_amount,
    datediff(
        max(from_unixtime(pay_time, 'yyyy-MM-dd')),
        min(from_unixtime(pay_time, 'yyyy-MM-dd'))
    )
FROM user_trade
WHERE year(dt) = '2018'
  AND user_name = 'ELLA';
-- 另一种写法:先对时间戳取最大值/最小值,再转换为日期
-- For user ELLA in 2018: average payment amount, and the number of days
-- between the latest and earliest payment dates.
-- Here max/min are taken over the raw pay_time values, and only those two
-- values are formatted as dates.
-- The pattern must be 'yyyy-MM-dd': datediff expects four-digit-year dates,
-- and 'yy-MM-dd' would hand it strings such as '18-03-05'.
SELECT
    avg(pay_amount) AS avg_amount,
    datediff(
        from_unixtime(max(pay_time), 'yyyy-MM-dd'),
        from_unixtime(min(pay_time), 'yyyy-MM-dd')
    )
FROM user_trade
WHERE year(dt) = '2018'
  AND user_name = 'ELLA';
7. 综合练习
-- Number of users who bought from more than two distinct goods categories in 2018.
-- The subquery computes the distinct-category count per user; the outer query
-- keeps the users above the threshold and counts them.
SELECT
    count(a.user_name)
FROM (
    SELECT
        user_name,
        count(DISTINCT goods_category) AS category_num
    FROM user_trade
    WHERE year(dt) = '2018'
    GROUP BY user_name
) AS a
WHERE a.category_num > 2;
- 用户激活时间在2018年,年龄段在20-30岁和30-40岁的婚姻状况分布
-- Marriage-status distribution for users first activated in 2018,
-- limited to the '20-30岁' and '30-40岁' age buckets.
-- The subquery labels every 2018-activated user with an age bucket and pulls
-- marriage_status out of the JSON column extra1; the outer query filters the
-- buckets and counts users per (bucket, marriage label).
-- A marriage_status equal to 1 is labelled '已婚'; anything else is '未婚'.
SELECT
    a.age_type,
    CASE WHEN marriage_status = 1 THEN '已婚' ELSE '未婚' END,
    count(a.user_id)
FROM (
    SELECT
        CASE
            WHEN age < 20 THEN '20岁以下'
            WHEN age < 30 THEN '20-30岁'
            WHEN age < 40 THEN '30-40岁'
            ELSE '40岁以上'
        END AS age_type,
        get_json_object(extra1, '$.marriage_status') AS marriage_status,
        user_id
    FROM user_info
    WHERE year(firstactivetime) = '2018'
) AS a
WHERE a.age_type IN ('20-30岁', '30-40岁')
GROUP BY
    a.age_type,
    CASE WHEN marriage_status = 1 THEN '已婚' ELSE '未婚' END;
-- User count per sex, for users whose first activation was more than
-- 300 days before 2019-08-07.
-- The first 10 characters of firstactivetime are used as the start date.
SELECT
    sex,
    count(user_id)
FROM
    user_info
WHERE
    datediff('2019-08-07', substr(firstactivetime, 1, 10)) > 300
GROUP BY
    sex;
-- User count per (sex, education) pair, where education is read from the
-- 'education' key of the JSON string column extra1.
SELECT
    sex,
    get_json_object(extra1, '$.education'),
    count(user_id)
FROM
    user_info
GROUP BY
    sex,
    get_json_object(extra1, '$.education');
- 2019年1月1日到2019年4月30日,每个时段的不同品类购买金额分布
-- Total payment amount per hour of day and goods category, for trades with
-- dt from 2019-01-01 through 2019-04-30 (BETWEEN includes both end dates).
-- 'HH' yields the 24-hour hour (00-23). The 12-hour 'hh' pattern would put
-- morning and afternoon hours (e.g. 03:00 and 15:00) into the same bucket.
-- Formatting with 'HH' alone gives the same two-digit hour that taking
-- substr(..., 12) of a 'yyyy-MM-dd HH' string would.
SELECT
    from_unixtime(pay_time, 'HH'),
    goods_category,
    sum(pay_amount)
FROM user_trade
WHERE dt BETWEEN '2019-01-01' AND '2019-04-30'
GROUP BY
    from_unixtime(pay_time, 'HH'),
    goods_category;
8. 常见错误
- 标点符号错误(使用了全角符号)
- 没有对子查询的表进行重命名
- 使用错误的字段名
- 丢了逗号分隔符