行数
限制行数:末尾+limit n
查询行数:select count(*) from table
判定是否为空:is not null
聚合函数:count(xx)、sum(xx)、count(distinct xxx)、max/min/avg(xx)
union:纵向拼接
select * from OrderItems where quantity = 100
union all
select * from OrderItems where prod_id like 'BNBG%'
case when:判断是否有过购买记录,增加新的一列
if(xx, xx, xx):判断
CASE expression
WHEN condition1 THEN result1
WHEN condition2 THEN result2
...
WHEN conditionN THEN resultN
ELSE result
END
### 判断是否有过购买记录,增加新的一列
select
customer_id,
(case
when latest_place_order_date is not null then 1
else 0
end)
as if_placed_oredr
from customers_info
# 等价于
select customer_id, if(latest_place_order_date is not null,1,0) if_placed_order
from customers_info
### 两重排序
# 按城市对客户进行排序,如果城市为空,则按国家排序
select * from customers_info
order by if(city is null, country, city)
### case后grouby
select (case
when age is null then '未填写'
when age>50 then '50以上'
when age>20 and age<50 then '20-50'
else '20以下'
end) as age_group,
count(*) user_count
from customers_info
group by age_group
窗口函数
语法结构:函数 OVER ([PARTITION BY 字段名 ORDER BY 字段名 ASC|DESC])
或:函数 OVER 窗口名 … WINDOW 窗口名 AS ([PARTITION BY 字段名 ORDER BY 字段名 ASC|DESC])
# 查询每天刷题通过数最多的前二名用户id和刷题数
# row_number():对每一组数据进行编号
select date, user_id, pass_count
from(select date,user_id,pass_count,
row_number() over(partition by date order by pass_count desc) m
from questions_pass_record) a
where m<=2
order by date
# 查询用户的下一次刷题日期nextdate
# lead(expr, n):返回当前行的后n行的expr的值
select user_id,date,lead(date, 1)
over(partition by user_id
order by date asc) nextdate
from questions_pass_record
having字句
# 输出提交次数大于2次的用户ID且倒序排列
select user_id
from done_questions_record
group by user_id
having count(*)>2
order by user_id desc
# 输出提交且通过次数大于2 的用户ID且升序排列
select user_id
from done_questions_record
group by user_id
having sum(result_info)>2
order by user_id asc
# 计算三个值:
# question_pass_rate 表示每个用户不同题目的通过率(同一用户同一题重复提交通过仅计算一次);
# pass_rate 表示每个用户的提交正确率(只要有提交一次即计算一次);
# question_per_cnt表示平均每道不同的题目被提交的次数(只要有一次提交即计算一次)
select user_id,
count(distinct if(result_info=1,question_id,null))/count(distinct question_id) question_pass_rate,
sum(result_info)/count(question_id) pass_rate,
count(question_id)/count(distinct question_id) question_per_cnt
from done_questions_record
group by user_id
having question_pass_rate>0.6
order by user_id asc
时间函数
# 求出哪个小时为广告点击的高峰期,以及发生的点击次数
select hour(click_time) click_hour,
count(click_time) click_cnt
from user_ad_click_time
group by click_hour
order by click_cnt desc
limit 1
# 输出在5min内完成点击购买的用户ID
select click.user_id uid
from user_ad_click_time click, user_payment_time pay
where click.user_id=pay.user_id
and click.trace_id =pay.trace_id
and time_to_sec(timediff(pay.pay_time,click.click_time))<=300
order by click.user_id desc
字符函数
# 正则表达式LIKE
select id, comment from comment_detail
where comment like '是%' or comment like '求%'
order by id asc
# 子串substring_index(xx,',',ind)
# 输出所有第二话题为1002的评论对应的第一话题subject_id1的数量cnt
select substring_index(subject_set,',',1) as subject_id1,
count(*) as cnt
from comment_detail
where substring_index(substring_index(subject_set,',',2),',',-1)='1002'
group by substring_index(subject_set,',',1)
# 字符串替换replace(xx, 'str1', 'str2')
select id, replace(comment,',','') comment
from comment_detail
where char_length(comment)>3 # 字符串长度
经典题