1. inner join
- 内连接,返回两个表的交集
- 既在user_list_1又在user_list_2的用户
-- Inner join: keep only rows whose user_id is present in both user lists.
SELECT *
FROM user_list_1 AS a
INNER JOIN user_list_2 AS b
    ON a.user_id = b.user_id;
- 表连接时,建议给每个表起别名;如果连接的是子查询,则必须起别名
- on后面使用的连接条件必须起到唯一键值的作用
- inner可以省略不写,效果一样
- 一定要先去重,再做表连接,养成良好习惯
- 在2019年购买后又退款的用户
-- Users who have both a trade row and a refund row dated in 2019.
-- Each side is de-duplicated first, so the join yields at most one row per user.
-- year() produces a number, so both filters compare against the numeric
-- literal 2019 instead of relying on an implicit cast from a quoted string.
SELECT a.user_name
FROM (
    SELECT DISTINCT user_name
    FROM user_trade
    WHERE year(dt) = 2019
) AS a
INNER JOIN (
    SELECT DISTINCT user_name
    FROM user_refund
    WHERE year(dt) = 2019
) AS b
    ON a.user_name = b.user_name;
-- Users who have trade rows in both 2017 and 2018.
-- user_trade is read twice, once per year, and each side is de-duplicated
-- before the join.
-- year() produces a number, so it is compared with numeric literals rather
-- than quoted strings.
SELECT a.user_name
FROM (
    SELECT DISTINCT user_name
    FROM user_trade
    WHERE year(dt) = 2017
) AS a
INNER JOIN (
    SELECT DISTINCT user_name
    FROM user_trade
    WHERE year(dt) = 2018
) AS b
    ON a.user_name = b.user_name;
-- Users present in all three yearly trade tables (2017, 2018 and 2019).
-- Every table is reduced to one row per user_name before it is joined.
SELECT a.user_name
FROM (
    SELECT user_name
    FROM trade_2017
    GROUP BY user_name
) AS a
INNER JOIN (
    SELECT user_name
    FROM trade_2018
    GROUP BY user_name
) AS b
    ON a.user_name = b.user_name
INNER JOIN (
    SELECT user_name
    FROM trade_2019
    GROUP BY user_name
) AS c
    ON b.user_name = c.user_name;
2. left join
- 左连接,以左表为全集,返回左表的所有行:能匹配上右表的行显示右表的值,没有匹配上的行右表字段显示NULL
-- Left join: every row of user_list_1 is returned; the user_list_2 columns
-- are NULL where no row shares the same user_id.
SELECT *
FROM user_list_1 AS a
LEFT JOIN user_list_2 AS b
    ON a.user_id = b.user_id;
- 在user_list_1中但是不在user_list_2中
-- Anti-join: users of user_list_1 that have no matching user_id in
-- user_list_2. Unmatched left rows carry NULL in b.user_id after the
-- left join, which is what the WHERE clause selects.
SELECT
    a.user_id,
    a.user_name
FROM user_list_1 AS a
LEFT JOIN user_list_2 AS b
    ON a.user_id = b.user_id
WHERE b.user_id IS NULL;
-- Users with a trade row in 2019 but no refund row in 2019.
-- The left join keeps every 2019 buyer; buyers without a 2019 refund have
-- NULL in b.user_name and are the ones returned.
-- year() produces a number, so it is compared with the numeric literal 2019
-- rather than a quoted string.
SELECT a.user_name
FROM (
    SELECT DISTINCT user_name
    FROM user_trade
    WHERE year(dt) = 2019
) AS a
LEFT JOIN (
    SELECT DISTINCT user_name
    FROM user_refund
    WHERE year(dt) = 2019
) AS b
    ON a.user_name = b.user_name
WHERE b.user_name IS NULL;
-- Number of 2019 buyers per education value.
-- education is read from the '$.education' path of the JSON text in
-- user_info.extra1. Buyers with no user_info row are kept by the left join
-- and counted under a NULL education.
-- year() produces a number, so it is compared with the numeric literal 2019
-- rather than a quoted string.
-- NOTE(review): counts are per buyer only if user_info has one row per
-- user_name - confirm.
SELECT
    b.education,
    COUNT(a.user_name)
FROM (
    SELECT DISTINCT user_name
    FROM user_trade
    WHERE year(dt) = 2019
) AS a
LEFT JOIN (
    SELECT
        user_name,
        get_json_object(extra1, '$.education') AS education
    FROM user_info
) AS b
    ON a.user_name = b.user_name
GROUP BY b.education;
- 在2017,2018年都购买,但是在2019年没有购买的用户
-- Users found in both trade_2017 and trade_2018 but absent from trade_2019.
-- The inner join keeps users present in the first two tables; the left join
-- then exposes those with no 2019 row as NULL in c.user_name.
SELECT a.user_name
FROM (
    SELECT user_name
    FROM trade_2017
    GROUP BY user_name
) AS a
INNER JOIN (
    SELECT user_name
    FROM trade_2018
    GROUP BY user_name
) AS b
    ON a.user_name = b.user_name
LEFT JOIN (
    SELECT user_name
    FROM trade_2019
    GROUP BY user_name
) AS c
    ON b.user_name = c.user_name
WHERE c.user_name IS NULL;
3. full join
- full outer join(全连接)返回左表和右表中的所有行:能匹配上的行合并显示,匹配不上的一侧字段显示NULL。
- user_list_1和user_list_2的所有用户
-- All user names from either list. The full outer join keeps unmatched rows
-- from both sides, and COALESCE picks whichever side's user_name is not NULL.
SELECT COALESCE(a.user_name, b.user_name)
FROM user_list_1 AS a
FULL OUTER JOIN user_list_2 AS b
    ON a.user_name = b.user_name;
- coalesce(expression_1, expression_2, …, expression_n) 函数依次检查各参数表达式,遇到第一个非NULL值即停止并返回该值;如果所有表达式都是NULL,则返回NULL
4. union all
- 联合所有
- 字段名必须一致
- 字段顺序必须一致
- 没有连接条件
- 将user_list_1和user_list_3合并在一起
-- Stack the rows of user_list_1 and user_list_3 into one result set.
-- UNION ALL does not remove duplicates.
SELECT
    user_name,
    user_id
FROM user_list_1

UNION ALL

SELECT
    user_name,
    user_id
FROM user_list_3;
-- Number of distinct users appearing in any of the three yearly trade tables.
-- Each table is de-duplicated on its own; the outer COUNT(DISTINCT ...) then
-- collapses users that occur in more than one year.
SELECT COUNT(DISTINCT a.user_name)
FROM (
    SELECT DISTINCT user_name
    FROM trade_2017

    UNION ALL

    SELECT DISTINCT user_name
    FROM trade_2018

    UNION ALL

    SELECT DISTINCT user_name
    FROM trade_2019
) AS a;
-- Total amount paid and total amount refunded in 2019, per user.
-- Each branch supplies a 0 placeholder for the measure it does not have, so
-- the two result sets line up for UNION ALL; the outer SUM then merges the
-- trade row and the refund row of the same user.
-- year() produces a number, so it is compared with the numeric literal 2019
-- rather than a quoted string.
SELECT
    a.user_name,
    SUM(a.pay_amount),
    SUM(a.refund_amount)
FROM (
    SELECT
        user_name,
        SUM(pay_amount) AS pay_amount,
        0 AS refund_amount
    FROM user_trade
    WHERE year(dt) = 2019
    GROUP BY user_name

    UNION ALL

    SELECT
        user_name,
        0 AS pay_amount,
        SUM(refund_amount) AS refund_amount
    FROM user_refund
    WHERE year(dt) = 2019
    GROUP BY user_name
) AS a
GROUP BY a.user_name;
-- Amount paid and amount refunded in 2019 for every user that has either.
-- The full outer join keeps users that appear on only one side; COALESCE
-- supplies the user name from whichever side exists and turns a missing
-- amount into 0.
-- COALESCE replaces the if(x is null, 0, x) form so that NULL handling is
-- written the same way for all three output columns.
-- year() produces a number, so it is compared with the numeric literal 2019
-- rather than a quoted string.
SELECT
    COALESCE(a.user_name, b.user_name),
    COALESCE(a.pay_amount, 0),
    COALESCE(b.refund_amount, 0)
FROM (
    SELECT
        user_name,
        SUM(pay_amount) AS pay_amount
    FROM user_trade
    WHERE year(dt) = 2019
    GROUP BY user_name
) AS a
FULL OUTER JOIN (
    SELECT
        user_name,
        SUM(refund_amount) AS refund_amount
    FROM user_refund
    WHERE year(dt) = 2019
    GROUP BY user_name
) AS b
    ON a.user_name = b.user_name;
-- Amount paid in 2019 per buyer, with that buyer's 2019 refund total.
-- The left join keeps every 2019 buyer; users that only have refunds are not
-- returned. A missing amount is reported as 0.
-- COALESCE replaces the if(x is null, 0, x) form as the standard way to
-- default a NULL.
-- year() produces a number, so it is compared with the numeric literal 2019
-- rather than a quoted string.
SELECT
    a.user_name,
    COALESCE(a.pay_amount, 0),
    COALESCE(b.refund_amount, 0)
FROM (
    SELECT
        user_name,
        SUM(pay_amount) AS pay_amount
    FROM user_trade
    WHERE year(dt) = 2019
    GROUP BY user_name
) AS a
LEFT JOIN (
    SELECT
        user_name,
        SUM(refund_amount) AS refund_amount
    FROM user_refund
    WHERE year(dt) = 2019
    GROUP BY user_name
) AS b
    ON a.user_name = b.user_name;
5. 综合练习
- 首次激活时间在2017年,但是一直没有支付的用户年龄段分布
-- Age-bracket distribution of users first activated in 2017 who have no row
-- in user_trade.
-- Rows whose age is NULL match none of the WHEN branches and are therefore
-- counted in the ELSE bracket.
-- year() produces a number, so it is compared with the numeric literal 2017
-- rather than a quoted string.
-- dt is compared with the string '0', not the number 0: dt is date-like (it
-- is passed to year() in other queries), and a numeric comparison would
-- force the date text to be cast to a number.
-- NOTE(review): assumes dt is stored as a date string and that this filter is
-- only meant to select every trade row - confirm.
SELECT
    a.age_level,
    COUNT(a.user_name)
FROM (
    SELECT
        user_name,
        CASE
            WHEN age < 20 THEN '20岁以下'
            WHEN age < 30 THEN '20-30岁'
            WHEN age < 40 THEN '30-40岁'
            ELSE '40岁以上'
        END AS age_level
    FROM user_info
    WHERE year(firstactivetime) = 2017
) AS a
LEFT JOIN (
    SELECT DISTINCT user_name
    FROM user_trade
    WHERE dt > '0'
) AS b
    ON a.user_name = b.user_name
WHERE b.user_name IS NULL
GROUP BY a.age_level;
-- Distinct users found in trade_2018 or trade_2019, counted per hour of
-- their firstactivetime from user_info.
-- A user present in both years appears twice after UNION ALL; the
-- COUNT(DISTINCT ...) counts that user once. Users with no user_info row are
-- kept by the left join and grouped under a NULL hour.
SELECT
    hour(b.firstactivetime),
    COUNT(DISTINCT a.user_name)
FROM (
    SELECT DISTINCT user_name
    FROM trade_2018

    UNION ALL

    SELECT DISTINCT user_name
    FROM trade_2019
) AS a
LEFT JOIN user_info AS b
    ON a.user_name = b.user_name
GROUP BY hour(b.firstactivetime);
-- Sex distribution of users who have both a trade row and a refund row
-- dated in 2019.
-- Both sides are reduced to one row per user before the inner join; the left
-- join to user_info then attaches sex, leaving it NULL for users that have no
-- user_info row.
SELECT
    c.sex,
    COUNT(a.user_name)
FROM (
    SELECT DISTINCT user_name
    FROM user_trade
    WHERE year(dt) = 2019
) AS a
INNER JOIN (
    SELECT DISTINCT user_name
    FROM user_refund
    WHERE year(dt) = 2019
) AS b
    ON a.user_name = b.user_name
LEFT JOIN (
    SELECT
        user_name,
        sex
    FROM user_info
) AS c
    ON b.user_name = c.user_name
GROUP BY c.sex;
- 在2018年购买,但是没在2019年购买的用户的城市分布
-- City distribution of users with a trade row in 2018 and none in 2019.
-- Step 1 (subquery c): anti-join the 2018 buyers against the 2019 buyers.
-- Step 2: attach city from user_info; users with no user_info row are kept
-- by the left join and grouped under a NULL city.
SELECT
    d.city,
    COUNT(c.user_name)
FROM (
    SELECT a.user_name
    FROM (
        SELECT DISTINCT user_name
        FROM user_trade
        WHERE year(dt) = 2018
    ) AS a
    LEFT JOIN (
        SELECT DISTINCT user_name
        FROM user_trade
        WHERE year(dt) = 2019
    ) AS b
        ON a.user_name = b.user_name
    WHERE b.user_name IS NULL
) AS c
LEFT JOIN (
    SELECT
        user_name,
        city
    FROM user_info
) AS d
    ON c.user_name = d.user_name
GROUP BY d.city;
- 在2017-2019年,有交易但是没退款的用户的手机品牌分布
-- Phone-brand distribution of users who appear in trade_2017, trade_2018 or
-- trade_2019 but have no row in user_refund.
-- Step 1 (subquery a): UNION (not UNION ALL) merges the three yearly tables
-- and removes duplicate user names.
-- Step 2 (subquery c): anti-join against the refunding users.
-- Step 3: attach the 'phonebrand' entry of user_info.extra2; users with no
-- user_info row are grouped under a NULL brand.
SELECT
    d.phonebrand,
    COUNT(c.user_name)
FROM (
    SELECT a.user_name
    FROM (
        SELECT user_name
        FROM trade_2017

        UNION

        SELECT user_name
        FROM trade_2018

        UNION

        SELECT user_name
        FROM trade_2019
    ) AS a
    LEFT JOIN (
        SELECT DISTINCT user_name
        FROM user_refund
        WHERE dt > '0'
    ) AS b
        ON a.user_name = b.user_name
    WHERE b.user_name IS NULL
) AS c
LEFT JOIN (
    SELECT
        user_name,
        extra2['phonebrand'] AS phonebrand
    FROM user_info
) AS d
    ON c.user_name = d.user_name
GROUP BY d.phonebrand;