安装MySQL
启动MySQL
MySQL is configured to only allow connections from localhost by default
To connect run:
mysql -u root
To start mysql now and restart at login:
brew services start mysql
使用where过滤数据
SELECT
*
FROM
actor
WHERE
last_name = 'ALLEN' OR last_name = 'DAVIS'
ORDER BY
`actor_id`;
where可以和and或者or联合使用,来圈定条件;常见的比较符:
=、<>、!=、>、<、>=、<=、IN、NOT IN、BETWEEN、LIKE、IS NULL、IS NOT NULL
统计address表格中,postal_code在区间的分布
select
sum(
case when `postal_code` < 1000 then 1 else 0
end
) as c1,
sum(
case
when (`postal_code` < 5000 and `postal_code`>=1000) then 1 else 0
end
) as c2,
sum(
case
when (`postal_code` < 10000 and `postal_code`>=5000) then 1 else 0
end
) as c3,
sum(
case when (`postal_code`>=10000) then 1 else 0
end
) as c4
from address;
通过派生表temp1,统计符合条件的数量
select
count(*) as cnt
from(
SELECT *
FROM actor
WHERE last_name = 'ALLEN' OR last_name = 'DAVIS'
ORDER BY `actor_id`
) temp1;
SELECT t1.a1 -- 所有要展示的字段,都写在这里,可能不止来源一个表
,t1.a2
,COUNT(DISTINCT t1.a1) AS cnt
,t2.a3
,t2.a4
FROM t1 -- 来源于第一个表
LEFT JOIN (
SELECT a3
,a4
FROM xxx
WHERE ds = 'xxx'
) t2 -- 第二个表
ON t1.id = t2.id -- JOIN的关联条件
WHERE t1.a1 = 'xx' -- 过滤条件
AND t1.ds = '20230711'
AND t1.a1 IS NOT NULL
GROUP BY t1.a1 -- 分组:注意,所有select中出现的字段(除聚合函数外),都需要出现在GROUP BY中
,t1.a2
,t2.a3
,t2.a4
HAVING cnt > 0 -- 对分组统计之后的最终数据做一个过滤,这里可以直接使用聚合函数as之后的别名
ORDER BY cnt DESC -- 排序
LIMIT 30
;
1.执行顺序:->from (去加载t1 和 t2这2个表 ) -> join -> on -> where -> group by->select 后面的聚合函数count,sum -> having -> distinct -> order by -> limit
2.FROM 子句执行顺序为从后往前、从右到左
SELECT ds
,SUM(CASE WHEN cnt >= 1 THEN 1 ELSE 0 END) AS fake_cnt_android
,COUNT(*) AS all_cnt_android
FROM (
SELECT user_id AS id
,SUM(CASE WHEN (location_type = '21' OR location_type = '25') THEN 1 ELSE 0 END) AS cnt
,ds
FROM xxxxxxxx
WHERE ds IN ('20230623','20230624','20230625','20230626','20230627')
AND city_name = 'xx市'
AND location_type IS NOT NULL -- 过滤掉无效数据
AND location_type != -1 -- 过滤掉xxx
GROUP BY ds
,id
) id_cnt
GROUP BY ds
;
CREATE TABLE IF NOT EXISTS temp_xxx
(
city STRING COMMENT '城市'
,line_name STRING COMMENT 'xxx'
,fake_cnt BIGINT COMMENT 'xx数量'
,all_cnt BIGINT COMMENT '总数量'
,ratio STRING COMMENT '百分比'
)
COMMENT 'xxx情况表'
PARTITIONED BY
(
ds STRING COMMENT 'day 分区字段'
)
LIFECYCLE 30
;
INSERT OVERWRITE TABLE temp_xxx PARTITION (ds='${bizdate}')
SELECT city
,line_name
,fake_cnt
,all_cnt
,CONCAT(CAST(ROUND(100 * fake_cnt / all_cnt,2) AS STRING),'%') AS ratio
FROM (
SELECT CASE mode
WHEN 'xx' THEN 'xxxxx'
WHEN 'xx' THEN 'xxxxx'
ELSE 'xxxxxx'
END AS line_name
,city
,SUM(CASE WHEN cnt >= 1 THEN 1 ELSE 0 END) AS fake_cnt
,COUNT(*) AS all_cnt
FROM (
SELECT rider_._id
,SUM(CASE WHEN location_type = '21' THEN 1 ELSE 0 END) AS cnt
,ds
,rider_.city_name AS city
,id_mode.tms_line_name AS mode
FROM xxxxxxxxxx AS rider_
LEFT JOIN (
SELECT _id AS id
,tms_line_name
FROM xxxxxxxx.xxxdi
WHERE ds = '${bizdate}'
AND tms_line_name IS NOT NULL
) id_mode
ON id_mode.id = rider_._id
WHERE ds = '${bizdate}'
AND _id IS NOT NULL
AND city_id IN ('91','66')
AND location_type IS NOT NULL -- 过滤掉无效数据
GROUP BY ds
,rider_._id
,mode
,rider_.city_name
) id_cnt
GROUP BY city
,line_name --GROUP BY ROLLUP(city,mode)
) mod_cnt
;
其中bizdate可以在调度配置中设置
参考资料:https://help.aliyun.com/zh/maxcompute/