SET @rownum:=0; -- 注意:SQL语句中“:=”表示赋值,“=”表示比较,“@”表示变量
SELECT city.*,@rownum:=@rownum+1 as rownum
FROM city;
SELECT city.*,@rownum:=@rownum+1 as rownum
FROM city,(SELECT @rownum:=0) a;
-- MySQL8.0以上版本支持开窗函数,跟Oracle分析函数用法一样。
SELECT city.*,row_number() over(ORDER BY population) as rownum
FROM city;
SELECT emp.*,ROWNUM,ROWID FROM emp;
非常经典的SQL编程面试题,比如查找NBA连续夺冠的队伍起止年份,查找各日期段内的银行贷款利率等等。
测试数据如下:
mysql> select * from t2;
+------+
| a |
+------+
| 1 |
| 2 |
| 3 |
| 100 |
| 101 |
| 103 |
| 104 |
| 105 |
+------+
8 rows in set (0.00 sec)
– 列和行号之间的差值可以反应是否为连续数值
mysql> SELECT a,rn,a-rn FROM (
-> SELECT t2.*,@a:=@a+1 rn
-> FROM t2,(SELECT @a:=0) AS t
-> ) AS b;
+------+------+------+
| a | rn | a-rn |
+------+------+------+
| 1 | 1 | 0 |
| 2 | 2 | 0 |
| 3 | 3 | 0 |
| 100 | 4 | 96 |
| 101 | 5 | 96 |
| 103 | 6 | 97 |
| 104 | 7 | 97 |
| 105 | 8 | 97 |
+------+------+------+
8 rows in set, 2 warnings (0.00 sec)
– 根据差值分组统计,MIN(a)就是起始范围,MAX(a)就是终止范围
mysql> SELECT a,MIN(a) as start_range, MAX(a) as end_range
-> FROM (
-> SELECT a,rn,a-rn
-> FROM (
-> SELECT t2.*,@a:=@a+1 rn
-> FROM t2,(SELECT @a:=0) AS t
-> ) AS b
-> ) AS c
-> GROUP BY a-rn;
+------+-------------+-----------+
| a | start_range | end_range |
+------+-------------+-----------+
| 1 | 1 | 3 |
| 100 | 100 | 101 |
| 103 | 103 | 105 |
+------+-------------+-----------+
3 rows in set, 2 warnings (0.00 sec)
原始数据如下:
mysql> select * from NBA t;
+----------+------+
| team | y |
+----------+------+
| 活塞 | 1990 |
| 公牛 | 1991 |
| 公牛 | 1992 |
| 公牛 | 1993 |
| 火箭 | 1994 |
| 火箭 | 1995 |
| 公牛 | 1996 |
| 公牛 | 1997 |
| 公牛 | 1998 |
| 马刺 | 1999 |
| 湖人 | 2000 |
| 湖人 | 2001 |
| 湖人 | 2002 |
| 马刺 | 2003 |
| 活塞 | 2004 |
| 马刺 | 2005 |
| 热火 | 2006 |
| 马刺 | 2007 |
| 凯尔特人 | 2008 |
| 湖人 | 2009 |
| 湖人 | 2010 |
+----------+------+
21 rows in set (0.11 sec)
解题思路
-- 2.将查找结果作为子查询,加上行号
SELECT t3.*,@rn:=@rn+1 AS rn
FROM(
-- 1.用自关联查找出连续夺冠的队伍
SELECT t1.*,t2.y as yy
FROM nba AS t1,nba as t2
WHERE t1.team=t2.team
and t1.y=t2.y-1
) t3,(SELECT @rn:=0) AS t;
-- 3.用年份和行号差值,和team一起分组,min(y)和max(yy)就是连续夺冠队伍的起止年份
-- 注意:with加别名的方式在MySQL8.0以上版本才支持,5.0以上的版本还是要套用子查询
WITH a AS (
SELECT t3.*,@rn:=@rn+1 AS rn
FROM(
SELECT t1.*,t2.y as yy
FROM nba AS t1,nba as t2
WHERE t1.team=t2.team
and t1.y=t2.y-1
) t3,(SELECT @rn:=0) AS t
)
SELECT team, min(y) AS sYear,MAX(yy) AS eYear
FROM a
GROUP BY team,(a.y-rn);
查询结果:
另外一个方法是用lag()或者lead()分析函数代替步骤1的自关联,MySQL8.0以上版本支持
数据表TABLE1(发布日期、贷款利率)存有银行分布的历次一年期贷款利率。例如:
发布日期 贷款利率
2010-01-01 5.1%
2010-10-01 5.1%
2011-01-01 6.0%
2012-10-31 6.0%
2012-11-10 6.0%
2012-12-31 6.0%
2013-03-31 5.9%
2013-09-01 5.5%
2014-05-01 5.5%
2015-01-01 5.1%
2016-06-01 5.1%
2017-09-01 5.1%
这个表只记录了离散日期的数据,日期间的贷款利率和最近日期的一致,但是存在相邻记录的贷款利率相同的情况下。为方便获得每天的一年期银行贷款利率,我们设计了如下数据表TABLE2(起始日期,截止日期,贷款利率),存放各日期段内的银行贷款利率,并把相邻相同的数据做了去重处理。TABLE1数据生成TABLE2的数据记录示意如下:
起始日期 截止日期 贷款利率
2010-01-01 2010-12-31 5.1%
2011-01-01 2013-03-30 6.0%
2013-03-31 2013-08-31 5.9%
2013-09-01 2014-12-31 5.5%
2015-01-01 9999-12-31 5.1%
现在需要写SQL,根据TABLE1表生成TABLE2表。
解题思路(Oracle 11g的SQL写法):
-- 1.查找相邻时间段,利率发生变化的数据
/*方法1:LEAD分析函数*/
SELECT r_date,loan_rate
FROM (
SELECT table1.*,
LAG(r_date,1) OVER(ORDER BY r_date) AS r_date2,
LAG(loan_rate,1,'0') OVER(ORDER BY r_date) AS loan_rate2
FROM table1
) a
WHERE loan_rate<>loan_rate2
;
/*方法2:自关联*/
WITH a AS(
SELECT ROWNUM AS rn, table1.*
FROM table1
)
SELECT a1.r_date AS sdate,a1.loan_rate
FROM a a1 LEFT JOIN a a2
ON a1.rn=a2.rn+1
WHERE a1.loan_rate<>a2.loan_rate OR a2.r_date IS NULL
ORDER BY sdate
;
-- 2.将上述发生变化的日期作为起始时间,查找截至时间
SELECT b.sdate
,LEAD(b.sdate-1,1,to_date('9999-12-31','yyyy-mm-dd')) OVER(ORDER BY b.sdate) AS edate
,b.loan_rate
FROM (
WITH a AS(
SELECT ROWNUM AS rn, table1.*
FROM table1
)
SELECT a1.r_date AS sdate,a1.loan_rate
FROM a a1 LEFT JOIN a a2
ON a1.rn=a2.rn+1
WHERE a1.loan_rate<>a2.loan_rate OR a2.r_date IS NULL
ORDER BY sdate
) b