题目
Table: Failed
+--------------+---------+
| Column Name | Type |
+--------------+---------+
| fail_date | date |
+--------------+---------+
该表主键为 fail_date。
该表记录了任务失败的日期。
Table: Succeeded
+--------------+---------+
| Column Name | Type |
+--------------+---------+
| success_date | date |
+--------------+---------+
该表主键为 success_date。
该表记录了任务成功的日期。
系统 每天 运行一个任务。每个任务都独立于先前的任务。任务的状态可以是失败或是成功。
编写一个 SQL 查询 2019-01-01 到 2019-12-31 期间任务连续同状态 period_state 的起止日期(start_date 和 end_date)。即如果任务失败了,就是失败状态的起止日期,如果任务成功了,就是成功状态的起止日期。
最后结果按照起始日期 start_date 排序
查询结果样例如下所示:
Failed table:
+-------------------+
| fail_date |
+-------------------+
| 2018-12-28 |
| 2018-12-29 |
| 2019-01-04 |
| 2019-01-05 |
+-------------------+
Succeeded table:
+-------------------+
| success_date |
+-------------------+
| 2018-12-30 |
| 2018-12-31 |
| 2019-01-01 |
| 2019-01-02 |
| 2019-01-03 |
| 2019-01-06 |
+-------------------+
Result table:
+--------------+--------------+--------------+
| period_state | start_date | end_date |
+--------------+--------------+--------------+
| succeeded | 2019-01-01 | 2019-01-03 |
| failed | 2019-01-04 | 2019-01-05 |
| succeeded | 2019-01-06 | 2019-01-06 |
+--------------+--------------+--------------+
结果忽略了 2018 年的记录,因为我们只关心从 2019-01-01 到 2019-12-31 的记录
从 2019-01-01 到 2019-01-03 所有任务成功,系统状态为 "succeeded"。
从 2019-01-04 到 2019-01-05 所有任务失败,系统状态为 "failed"。
从 2019-01-06 到 2019-01-06 所有任务成功,系统状态为 "succeeded"。
生成数据
-- Sample schema for the problem. Each table stores one calendar date per row,
-- and the problem statement declares that date column to be the primary key,
-- so the key is declared here as well.
CREATE TABLE Failed (
    fail_date DATE NOT NULL,
    PRIMARY KEY (fail_date)
);
CREATE TABLE Succeeded (
    success_date DATE NOT NULL,
    PRIMARY KEY (success_date)
);

-- Sample rows from the problem statement. The 2018 dates are included so the
-- answer's 2019 filter has something to exclude.
INSERT INTO Failed (fail_date)
VALUES ('2018-12-28'), ('2018-12-29'), ('2019-01-04'), ('2019-01-05');
INSERT INTO Succeeded (success_date)
VALUES ('2018-12-30'), ('2018-12-31'), ('2019-01-01'), ('2019-01-02'), ('2019-01-03'), ('2019-01-06');
解答
将两表进行合并
-- Merge both tables into a single list of (date, status) rows.
-- The `lag` alias is back-quoted because LAG is a reserved word from
-- MySQL 8.0.2 onward; left unquoted, the statement is a syntax error there.
(SELECT F.`fail_date` AS `DATE`, 'Failed' AS `lag`
 FROM Failed AS F)
UNION ALL
(SELECT S.`success_date` AS `DATE`, 'Succeeded' AS `lag`
 FROM Succeeded AS S);
选出2019年的记录 并按时间升序排序
-- Keep only the 2019 rows of the merged (date, status) list, oldest first.
-- The `lag` alias is back-quoted because LAG is a reserved word from
-- MySQL 8.0.2 onward. The year filter is written as a half-open date range
-- rather than YEAR(col) = 2019; for DATE values the two select the same rows.
SELECT all_result.`DATE`, all_result.`lag`
FROM ((SELECT F.`fail_date` AS `DATE`, 'Failed' AS `lag`
       FROM Failed AS F)
      UNION ALL
      (SELECT S.`success_date` AS `DATE`, 'Succeeded' AS `lag`
       FROM Succeeded AS S)) AS all_result
WHERE all_result.`DATE` >= '2019-01-01'
  AND all_result.`DATE` < '2020-01-01'
ORDER BY all_result.`DATE` ASC;
如果连续为同一个lag则分为一组 lag变动则组号+1
-- Number the runs of identical status: walking the 2019 rows in date order,
-- keep the current group_id while the status repeats and add 1 whenever it
-- changes. @pre_lag starts as NULL, so the first row always opens group 1.
-- Select-list order matters: group_id must be computed before @pre_lag is
-- overwritten with the current row's status.
-- The `lag` alias is back-quoted because LAG is reserved from MySQL 8.0.2 on.
-- NOTE(review): this depends on MySQL evaluating the user-variable
-- expressions row by row in ORDER BY order, which the manual does not
-- guarantee — confirm on the target server version.
SELECT all_result.`date`, all_result.`lag`,
       @group_id := IF(all_result.`lag` = @pre_lag, @group_id, @group_id + 1) AS group_id,
       @pre_lag := all_result.`lag`
FROM ((SELECT F.`fail_date` AS `date`, 'Failed' AS `lag`
       FROM Failed AS F)
      UNION ALL
      (SELECT S.`success_date` AS `date`, 'Succeeded' AS `lag`
       FROM Succeeded AS S)) AS all_result
CROSS JOIN (SELECT @group_id := 0, @pre_lag := NULL) AS init
WHERE all_result.`date` >= '2019-01-01'
  AND all_result.`date` < '2020-01-01'
ORDER BY all_result.`date` ASC;
按group_id进行分组 选出最小的日期为起始日期 最大的日期为结束日期 lag为对应的状态
-- Final answer: one row per run of consecutive same-status days in 2019.
-- Inner query: tag every 2019 row with a group_id that increases each time
-- the status changes (group_id is computed before @pre_lag is overwritten).
-- Outer query: collapse each group to its first and last date.
-- Changes from the earlier steps:
--   * status literals are lowercase, matching the expected 'failed' /
--     'succeeded' output;
--   * `lag` is back-quoted (LAG is reserved from MySQL 8.0.2 onward);
--   * `lag` is part of GROUP BY so the query is valid under
--     ONLY_FULL_GROUP_BY (it is constant within a group, so the groups are
--     unchanged);
--   * the result is ordered by start_date, as the problem requires.
-- NOTE(review): the numbering depends on MySQL evaluating the user-variable
-- expressions in the inner ORDER BY order, which the manual does not
-- guarantee — confirm on the target server version.
SELECT tmp.`lag` AS period_state,
       MIN(tmp.`date`) AS start_date,
       MAX(tmp.`date`) AS end_date
FROM (SELECT all_result.`date`, all_result.`lag`,
             @group_id := IF(all_result.`lag` = @pre_lag, @group_id, @group_id + 1) AS group_id,
             @pre_lag := all_result.`lag` AS pre_lag
      FROM ((SELECT F.`fail_date` AS `date`, 'failed' AS `lag`
             FROM Failed AS F)
            UNION ALL
            (SELECT S.`success_date` AS `date`, 'succeeded' AS `lag`
             FROM Succeeded AS S)) AS all_result
      CROSS JOIN (SELECT @group_id := 0, @pre_lag := NULL) AS init
      WHERE all_result.`date` >= '2019-01-01'
        AND all_result.`date` < '2020-01-01'
      ORDER BY all_result.`date` ASC) AS tmp
GROUP BY tmp.group_id, tmp.`lag`
ORDER BY start_date ASC;
后边想了一下会不会有什么缺陷 如果时间不连续 还需要判断当前日期与前一日期的差值是否为1 只有当当前lag和前一lag一致且当前日期与前一日期的差值为1时保持一样的分组 否则加1
-- Variant of the group numbering that also breaks a run on a calendar gap:
-- a row stays in the current group only when its status equals the previous
-- row's status AND its date is exactly one day after the previous row's date.
-- With @pre_lag and @pre_date starting as NULL the condition is not true for
-- the first row, so it opens group 1.
-- Select-list order matters: group_id must be computed before @pre_lag and
-- @pre_date are overwritten with the current row's values.
-- The `lag` alias is back-quoted because LAG is reserved from MySQL 8.0.2 on.
-- NOTE(review): this depends on MySQL evaluating the user-variable
-- expressions row by row in ORDER BY order, which the manual does not
-- guarantee — confirm on the target server version.
SELECT all_result.`date`, all_result.`lag`,
       @group_id := IF(all_result.`lag` = @pre_lag
                       AND DATEDIFF(all_result.`date`, @pre_date) = 1,
                       @group_id,
                       @group_id + 1) AS group_id,
       @pre_lag := all_result.`lag`,
       @pre_date := all_result.`date`
FROM ((SELECT F.`fail_date` AS `date`, 'Failed' AS `lag`
       FROM Failed AS F)
      UNION ALL
      (SELECT S.`success_date` AS `date`, 'Succeeded' AS `lag`
       FROM Succeeded AS S)) AS all_result
CROSS JOIN (SELECT @group_id := 0, @pre_lag := NULL, @pre_date := NULL) AS init
WHERE all_result.`date` >= '2019-01-01'
  AND all_result.`date` < '2020-01-01'
ORDER BY all_result.`date` ASC;
但其实是我想多了。因为系统每天都会运行一个任务,把两张表合并起来之后日期就是连续的,所以以上的考虑是多余的,但写进去也不影响结果。
别的解答
和我的思想一致的
-- Alternative answer using the same idea with a numeric status code
-- (0 = failed, 1 = succeeded).
-- Inner query b: tag each 2019 row with a group_id that increases whenever
-- the status code differs from the previous row's; group_id is computed
-- before @last_result is overwritten.
-- @last_result starts at 0, the same value as the "failed" code, so a leading
-- failed run is numbered 0 instead of 1. The groups are still distinct.
-- Changes from the original:
--   * UNION ALL instead of UNION: the two branches carry different
--     task_result values and each date column is a primary key, so there are
--     no duplicates to remove;
--   * task_result is part of GROUP BY so the query is valid under
--     ONLY_FULL_GROUP_BY (it is constant within a group);
--   * the statement is terminated.
-- NOTE(review): the numbering depends on MySQL evaluating the user-variable
-- expressions in the inner ORDER BY order, which the manual does not
-- guarantee — confirm on the target server version.
SELECT IF(b.task_result = 0, 'failed', 'succeeded') AS period_state,
       MIN(b.`date`) AS start_date,
       MAX(b.`date`) AS end_date
FROM (
    SELECT a.`date`, a.task_result,
           @group_id := IF(@last_result = a.task_result, @group_id, @group_id + 1) AS group_id,
           @last_result := a.task_result AS last_result
    FROM (
        SELECT fail_date AS `date`, 0 AS task_result
        FROM Failed
        UNION ALL
        SELECT success_date AS `date`, 1 AS task_result
        FROM Succeeded
    ) AS a
    CROSS JOIN (SELECT @group_id := 0, @last_result := 0) AS temp
    WHERE a.`date` BETWEEN '2019-01-01' AND '2019-12-31'
    ORDER BY a.`date` ASC
) AS b
GROUP BY b.group_id, b.task_result
ORDER BY start_date ASC;
分别处理
这时就要以日期是否连续作为分组依据了:如果当前日期与前一日期相差 1 天,则沿用当前组号,否则组号加 1
-- Number the runs of consecutive success dates in 2019: a row keeps the
-- current run number when it is exactly one day after the previous row,
-- otherwise the counter is incremented. @prev starts as NULL, so the first
-- row always starts run 1.
-- Changes from the original: the rows are explicitly ordered by date (the run
-- detection only works when rows are visited oldest first), the year filter
-- is a date range instead of YEAR(col) compared to the string '2019', and the
-- statement is terminated so it does not run into the next one.
-- NOTE(review): this depends on MySQL evaluating the user-variable
-- expressions row by row in ORDER BY order, which the manual does not
-- guarantee — confirm on the target server version.
SELECT success_date,
       IF(DATEDIFF(success_date, @prev) = 1, @count, @count := @count + 1) AS num,
       @prev := success_date
FROM Succeeded
CROSS JOIN (SELECT @prev := NULL, @count := 0) AS i
WHERE success_date >= '2019-01-01'
  AND success_date < '2020-01-01'
ORDER BY success_date ASC;
-- Number the runs of consecutive failure dates in 2019: a row keeps the
-- current run number when it is exactly one day after the previous row,
-- otherwise the counter is incremented. @prev starts as NULL, so the first
-- row always starts run 1.
-- Changes from the original: the rows are explicitly ordered by date (the run
-- detection only works when rows are visited oldest first), the year filter
-- is a date range instead of YEAR(col) compared to the string '2019', and the
-- statement is terminated.
-- NOTE(review): this depends on MySQL evaluating the user-variable
-- expressions row by row in ORDER BY order, which the manual does not
-- guarantee — confirm on the target server version.
SELECT fail_date,
       IF(DATEDIFF(fail_date, @prev) = 1, @count, @count := @count + 1) AS num,
       @prev := fail_date
FROM Failed
CROSS JOIN (SELECT @prev := NULL, @count := 0) AS i
WHERE fail_date >= '2019-01-01'
  AND fail_date < '2020-01-01'
ORDER BY fail_date ASC;
分组查询最大最小日期然后合并即可
-- Per-table answer: find the runs of consecutive dates in each table
-- separately, then stack the two result sets and sort by start date.
-- In each branch the inner query numbers the runs (a row keeps the current
-- number when it is exactly one day after the previous row) and the outer
-- query collapses every run to its first and last date.
-- Changes from the original:
--   * each inner query is ordered by date, because the run detection only
--     works when rows are visited oldest first;
--   * each branch uses its own pair of user variables, so no state is shared
--     between the two branches;
--   * the derived tables have distinct aliases and the outer select list is
--     explicit;
--   * the year filter is a date range instead of YEAR(col) compared to the
--     string '2019'.
-- NOTE(review): the numbering depends on MySQL evaluating the user-variable
-- expressions in the inner ORDER BY order, which the manual does not
-- guarantee — confirm on the target server version.
SELECT runs.period_state, runs.start_date, runs.end_date
FROM ((SELECT 'succeeded' AS period_state,
              MIN(s.success_date) AS start_date,
              MAX(s.success_date) AS end_date
       FROM (SELECT success_date,
                    IF(DATEDIFF(success_date, @prev_success) = 1,
                       @run_success,
                       @run_success := @run_success + 1) AS num,
                    @prev_success := success_date AS prev_date
             FROM Succeeded
             CROSS JOIN (SELECT @prev_success := NULL, @run_success := 0) AS i
             WHERE success_date >= '2019-01-01'
               AND success_date < '2020-01-01'
             ORDER BY success_date ASC) AS s
       GROUP BY s.num)
      UNION ALL
      (SELECT 'failed' AS period_state,
              MIN(f.fail_date) AS start_date,
              MAX(f.fail_date) AS end_date
       FROM (SELECT fail_date,
                    IF(DATEDIFF(fail_date, @prev_fail) = 1,
                       @run_fail,
                       @run_fail := @run_fail + 1) AS num,
                    @prev_fail := fail_date AS prev_date
             FROM Failed
             CROSS JOIN (SELECT @prev_fail := NULL, @run_fail := 0) AS i
             WHERE fail_date >= '2019-01-01'
               AND fail_date < '2020-01-01'
             ORDER BY fail_date ASC) AS f
       GROUP BY f.num)) AS runs
ORDER BY runs.start_date ASC;