dwd_ride_date_term_dd
, 包含user_id
(用户id
)、ride_dt
(骑行日期)以及dt_term
(日期对应节气)NULL
思路:
很明显,我们只要计算出立春、立夏、立秋、立冬
四个节气的具体日期(开始日期和结束日期),然后和骑行日期进行比较即可:
立春 <= date < 立夏 -> 春
立夏 <= date < 立秋 -> 夏
立秋 <= date < 立冬 -> 秋
立冬 <= date < 立春 -> 冬
create table dwd_ride_date_term_dd (
user_id varchar(20),
ride_dt varchar(20),
dt_term varchar(20)
);
INSERT INTO dwd_ride_date_term_dd VALUES
('001','20240101',NULL),
('001','20240203','立春'),
('001','20240311',NULL),
('001','20240401',NULL),
('001','20240505','立夏'),
('001','20240522',NULL),
('001','20240807','立秋'),
('001','20241028',NULL),
('001','20241107','立冬'),
('001','20241217',NULL),
('001','20250101',NULL),
('001','20250203','立春'),
('001','20250311',NULL),
('001','20250401',NULL),
('001','20250505','立夏'),
('001','20250522',NULL),
('001','20250807','立秋'),
('001','20251028',NULL),
('001','20251107','立冬'),
('001','20251217',NULL)
;
with t1 as (
select
user_id,
ride_dt as start_dt,
dt_term,
lead(ride_dt, 1, null) over(partition by user_id order by ride_dt) as end_dt
from dwd_ride_date_term_dd
where dt_term is not null
)
select
t2.user_id,
t2.ride_dt,
case
when t1.dt_term = '立春' then '春'
when t1.dt_term = '立夏' then '夏'
when t1.dt_term = '立秋' then '秋'
when t1.dt_term = '立冬' then '冬'
else '冬' end as season
from dwd_ride_date_term_dd t2
left join t1
on t1.user_id = t2.user_id
and t2.ride_dt >= t1.start_dt
and (t2.ride_dt < t1.end_dt or t1.end_dt is null);
lead(ride_dt, 1, '20991231') over(partition by user_id order by ride_dt) as end_dt
优点:
end_dt
字段总是有一个明确的值,这对于某些应用场景来说可能是必要的。end_dt
始终有一个有效日期的系统或应用,这种方法更符合预期。缺点:
lead(ride_dt, 1, null) over(partition by user_id order by ride_dt) as end_dt
。
优点:
end_dt
字段为null
,这使得处理缺失数据变得更加灵活。缺点:
end_dt
必须有一个有效日期的系统或应用,这种方法可能不够理想,因为它允许end_dt
为null
。end_dt
字段时,需要额外的逻辑来处理null
值(语句末尾的(t2.ride_dt < t1.end_dt or t1.end_dt is null
)