补充月份:这个需求是计算每个小组前三个月的平均产能。这里的“前三个月”指的都是自然月,比如现在是2023年7月,那么要计算的前三个月平均产能就是2023年4月到6月的平均产能。但是数据中的这些月份可能不是连续的(某些月份可能没有记录)。
方法一:把日期补齐
-- Method 1: fill in the missing months, then take a sliding-window average.
--
-- For one group, build a gap-free series of months between its first and last
-- recorded stat_month, treat months with no record as 0 output, and for every
-- month compute the average all_pert of the three rows (= three calendar
-- months, because the series has no gaps) immediately before it.
--
-- Notes:
--   * The series stops at the group's last recorded month, so no row is
--     produced for a month that has no record after that point.
--   * The first row of a group has no preceding rows and gets a NULL average;
--     the second and third rows average over fewer than three months.
--   * NOTE(review): assumes stat_month is a date (or date string) accepted by
--     add_months / months_between and comparable to add_months' result in the
--     join below -- confirm against the table definition.
WITH group_month_stat AS (
    -- Monthly rows of the group under inspection, taken from one dt partition.
    SELECT
        group_id,
        group_name,
        all_pert,
        stat_month
    FROM dws_group_user_month_stat
    WHERE dt = '2023-07-10'
      AND group_name IS NOT NULL
      AND group_id = '00a62545-e66f-4961-9f39-8f4b5c894d19'
),
group_month_range AS (
    -- First and last recorded month per group.
    SELECT
        group_name,
        group_id,
        MIN(stat_month) AS min_month,
        MAX(stat_month) AS max_month
    FROM group_month_stat
    GROUP BY
        group_name,
        group_id
),
months_table AS (
    -- One row per month from min_month to max_month inclusive.
    -- space(n) yields n blanks; splitting on ' ' yields n + 1 elements, so
    -- posexplode emits positions i = 0 .. n, which are used as month offsets.
    SELECT
        r.group_name,
        r.group_id,
        r.min_month,
        r.max_month,
        add_months(r.min_month, pe.i) AS cur_month
    FROM group_month_range r
    LATERAL VIEW posexplode(
        split(space(CAST(months_between(r.max_month, r.min_month) AS INT)), ' ')
    ) pe AS i, num
),
filled_month_stat AS (
    -- Start from the month calendar so every month survives the join; months
    -- without a stat row get all_pert = 0.
    -- The join matches on group_id as well as group_name: the calendar and the
    -- window below are keyed by both, so matching on the name alone would
    -- cross-match rows of different groups that share a name.
    SELECT
        m.group_name,
        m.group_id,
        m.cur_month,
        m.min_month,
        m.max_month,
        nvl(s.all_pert, 0) AS all_pert
    FROM months_table m
    LEFT JOIN group_month_stat s
        ON s.group_name = m.group_name
       AND s.group_id = m.group_id
       AND s.stat_month = m.cur_month
)
SELECT
    group_name,
    group_id,
    cur_month,
    min_month,
    max_month,
    all_pert,
    -- Average of the three rows before the current one (current row excluded).
    AVG(all_pert) OVER (
        PARTITION BY group_name, group_id
        ORDER BY cur_month
        ROWS BETWEEN 3 PRECEDING AND 1 PRECEDING
    ) AS average_sales
FROM filled_month_stat;
方法二:
将数据炸开成三份
-- Method 2: replicate every monthly row three times instead of filling gaps.
--
-- Each source row is exploded with month_diff = 1, 2, 3 and re-dated to
-- stat_month + month_diff, i.e. it is attributed to each of the three months
-- that follow it. Summing all_pert per (group_name, cur_month) therefore gives
-- the total of the three months preceding cur_month, and a month with no
-- record simply contributes nothing (equivalent to 0). The sum is always
-- divided by 3, regardless of how many of the three months had records.
--
-- NOTE(review): assumes stat_month is a date (or date string) accepted by
-- add_months -- confirm against the table definition.
-- '${datestr}' is substituted by the caller before the statement runs.
WITH shifted_month_stat AS (
    SELECT
        group_name,
        all_pert,
        add_months(stat_month, month_diff) AS cur_month
    FROM dws_group_month_stat
    LATERAL VIEW explode(array(1, 2, 3)) tmp AS month_diff
    WHERE dt = '${datestr}'
),
three_month_total AS (
    SELECT
        group_name,
        cur_month,
        SUM(all_pert) AS three_month_pert
    FROM shifted_month_stat
    GROUP BY
        group_name,
        cur_month
)
INSERT OVERWRITE TABLE dws_group_three_month_stat PARTITION (dt = '${datestr}')
SELECT
    group_name,
    cur_month,
    three_month_pert / 3 AS three_month_pert
FROM three_month_total;
补齐日期:
-- Expand every (start_date, end_date) range in t3 into one row per calendar
-- day, both endpoints included.
-- space(n) yields n blanks; splitting on ' ' yields n + 1 elements, so
-- posexplode emits positions i = 0 .. DATEDIFF(end_date, start_date), which
-- are used as day offsets from start_date.
-- real_week is Hive's dayofweek of that day (1 = Sunday ... 7 = Saturday).
-- The GROUP BY only collapses duplicate rows; nothing is aggregated.
SELECT
    staff_id,
    start_date,
    end_date,
    date_add(start_date, i)            AS real_date,
    dayofweek(date_add(start_date, i)) AS real_week,
    code
FROM t3
LATERAL VIEW posexplode(split(space(DATEDIFF(end_date, start_date)), ' ')) pe AS i, num
GROUP BY
    staff_id,
    start_date,
    end_date,
    i,
    code,
    date_add(start_date, i),
    dayofweek(date_add(start_date, i))
这里使用的是datediff