SQL统计实际在职人数
问题分两种情况:
- 情况一: 按月统计在职人数, 只要员工本月内有在职(不论在职几天), 就算作 1 人在职
- 情况二: 按在职天数折算, 在职人数 = 在职天数 / 本月天数
情况一:
问题: 有员工信息表, 统计近 3 个月每个月的实际在职人数(只要本月有在职, 不管在职几天, 就算作 1 人在职)
员工信息表: user_id(用户id), start_date(入职时间), end_date(离职时间,null代表未离职)
日期月份表: mt(月份)
1. 数据准备
-- Sample data for case 1, built as inline tables so the example needs no real tables.
-- t_emp_info: one row per employee; dates are 'yyyy-MM-dd' strings and
-- end_date is NULL while the employee has not left.
WITH t_emp_info AS (
    SELECT
        emp.user_id,
        emp.start_date,
        emp.end_date
    FROM (
        VALUES
            (1001, '2023-08-14', '2023-09-23'),
            (1002, '2023-09-10', '2023-10-25'),
            (1003, '2023-09-16', '2023-12-15'),
            (1004, '2023-09-21', NULL),
            (1005, '2023-10-12', '2023-11-21'),
            (1006, '2023-10-16', '2023-10-28'),
            (1007, '2023-10-20', '2023-12-02'),
            (1008, '2023-10-26', '2023-11-26'),
            (1009, '2023-10-28', NULL),
            (1010, '2023-11-02', '2023-11-26'),
            (1011, '2023-11-06', '2023-12-16'),
            (1012, '2023-11-12', '2023-12-25')
    ) AS emp(user_id, start_date, end_date)
)
-- t_mt: the months to report on, as 'yyyy-MM' strings.
, t_mt AS (
    SELECT
        months.mt
    FROM (
        VALUES
            ('2023-09'),
            ('2023-10'),
            ('2023-11')
    ) AS months(mt)
)
2. 代码实现
-- Case 1, step 1: list every (month, employee) pair in which the employee was
-- employed at some point during that month.
-- An employee matches a month when their start month is not after it and their
-- end month is not before it. A NULL end_date means "has not left", so it is
-- tested with IS NULL instead of being replaced by a hard-coded stand-in date
-- ('2023-11-30'), which would silently drop current employees for any month
-- later than 2023-11.
-- LEFT JOIN keeps months that match no employee (employee columns are NULL).
SELECT
    a.mt,
    b.user_id,
    b.start_date,
    b.end_date
FROM t_mt a
LEFT JOIN t_emp_info b
    ON  a.mt >= DATE_FORMAT(b.start_date, 'yyyy-MM')
    AND (b.end_date IS NULL OR a.mt <= DATE_FORMAT(b.end_date, 'yyyy-MM'))
;
| mt | user_id | start_date | end_date |
|---|---|---|---|
| 2023-09 | 1001 | 2023-08-14 | 2023-09-23 |
| 2023-09 | 1002 | 2023-09-10 | 2023-10-25 |
| 2023-09 | 1003 | 2023-09-16 | 2023-12-15 |
| 2023-09 | 1004 | 2023-09-21 | \N |
| 2023-10 | 1002 | 2023-09-10 | 2023-10-25 |
| 2023-10 | 1003 | 2023-09-16 | 2023-12-15 |
| 2023-10 | 1004 | 2023-09-21 | \N |
| 2023-10 | 1005 | 2023-10-12 | 2023-11-21 |
| 2023-10 | 1006 | 2023-10-16 | 2023-10-28 |
| 2023-10 | 1007 | 2023-10-20 | 2023-12-02 |
| … | … | … | … |
-- Case 1, step 2: head count per month, where an employee counts as 1 for every
-- month in which they were employed for at least one day.
-- A NULL end_date means "has not left" and is tested with IS NULL rather than a
-- hard-coded stand-in date ('2023-11-30'), so the query stays correct when
-- t_mt is extended past 2023-11.
-- LEFT JOIN plus COUNT(DISTINCT b.user_id) reports 0 for a month that matches
-- no employee, because COUNT ignores the NULL user_id of the unmatched row.
SELECT
    a.mt,
    COUNT(DISTINCT b.user_id) AS user_ct
FROM t_mt a
LEFT JOIN t_emp_info b
    ON  a.mt >= DATE_FORMAT(b.start_date, 'yyyy-MM')
    AND (b.end_date IS NULL OR a.mt <= DATE_FORMAT(b.end_date, 'yyyy-MM'))
GROUP BY a.mt
;
| mt | user_ct |
|---|---|
| 2023-09 | 4 |
| 2023-10 | 8 |
| 2023-11 | 9 |
情况二:
问题: 有员工信息表, 统计近 3 个月每个月的实际在职人数(在职人数 = 在职天数 / 本月天数)
员工信息表: user_id(用户id), start_date(入职时间), end_date(离职时间,null代表未离职)
日期表: dt_date(日期)
1. 数据准备
-- Sample data for case 2, built as inline tables so the example needs no real tables.
-- t_emp_info: one row per employee; dates are 'yyyy-MM-dd' strings and
-- end_date is NULL while the employee has not left.
WITH t_emp_info AS (
SELECT * FROM (
VALUES (1001, '2023-08-14', '2023-09-23'),
(1002, '2023-09-10', '2023-10-25'),
(1003, '2023-09-16', '2023-12-15'),
(1004, '2023-09-21', NULL ),
(1005, '2023-10-12', '2023-11-21'),
(1006, '2023-10-16', '2023-10-28'),
(1007, '2023-10-20', '2023-12-02'),
(1008, '2023-10-26', '2023-11-26'),
(1009, '2023-10-28', NULL ),
(1010, '2023-11-02', '2023-11-26'),
(1011, '2023-11-06', '2023-12-16'),
(1012, '2023-11-12', '2023-12-25')
) AS table_name(user_id, start_date, end_date)
)
-- t_dt: calendar table with one row per day. The literals are not listed in
-- chronological order, but together they cover every day from 2023-09-01
-- through 2023-11-30 exactly once.
, t_dt AS (
SELECT * FROM (
VALUES
('2023-09-10'), ('2023-09-20')
, ('2023-09-01'), ('2023-09-11'), ('2023-09-21')
, ('2023-09-02'), ('2023-09-12'), ('2023-09-22')
, ('2023-09-03'), ('2023-09-13'), ('2023-09-23')
, ('2023-09-04'), ('2023-09-14'), ('2023-09-24')
, ('2023-09-05'), ('2023-09-15'), ('2023-09-25')
, ('2023-09-06'), ('2023-09-16'), ('2023-09-26')
, ('2023-09-07'), ('2023-09-17'), ('2023-09-27')
, ('2023-09-08'), ('2023-09-18'), ('2023-09-28')
, ('2023-09-09'), ('2023-09-19'), ('2023-09-29')
, ('2023-09-30'), ('2023-10-10'), ('2023-10-20')
, ('2023-10-01'), ('2023-10-11'), ('2023-10-21')
, ('2023-10-02'), ('2023-10-12'), ('2023-10-22')
, ('2023-10-03'), ('2023-10-13'), ('2023-10-23')
, ('2023-10-04'), ('2023-10-14'), ('2023-10-24')
, ('2023-10-05'), ('2023-10-15'), ('2023-10-25')
, ('2023-10-06'), ('2023-10-16'), ('2023-10-26')
, ('2023-10-07'), ('2023-10-17'), ('2023-10-27')
, ('2023-10-08'), ('2023-10-18'), ('2023-10-28')
, ('2023-10-09'), ('2023-10-19'), ('2023-10-29')
, ('2023-10-30'), ('2023-11-10'), ('2023-11-20')
, ('2023-11-01'), ('2023-11-11'), ('2023-11-21')
, ('2023-11-02'), ('2023-11-12'), ('2023-11-22')
, ('2023-11-03'), ('2023-11-13'), ('2023-11-23')
, ('2023-11-04'), ('2023-11-14'), ('2023-11-24')
, ('2023-11-05'), ('2023-11-15'), ('2023-11-25')
, ('2023-11-06'), ('2023-11-16'), ('2023-11-26')
, ('2023-11-07'), ('2023-11-17'), ('2023-11-27')
, ('2023-11-08'), ('2023-11-18'), ('2023-11-28')
, ('2023-11-09'), ('2023-11-19'), ('2023-11-29')
, ('2023-11-30'), ('2023-10-31')
) AS table_name(dt_date)
)
2. 代码实现
-- Case 2, step 1: list every (day, employee) pair in which the employee was
-- employed on that day, start and end dates inclusive.
-- Dates are compared as 'yyyy-MM-dd' strings, which sort chronologically.
-- A NULL end_date means "has not left", so it is tested with IS NULL instead of
-- being replaced by a hard-coded stand-in date ('2023-11-30'), which would
-- silently drop current employees for any day after 2023-11-30.
-- LEFT JOIN keeps days that match no employee (employee columns are NULL).
SELECT
    a.dt_date,
    b.user_id,
    b.start_date,
    b.end_date
FROM t_dt a
LEFT JOIN t_emp_info b
    ON  a.dt_date >= b.start_date
    AND (b.end_date IS NULL OR a.dt_date <= b.end_date)
;
| dt_date | user_id | start_date | end_date |
|---|---|---|---|
| 2023-09-10 | 1001 | 2023-08-14 | 2023-09-23 |
| 2023-09-20 | 1001 | 2023-08-14 | 2023-09-23 |
| 2023-09-01 | 1001 | 2023-08-14 | 2023-09-23 |
| 2023-09-11 | 1001 | 2023-08-14 | 2023-09-23 |
| 2023-09-21 | 1001 | 2023-08-14 | 2023-09-23 |
| 2023-09-02 | 1001 | 2023-08-14 | 2023-09-23 |
| 2023-09-12 | 1001 | 2023-08-14 | 2023-09-23 |
| 2023-09-22 | 1001 | 2023-08-14 | 2023-09-23 |
| 2023-09-03 | 1001 | 2023-08-14 | 2023-09-23 |
| 2023-09-13 | 1001 | 2023-08-14 | 2023-09-23 |
| 2023-09-23 | 1001 | 2023-08-14 | 2023-09-23 |
| 2023-09-04 | 1001 | 2023-08-14 | 2023-09-23 |
| … | … | … | … |
-- Case 2, step 2: day-weighted head count per month
--   user_day_ct = number of (day, employee) pairs in the month (employee-days)
--   mt_ct       = number of distinct calendar days t_dt holds for the month
--   user_ct     = user_day_ct / mt_ct, rounded to 3 decimals
-- mt_ct equals the real length of the month only when t_dt contains every day
-- of that month.
-- A NULL end_date means "has not left" and is tested with IS NULL rather than a
-- hard-coded stand-in date ('2023-11-30'), so the query stays correct when
-- t_dt is extended past 2023-11-30.
-- LEFT JOIN keeps days with no employee: they add 0 to user_day_ct (COUNT
-- ignores the NULL user_id) but still count towards mt_ct.
-- NOTE(review): assumes `/` on two integer counts yields a fractional result,
-- as in Hive / Spark SQL; cast one operand if the engine truncates.
SELECT
    DATE_FORMAT(a.dt_date, 'yyyy-MM') AS mt,
    COUNT(b.user_id) AS user_day_ct,
    COUNT(DISTINCT a.dt_date) AS mt_ct,
    ROUND(COUNT(b.user_id) / COUNT(DISTINCT a.dt_date), 3) AS user_ct
FROM t_dt a
LEFT JOIN t_emp_info b
    ON  a.dt_date >= b.start_date
    AND (b.end_date IS NULL OR a.dt_date <= b.end_date)
GROUP BY DATE_FORMAT(a.dt_date, 'yyyy-MM')
;
-- Case 2, alternative formulation: pair every day with every employee, then
-- keep only the pairs that fall inside the employment period.
-- The pairing is written as an explicit CROSS JOIN: a LEFT JOIN with no ON
-- clause is rejected by many engines, and the WHERE filter discards unmatched
-- rows anyway, so nothing of the outer join would survive.
-- A NULL end_date means "has not left" and is tested with IS NULL rather than a
-- hard-coded stand-in date ('2023-11-30').
-- Difference from the LEFT JOIN ... ON version: rows are filtered before
-- aggregation, so a day on which nobody was employed is not counted in mt_ct,
-- and a month with no employee-days produces no row at all. Prefer the
-- LEFT JOIN ... ON version when such days or months can occur.
-- NOTE(review): assumes `/` on two integer counts yields a fractional result,
-- as in Hive / Spark SQL; cast one operand if the engine truncates.
SELECT
    DATE_FORMAT(a.dt_date, 'yyyy-MM') AS mt,
    COUNT(b.user_id) AS user_day_ct,
    COUNT(DISTINCT a.dt_date) AS mt_ct,
    ROUND(COUNT(b.user_id) / COUNT(DISTINCT a.dt_date), 3) AS user_ct
FROM t_dt a
CROSS JOIN t_emp_info b
WHERE a.dt_date >= b.start_date
  AND (b.end_date IS NULL OR a.dt_date <= b.end_date)
GROUP BY DATE_FORMAT(a.dt_date, 'yyyy-MM')
;
| mt | user_day_ct | mt_ct | user_ct |
|---|---|---|---|
| 2023-09 | 69 | 30 | 2.3 |
| 2023-10 | 142 | 31 | 4.581 |
| 2023-11 | 236 | 30 | 7.867 |
end