实际在职人数

SQL统计实际在职人数

按统计口径不同, 问题分两种情况:

  1. 按人头统计: 只要员工本月有在职(不论在职几天), 就算作 1 人在职
  2. 按在职天数折算: 在职人数 = 本月所有员工在职天数之和 / 本月天数

情况一:

问题: 有员工信息表, 统计近 3 个月中每个月的实际在职人数(只要员工当月有在职, 不论在职几天, 都算作 1 人在职)
 员工信息表: user_id(用户id), start_date(入职时间), end_date(离职时间,null代表未离职)
 日期月份表: mt(月份)
1. 数据准备
-- Sample data for case 1. This is a CTE prelude: append the query that reads it.
WITH t_emp_info AS (
    -- Employee records: user_id, start_date (hire date),
    -- end_date (leave date; NULL means the employee has not left).
    SELECT
        user_id,
        start_date,
        end_date
    FROM (
        VALUES
            (1001, '2023-08-14', '2023-09-23'),
            (1002, '2023-09-10', '2023-10-25'),
            (1003, '2023-09-16', '2023-12-15'),
            (1004, '2023-09-21', NULL),
            (1005, '2023-10-12', '2023-11-21'),
            (1006, '2023-10-16', '2023-10-28'),
            (1007, '2023-10-20', '2023-12-02'),
            (1008, '2023-10-26', '2023-11-26'),
            (1009, '2023-10-28', NULL),
            (1010, '2023-11-02', '2023-11-26'),
            (1011, '2023-11-06', '2023-12-16'),
            (1012, '2023-11-12', '2023-12-25')
    ) AS emp_rows(user_id, start_date, end_date)
),
t_mt AS (
    -- Reporting months, stored as 'yyyy-MM' strings.
    SELECT mt
    FROM (
        VALUES ('2023-09'), ('2023-10'), ('2023-11')
    ) AS month_rows(mt)
)
2. 代码实现
-- Step 1: for every reporting month, list the employees who were on staff at
-- any point during that month.
-- An employee matches month mt when their start month is <= mt and either
-- end_date is NULL (still employed, so no upper bound) or mt <= their end month.
-- The open-ended NULL test replaces a hard-coded '2023-11-30' cut-off, so months
-- added to t_mt later still pick up employees who have not left.
-- MaxCompute requirement: without the MAPJOIN hint, Cartesian products and
-- range (non-equi) join conditions are not allowed.
SELECT /*+ mapjoin(b) */
    a.mt,
    b.user_id,
    b.start_date,
    b.end_date
FROM t_mt a
LEFT JOIN t_emp_info b
    ON  a.mt >= DATE_FORMAT(b.start_date, 'yyyy-MM')
    AND (
        b.end_date IS NULL
        OR a.mt <= DATE_FORMAT(b.end_date, 'yyyy-MM')
    )
;
mt user_id start_date end_date
2023-09 1001 2023-08-14 2023-09-23
2023-09 1002 2023-09-10 2023-10-25
2023-09 1003 2023-09-16 2023-12-15
2023-09 1004 2023-09-21 \N
2023-10 1002 2023-09-10 2023-10-25
2023-10 1003 2023-09-16 2023-12-15
2023-10 1004 2023-09-21 \N
2023-10 1005 2023-10-12 2023-11-21
2023-10 1006 2023-10-16 2023-10-28
2023-10 1007 2023-10-20 2023-12-02
-- Step 2: headcount per month. An employee counts as 1 for a month if they
-- were on staff at any point during it, regardless of how many days.
-- The LEFT JOIN keeps months with no matching employee; COUNT(DISTINCT ...)
-- ignores the NULL user_id on such rows, so those months report 0.
-- end_date IS NULL means "still employed" and is treated as open-ended; this
-- replaces a hard-coded '2023-11-30' cut-off that would drop current employees
-- from any month after 2023-11.
-- MaxCompute requirement: without the MAPJOIN hint, Cartesian products and
-- range (non-equi) join conditions are not allowed.
SELECT /*+ mapjoin(b) */
    a.mt,
    COUNT(DISTINCT b.user_id) AS user_ct  -- employees on staff during the month
FROM t_mt a
LEFT JOIN t_emp_info b
    ON  a.mt >= DATE_FORMAT(b.start_date, 'yyyy-MM')
    AND (
        b.end_date IS NULL
        OR a.mt <= DATE_FORMAT(b.end_date, 'yyyy-MM')
    )
GROUP BY a.mt
;
mt user_ct
2023-09 4
2023-10 8
2023-11 9

情况二:

问题: 有员工信息表, 统计近 3 个月中每个月的实际在职人数(按在职天数折算: 在职人数 = 当月所有员工在职天数之和 / 当月天数)
 员工信息表: user_id(用户id), start_date(入职时间), end_date(离职时间,null代表未离职)
 日期表: dt_date(日期)
1. 数据准备
-- Sample data for case 2. This is a CTE prelude: append the query that reads it.
WITH t_emp_info AS (
    -- Employee records: user_id, start_date (hire date),
    -- end_date (leave date; NULL means the employee has not left).
    SELECT
        user_id,
        start_date,
        end_date
    FROM (
        VALUES
            (1001, '2023-08-14', '2023-09-23'),
            (1002, '2023-09-10', '2023-10-25'),
            (1003, '2023-09-16', '2023-12-15'),
            (1004, '2023-09-21', NULL),
            (1005, '2023-10-12', '2023-11-21'),
            (1006, '2023-10-16', '2023-10-28'),
            (1007, '2023-10-20', '2023-12-02'),
            (1008, '2023-10-26', '2023-11-26'),
            (1009, '2023-10-28', NULL),
            (1010, '2023-11-02', '2023-11-26'),
            (1011, '2023-11-06', '2023-12-16'),
            (1012, '2023-11-12', '2023-12-25')
    ) AS emp_rows(user_id, start_date, end_date)
),
t_dt AS (
    -- Calendar table: one 'yyyy-MM-dd' string per day, 2023-09-01 through 2023-11-30.
    -- Rows are listed in the same (non-chronological) order as the original fixture.
    SELECT dt_date
    FROM (
        VALUES
                            ('2023-09-10'), ('2023-09-20'),
            ('2023-09-01'), ('2023-09-11'), ('2023-09-21'),
            ('2023-09-02'), ('2023-09-12'), ('2023-09-22'),
            ('2023-09-03'), ('2023-09-13'), ('2023-09-23'),
            ('2023-09-04'), ('2023-09-14'), ('2023-09-24'),
            ('2023-09-05'), ('2023-09-15'), ('2023-09-25'),
            ('2023-09-06'), ('2023-09-16'), ('2023-09-26'),
            ('2023-09-07'), ('2023-09-17'), ('2023-09-27'),
            ('2023-09-08'), ('2023-09-18'), ('2023-09-28'),
            ('2023-09-09'), ('2023-09-19'), ('2023-09-29'),
            ('2023-09-30'), ('2023-10-10'), ('2023-10-20'),
            ('2023-10-01'), ('2023-10-11'), ('2023-10-21'),
            ('2023-10-02'), ('2023-10-12'), ('2023-10-22'),
            ('2023-10-03'), ('2023-10-13'), ('2023-10-23'),
            ('2023-10-04'), ('2023-10-14'), ('2023-10-24'),
            ('2023-10-05'), ('2023-10-15'), ('2023-10-25'),
            ('2023-10-06'), ('2023-10-16'), ('2023-10-26'),
            ('2023-10-07'), ('2023-10-17'), ('2023-10-27'),
            ('2023-10-08'), ('2023-10-18'), ('2023-10-28'),
            ('2023-10-09'), ('2023-10-19'), ('2023-10-29'),
            ('2023-10-30'), ('2023-11-10'), ('2023-11-20'),
            ('2023-11-01'), ('2023-11-11'), ('2023-11-21'),
            ('2023-11-02'), ('2023-11-12'), ('2023-11-22'),
            ('2023-11-03'), ('2023-11-13'), ('2023-11-23'),
            ('2023-11-04'), ('2023-11-14'), ('2023-11-24'),
            ('2023-11-05'), ('2023-11-15'), ('2023-11-25'),
            ('2023-11-06'), ('2023-11-16'), ('2023-11-26'),
            ('2023-11-07'), ('2023-11-17'), ('2023-11-27'),
            ('2023-11-08'), ('2023-11-18'), ('2023-11-28'),
            ('2023-11-09'), ('2023-11-19'), ('2023-11-29'),
            ('2023-11-30'), ('2023-10-31')
    ) AS day_rows(dt_date)
)
2. 代码实现
-- Step 1: for every calendar date, list the employees on staff that day.
-- An employee matches a date when start_date <= date and either end_date is
-- NULL (still employed, so no upper bound) or date <= end_date (inclusive).
-- The open-ended NULL test replaces a hard-coded '2023-11-30' cut-off, so dates
-- added to t_dt later still pick up employees who have not left.
-- Dates are 'yyyy-MM-dd' strings, so string comparison orders them correctly.
-- MaxCompute requirement: without the MAPJOIN hint, Cartesian products and
-- range (non-equi) join conditions are not allowed.
SELECT /*+ mapjoin(b) */
    a.dt_date,
    b.user_id,
    b.start_date,
    b.end_date
FROM t_dt a
LEFT JOIN t_emp_info b
    ON  a.dt_date >= b.start_date
    AND (
        b.end_date IS NULL
        OR a.dt_date <= b.end_date
    )
;
dt_date user_id start_date end_date
2023-09-10 1001 2023-08-14 2023-09-23
2023-09-20 1001 2023-08-14 2023-09-23
2023-09-01 1001 2023-08-14 2023-09-23
2023-09-11 1001 2023-08-14 2023-09-23
2023-09-21 1001 2023-08-14 2023-09-23
2023-09-02 1001 2023-08-14 2023-09-23
2023-09-12 1001 2023-08-14 2023-09-23
2023-09-22 1001 2023-08-14 2023-09-23
2023-09-03 1001 2023-08-14 2023-09-23
2023-09-13 1001 2023-08-14 2023-09-23
2023-09-23 1001 2023-08-14 2023-09-23
2023-09-04 1001 2023-08-14 2023-09-23
-- Step 2: day-weighted headcount per month.
--   user_day_ct: employee-days, i.e. number of (date, employee) pairs in the month
--   mt_ct:       number of distinct calendar dates of that month present in t_dt
--   user_ct:     user_day_ct / mt_ct, rounded to 3 decimals
-- The LEFT JOIN keeps dates on which nobody is employed, so mt_ct still counts
-- them; COUNT(b.user_id) skips the NULL user_id on those rows.
-- end_date IS NULL means "still employed" and is treated as open-ended; this
-- replaces a hard-coded '2023-11-30' cut-off that would drop current employees
-- from any date after it.
-- MaxCompute requirement: without the MAPJOIN hint, Cartesian products and
-- range (non-equi) join conditions are not allowed.
SELECT /*+ mapjoin(b) */
      DATE_FORMAT(a.dt_date, 'yyyy-MM') AS mt
    , COUNT(b.user_id) AS user_day_ct
    , COUNT(DISTINCT a.dt_date) AS mt_ct
    , ROUND(COUNT(b.user_id) / COUNT(DISTINCT a.dt_date), 3) AS user_ct
FROM t_dt a
LEFT JOIN t_emp_info b
    ON  a.dt_date >= b.start_date
    AND (
        b.end_date IS NULL
        OR a.dt_date <= b.end_date
    )
GROUP BY DATE_FORMAT(a.dt_date, 'yyyy-MM')
;

-- Step 2, method 2: Cartesian product of dates and employees, with the
-- "employed on that date" test applied downstream of the join.
--   user_day_ct: employee-days, i.e. (date, employee) pairs where the employee is active
--   mt_ct:       number of distinct calendar dates of that month present in t_dt
--   user_ct:     user_day_ct / mt_ct, rounded to 3 decimals
-- The activity test is evaluated per row as a conditional value instead of a
-- WHERE filter. A WHERE filter would discard every date on which nobody is
-- employed, which makes mt_ct undercount the days in the month and removes
-- months with no employees entirely.
-- end_date IS NULL means "still employed" and is treated as open-ended rather
-- than capped at a hard-coded '2023-11-30'.
SELECT
      DATE_FORMAT(dt_date, 'yyyy-MM') AS mt
    , COUNT(active_user_id) AS user_day_ct
    , COUNT(DISTINCT dt_date) AS mt_ct
    , ROUND(COUNT(active_user_id) / COUNT(DISTINCT dt_date), 3) AS user_ct
FROM (
    -- Cartesian product (join without ON). MaxCompute requirement: without the
    -- MAPJOIN hint, Cartesian products are not allowed.
    SELECT /*+ mapjoin(b) */
        a.dt_date,
        -- user_id when the employee is on staff on dt_date, otherwise NULL
        CASE
            WHEN a.dt_date >= b.start_date
                 AND (b.end_date IS NULL OR a.dt_date <= b.end_date)
            THEN b.user_id
        END AS active_user_id
    FROM t_dt a
    LEFT JOIN t_emp_info b
) c
GROUP BY DATE_FORMAT(dt_date, 'yyyy-MM')
;
mt user_day_ct mt_ct user_ct
2023-09 69 30 2.3
2023-10 142 31 4.581
2023-11 236 30 7.867
end

你可能感兴趣的:(SQL,大数据,sql,大数据)