接着前两天的博文,
现在用 Hive 实现一下占比、环比和同比的计算。
-- Create the demo table and load the sample rows.
--   order_id   : order identifier
--   order_time : calendar date of the order
--   order_num  : quantity on the order (summed as "sales" by the queries below)
CREATE TABLE `saleorder` (
    `order_id` int,
    `order_time` date,
    `order_num` int
);  -- the terminator is required: without it the INSERT below is parsed as part of the DDL
-- ----------------------------
-- Records of saleorder
-- ----------------------------
-- One multi-row INSERT instead of eleven single-row statements; the rows
-- cover 2019-01..2019-04 and 2020-01..2020-04.
INSERT INTO `saleorder` VALUES
    (1, '2020-04-20', 420),
    (2, '2020-04-04', 800),
    (3, '2020-03-28', 500),
    (4, '2020-03-13', 100),
    (5, '2020-02-27', 300),
    (6, '2020-01-07', 450),
    (7, '2019-04-07', 800),
    (8, '2019-03-15', 1200),
    (9, '2019-02-17', 200),
    (10, '2019-02-07', 600),
    (11, '2019-01-13', 300);
查看表信息
-- Inspect every row and column of the demo table.
SELECT
    *
FROM
    saleorder;
基本思路:先用嵌套子查询分别分组求出分子(月销量)和分母(年销量),再用隐式内连接按年份关联,最后相除
-- Share of each month's sales within its calendar year.
-- Two derived tables are combined with an implicit (comma) join:
--   monthly : total quantity per 'yyyy-MM' month  (numerator)
--   yearly  : total quantity per calendar year    (denominator)
SELECT
    order_month,
    num,
    total,
    ROUND(num / total, 2) AS ratio
FROM
    (
        SELECT
            DATE_FORMAT(order_time, "yyyy-MM") AS order_month,
            SUM(order_num) AS num
        FROM saleorder
        GROUP BY DATE_FORMAT(order_time, "yyyy-MM")
    ) monthly,
    (
        SELECT
            YEAR(order_time) AS order_year,
            SUM(order_num) AS total
        FROM saleorder
        GROUP BY YEAR(order_time)
    ) yearly
WHERE
    -- the first four characters of 'yyyy-MM' are the year
    SUBSTR(monthly.order_month, 1, 4) = yearly.order_year;
基本思路:显式内连接,先分组、汇总 –> 按年份内连接 –> 相除
友情提示: 时间处理的时候除了用date_format()
也可以用substr()
函数来截取年月日格式
-- Monthly share of yearly sales, using an explicit INNER JOIN.
-- SUBSTR slices the text form of order_time: characters 1-7 give the
-- month ('yyyy-MM'), characters 1-4 give the year.
WITH monthly_sales AS (
    -- numerator: quantity per month
    SELECT
        SUBSTR(order_time, 1, 7) AS order_month,
        SUM(order_num) AS num
    FROM saleorder
    GROUP BY SUBSTR(order_time, 1, 7)
),
yearly_sales AS (
    -- denominator: quantity per year
    SELECT
        SUBSTR(order_time, 1, 4) AS order_year,
        SUM(order_num) AS total
    FROM saleorder
    GROUP BY SUBSTR(order_time, 1, 4)
)
SELECT
    order_month,
    num,
    total,
    ROUND(num / total, 2) AS ratio
FROM monthly_sales m
INNER JOIN yearly_sales y
    ON SUBSTR(order_month, 1, 4) = y.order_year;
-- Monthly share of yearly sales via window functions (no join).
-- Every source row is tagged with its month total and its year total;
-- DISTINCT then collapses the per-order rows to one row per month.
SELECT DISTINCT
    order_month,
    num,
    total,
    ROUND(num / total, 2) AS ratio
FROM (
    SELECT
        SUBSTR(order_time, 1, 7) AS order_month,
        SUM(order_num) OVER (PARTITION BY SUBSTR(order_time, 1, 7)) AS num,
        SUM(order_num) OVER (PARTITION BY SUBSTR(order_time, 1, 4)) AS total
    FROM saleorder
) per_order;
(1) 时间处理的时候除了用
date_format()
也可以用substr()
函数来截取年月日格式
(2)当我们求占比的分子、分母没有时间维度、只有数量的时候,可以采用on 1=1
来进行关联,构造笛卡尔积
例如下列代码:
-- Share of orders per e-commerce platform.
-- t1 counts orders per platform name; t2 is a single-row grand total.
-- Joining ON 1 = 1 attaches that one total to every platform row.
-- NOTE(review): `format(x, d)` is MySQL's number formatter; on Hive the
-- equivalent is `format_number(x, d)` — confirm the target engine.
-- NOTE(review): the numerator counts B2C_ORDER while the denominator counts
-- `ORDER` — confirm the two tables are meant to differ.
SELECT
    NAME 电商平台,
    number 订单量,
    concat( format( number / total * 100, 2 ), '%' ) 平台占比
FROM
    (
        SELECT
            t1.NAME,
            t1.number,
            t2.total
        FROM
            (
                SELECT
                    b.NAME,
                    count( a.id ) number
                FROM
                    B2C_ORDER a
                    JOIN PLATFORM b ON a.PLATFORM_ID = b.id
                GROUP BY
                    b.NAME
            ) t1
            -- ORDER is a reserved word, so it must be back-quoted to be used as a table name
            INNER JOIN ( SELECT count( a.id ) total FROM `ORDER` a ) t2 ON 1 = 1
    ) t3;
-- Each month's quantity as a percentage of its year's quantity, shown as 'NN.NN%'.
-- The year is taken with substr(..., 1, 4), matching how the month is taken
-- with substr(..., 1, 7). The previous DATE_FORMAT(..., 'Y') pattern is the
-- week-based year, which can differ from the calendar year for dates near
-- 1 January and so attach a month to the wrong yearly total.
SELECT
    month_order,
    year_order,
    number,
    concat( round( number / total * 100.00, 2 ), '%' ) percent
FROM
    (
        SELECT
            t1.month_order,
            t2.year_order,
            t1.number,
            t2.total
        FROM
            (
                -- numerator: quantity per 'yyyy-MM' month
                SELECT
                    substr( order_time, 1, 7 ) AS month_order,
                    sum( order_num ) AS number
                FROM `order`
                GROUP BY substr( order_time, 1, 7 )
            ) t1
            JOIN
            (
                -- denominator: quantity per calendar year
                SELECT
                    substr( order_time, 1, 4 ) AS year_order,
                    sum( order_num ) AS total
                FROM `order`
                GROUP BY substr( order_time, 1, 4 )
            ) t2
            ON substr( t1.month_order, 1, 4 ) = t2.year_order
    ) t3;
与上年度数据对比称"同比",与上月数据对比称"环比"
相关公式如下:
同比增长率计算公式
(当年值-上年值)/上年值x100%
环比增长率计算公式
(当月值-上月值)/上月值x100%
-- Month-over-month growth: (current - previous) / previous.
-- "Previous" is the preceding row when months are sorted ascending, i.e.
-- the previous month that has data, which need not be the calendar-previous month.
WITH monthly AS (
    SELECT
        SUBSTR(order_time, 1, 7) AS now_month,
        SUM(order_num) AS now_num
    FROM saleorder
    GROUP BY SUBSTR(order_time, 1, 7)
),
with_previous AS (
    SELECT
        now_month,
        now_num,
        LAG(now_num, 1) OVER (ORDER BY now_month) AS last_num
    FROM monthly
)
SELECT
    now_month,
    now_num,
    last_num,
    ROUND((now_num - last_num) / last_num, 2) AS ratio
FROM with_previous;
-- Month-over-month growth as a percentage string.
-- A NULL growth value (e.g. the first month, which has no previous row)
-- is replaced by 0 before the "%" suffix is appended.
SELECT
    now_month,
    now_num,
    last_num,
    CONCAT(
        NVL(ROUND((now_num - last_num) / last_num * 100, 2), 0),
        "%"
    ) AS ratio
FROM (
    SELECT
        now_month,
        now_num,
        LAG(monthly.now_num, 1) OVER (ORDER BY monthly.now_month) AS last_num
    FROM (
        SELECT
            SUBSTR(order_time, 1, 7) AS now_month,
            SUM(order_num) AS now_num
        FROM saleorder
        GROUP BY SUBSTR(order_time, 1, 7)
    ) monthly
) with_previous;
与上年度数据对比称"同比",与上月数据对比称"环比"
相关公式如下:
同比增长率计算公式
(当年值-上年值)/上年值x100%
环比增长率计算公式
(当月值-上月值)/上月值x100%
同比的话,如果每个月都齐全、都有数据,用lag(num,12)
就可以了。我们的例子中只有19年和20年1-4月份的数据。这种特殊情况应该如何处理?
每年只有4个月的数据,所以用lag(num,4)
-- Year-over-year growth using LAG with an offset of 4 rows.
-- The offset of 4 relies on each year contributing exactly four month
-- rows, so the row four positions back is the same month one year earlier.
WITH monthly AS (
    SELECT
        SUBSTR(order_time, 1, 7) AS now_month,
        SUM(order_num) AS now_num
    FROM saleorder
    GROUP BY SUBSTR(order_time, 1, 7)
),
with_last_year AS (
    SELECT
        now_month,
        now_num,
        LAG(now_num, 4) OVER (ORDER BY now_month) AS last_num
    FROM monthly
)
SELECT
    now_month,
    now_num,
    last_num,
    ROUND((now_num - last_num) / last_num, 2) AS ratio
FROM with_last_year;
优化:
对空值可以做一下优化处理,用到nvl()
函数和lag()
函数的第三个参数。
-- Year-over-year growth (LAG offset 4) with NULLs replaced.
-- LAG's third argument makes last_num 0 instead of NULL when no row
-- exists four positions back; NVL turns a NULL ratio into 0.
SELECT
    now_month,
    now_num,
    last_num,
    NVL(ROUND((now_num - last_num) / last_num, 2), 0) AS ratio
FROM (
    SELECT
        now_month,
        now_num,
        LAG(monthly.now_num, 4, 0) OVER (ORDER BY monthly.now_month) AS last_num
    FROM (
        SELECT
            SUBSTR(order_time, 1, 7) AS now_month,
            SUM(order_num) AS now_num
        FROM saleorder
        GROUP BY SUBSTR(order_time, 1, 7)
    ) monthly
) with_last_year;
基本思路:利用date_add()
把上一年的日期平移到当年,生成跨年时间后再按月份关联(注意:固定加365天遇到闰年会错位一天,用add_months(order_time, 12)更稳妥)
-- Year-over-year growth via a self-join, with NULL/zero guarded by CASE.
--   t1 : quantity per month as-is (current value)
--   t2 : quantity per month after shifting every order forward one year,
--        so last year's month lines up with this year's month label
-- The shift uses add_months(order_time, 12) rather than a fixed 365 days:
-- adding 365 days across a leap day lands one day short
-- (2019-03-01 + 365 days = 2020-02-29), which puts the row in the wrong month.
SELECT
    t1.now_month,
    COALESCE( now_num, 0 ) AS now_num,
    COALESCE( last_num, 0 ) AS last_num,
    CASE
        -- no prior-year value (or a zero one): report 0 instead of dividing
        WHEN last_num IS NULL
            OR last_num = 0 THEN
            0 ELSE round( ( now_num - last_num ) / last_num, 2 )
    END AS ratio
FROM
    (
        SELECT
            DATE_FORMAT( order_time, 'yyyy-MM' ) AS now_month,
            sum( order_num ) AS now_num
        FROM
            saleorder
        GROUP BY
            DATE_FORMAT( order_time, 'yyyy-MM' )
    ) t1
    LEFT JOIN (
        SELECT
            DATE_FORMAT( add_months( order_time, 12 ), 'yyyy-MM' ) AS now_month,
            sum( order_num ) AS last_num
        FROM
            saleorder
        GROUP BY
            DATE_FORMAT( add_months( order_time, 12 ), 'yyyy-MM' )
    ) AS t2 ON t1.now_month = t2.now_month;
-- Year-over-year growth via a self-join, with NULLs replaced by nvl().
--   t1 : quantity per month as-is (current value)
--   t2 : quantity per month after shifting every order forward one year
-- LEFT JOIN keeps months that have no prior-year counterpart; their
-- last_num and ratio are NULL and are reported as 0.
-- The one-year shift uses add_months(order_time, 12): a fixed 365-day shift
-- falls one day short across a leap day and can move a row into the wrong month.
SELECT
    t1.now_month,
    nvl ( now_num, 0 ) AS now_num,
    nvl ( last_num, 0 ) AS last_num,
    nvl ( round( ( now_num - last_num ) / last_num, 2 ), 0 ) AS ratio
FROM
    (
        SELECT
            DATE_FORMAT( order_time, 'yyyy-MM' ) AS now_month,
            sum( order_num ) AS now_num
        FROM
            saleorder
        GROUP BY
            DATE_FORMAT( order_time, 'yyyy-MM' )
    ) t1
    LEFT JOIN (
        SELECT
            DATE_FORMAT( add_months( order_time, 12 ), 'yyyy-MM' ) AS now_month,
            sum( order_num ) AS last_num
        FROM
            saleorder
        GROUP BY
            DATE_FORMAT( add_months( order_time, 12 ), 'yyyy-MM' )
    ) AS t2 ON t1.now_month = t2.now_month;