hive SQL实现占比、同比、环比计算

前两天博文

《SQL实现占比(时间函数升级版)》,

现在用hive实现一下

一、数据准备

-- Create the demo table that the queries in this article read from.
-- order_time is a DATE; the queries derive their 'yyyy-MM' month key from it.
-- The statement is terminated with ';' so it does not run into the INSERT that follows.
CREATE TABLE `saleorder` (
  `order_id` int,
  `order_time` date,
  `order_num` int
);

-- ----------------------------
-- Records of saleorder
-- ----------------------------
-- All sample rows are loaded by a single multi-row INSERT so that Hive runs
-- one job and writes one file instead of one per row.
-- Columns, in table order: order_id, order_time, order_num.
INSERT INTO `saleorder` VALUES
  (1, '2020-04-20', 420),
  (2, '2020-04-04', 800),
  (3, '2020-03-28', 500),
  (4, '2020-03-13', 100),
  (5, '2020-02-27', 300),
  (6, '2020-01-07', 450),
  (7, '2019-04-07', 800),
  (8, '2019-03-15', 1200),
  (9, '2019-02-17', 200),
  (10, '2019-02-07', 600),
  (11, '2019-01-13', 300);


查看表信息

-- Inspect the loaded sample rows (all three columns of saleorder).
SELECT
    order_id,
    order_time,
    order_num
FROM saleorder;

hive SQL实现占比、同比、环比计算_第1张图片

二、占比

写法一:

基本思路:用隐式内连接,外加嵌套找出分子分母,相除(最后要分组)

-- Share of each month's sales in the total sales of its year.
-- Written as an implicit (comma) inner join on purpose: the WHERE clause pairs
-- every monthly subtotal with the yearly total of the same year.
SELECT
    order_month,
    num,
    total,
    round(num / total, 2) AS ratio
FROM
    (
        -- numerator: quantity sold per calendar month, keyed as 'yyyy-MM'
        SELECT
            date_format(order_time, "yyyy-MM") AS order_month,
            sum(order_num) AS num
        FROM saleorder
        GROUP BY date_format(order_time, "yyyy-MM")
    ) monthly,
    (
        -- denominator: quantity sold per calendar year
        SELECT
            year(order_time) AS order_year,
            sum(order_num) AS total
        FROM saleorder
        GROUP BY year(order_time)
    ) yearly
WHERE substr(monthly.order_month, 1, 4) = yearly.order_year;

hive SQL实现占比、同比、环比计算_第2张图片

写法二:显式内连接

基本思路:显式内连接,先分组、汇总–>按年份内连接–>相除
友情提示: 时间处理的时候除了用date_format()也可以用substr()函数来截取年月日格式

-- Monthly share of the yearly total, written with an explicit INNER JOIN.
-- Month and year keys are cut out of order_time with substr().
SELECT
    order_month,
    num,
    total,
    round(num / total, 2) AS ratio
FROM (
    -- numerator: quantity per month ('yyyy-MM' prefix of order_time)
    SELECT
        substr(order_time, 1, 7) AS order_month,
        sum(order_num) AS num
    FROM saleorder
    GROUP BY substr(order_time, 1, 7)
) monthly
INNER JOIN (
    -- denominator: quantity per year ('yyyy' prefix of order_time)
    SELECT
        substr(order_time, 1, 4) AS order_year,
        sum(order_num) AS total
    FROM saleorder
    GROUP BY substr(order_time, 1, 4)
) yearly
    ON substr(monthly.order_month, 1, 4) = yearly.order_year;

hive SQL实现占比、同比、环比计算_第3张图片

写法三:开窗函数

-- Monthly share of the yearly total using window functions.
-- The inner query tags every order row with the sum of its month and the sum
-- of its year; DISTINCT then collapses the repeated rows to one per month.
SELECT DISTINCT
    order_month,
    num,
    total,
    round(num / total, 2) AS ratio
FROM (
    SELECT
        substr(order_time, 1, 7) AS order_month,
        sum(order_num) OVER (PARTITION BY substr(order_time, 1, 7)) AS num,
        sum(order_num) OVER (PARTITION BY substr(order_time, 1, 4)) AS total
    FROM saleorder
) per_order;

hive SQL实现占比、同比、环比计算_第4张图片

注意:

(1) 时间处理的时候除了用date_format()也可以用substr()函数来截取年月日格式
(2)当我们求的占比分子分母没有时间维度只有数量的时候,我们可以采用on 1=1 来进行关联,构造成笛卡尔积

例如下列代码:

-- Share of orders per e-commerce platform, shown as a percentage string.
-- The per-platform counts are paired with the single-row grand total through
-- ON 1 = 1, i.e. a deliberate Cartesian product against one row.
-- Changes from the earlier version of this query:
--   * format() is a MySQL function; Hive's equivalent is format_number().
--   * ORDER is a reserved word, so the table name is backtick-quoted.
--   * the Chinese output aliases are backtick-quoted.
--   * the redundant SELECT * wrapper is removed and the statement ends with ';'.
-- NOTE(review): the numerator counts B2C_ORDER while the denominator counts
-- `ORDER`; confirm that two different tables are really intended.
SELECT
    NAME AS `电商平台`,
    number AS `订单量`,
    concat(format_number(number / total * 100, 2), '%') AS `平台占比`
FROM (
    -- orders per platform
    SELECT
        b.NAME,
        count(a.id) AS number
    FROM B2C_ORDER a
    INNER JOIN PLATFORM b
        ON a.PLATFORM_ID = b.id
    GROUP BY b.NAME
) t1
INNER JOIN (
    -- grand total (exactly one row)
    SELECT count(a.id) AS total
    FROM `ORDER` a
) t2
    ON 1 = 1;

hive SQL实现占比、同比、环比计算_第5张图片
例2:

-- Monthly share of the yearly total, shown as a percentage string.
-- The year key uses the pattern 'yyyy' (calendar year). The pattern 'Y' is the
-- week-based year in Java date patterns, which assigns some late-December
-- dates to the following year and would put them in the wrong yearly total.
SELECT
    month_order,
    year_order,
    number,
    concat(round(number / total * 100.00, 2), '%') AS `percent`
FROM (
    -- numerator: quantity per month ('yyyy-MM' prefix of order_time)
    SELECT
        substr(order_time, 1, 7) AS month_order,
        sum(order_num) AS number
    FROM `order`
    GROUP BY substr(order_time, 1, 7)
) t1
INNER JOIN (
    -- denominator: quantity per calendar year
    SELECT
        date_format(order_time, 'yyyy') AS year_order,
        sum(order_num) AS total
    FROM `order`
    GROUP BY date_format(order_time, 'yyyy')
) t2
    -- 1 = 1 is a no-op here; the year equality is what restricts the pairs.
    ON 1 = 1
    AND substr(t1.month_order, 1, 4) = t2.year_order;

三、环比

与上年度数据对比称"同比",与上月数据对比称"环比"

相关公式如下:
同比增长率计算公式
(当年值-上年值)/上年值x100% 

环比增长率计算公式
(当月值-上月值)/上月值x100% 
-- Month-over-month growth: (this month - previous month) / previous month.
-- lag() returns the previous ROW, which is the previous calendar month only
-- when no month is missing. The sample data jumps from 2019-04 to 2020-01, so
-- the lagged value is kept only when the lagged month is exactly one month
-- before now_month; otherwise last_num and ratio are NULL.
SELECT
    now_month,
    now_num,
    last_num,
    round((now_num - last_num) / last_num, 2) AS ratio
FROM (
    SELECT
        now_month,
        now_num,
        CASE
            -- add_months() needs a full date, hence the '-01' suffix
            WHEN prev_month = substr(add_months(concat(now_month, '-01'), -1), 1, 7)
                THEN prev_num
        END AS last_num
    FROM (
        SELECT
            now_month,
            now_num,
            lag(now_month, 1) OVER (ORDER BY now_month) AS prev_month,
            lag(now_num, 1) OVER (ORDER BY now_month) AS prev_num
        FROM (
            -- quantity per month ('yyyy-MM' prefix of order_time)
            SELECT
                substr(order_time, 1, 7) AS now_month,
                sum(order_num) AS now_num
            FROM saleorder
            GROUP BY substr(order_time, 1, 7)
        ) t1
    ) t2
) t3;

hive SQL实现占比、同比、环比计算_第6张图片
也可以对显示结果稍微优化一下:

-- Month-over-month growth shown as a percentage string; a NULL ratio is
-- displayed as 0%.
-- lag() returns the previous ROW, which is the previous calendar month only
-- when no month is missing. The sample data jumps from 2019-04 to 2020-01, so
-- the lagged value is kept only when the lagged month is exactly one month
-- before now_month; otherwise last_num is NULL and the ratio shows as 0%.
SELECT
    now_month,
    now_num,
    last_num,
    concat(nvl(round((now_num - last_num) / last_num * 100, 2), 0), "%") AS ratio
FROM (
    SELECT
        now_month,
        now_num,
        CASE
            -- add_months() needs a full date, hence the '-01' suffix
            WHEN prev_month = substr(add_months(concat(now_month, '-01'), -1), 1, 7)
                THEN prev_num
        END AS last_num
    FROM (
        SELECT
            now_month,
            now_num,
            lag(now_month, 1) OVER (ORDER BY now_month) AS prev_month,
            lag(now_num, 1) OVER (ORDER BY now_month) AS prev_num
        FROM (
            -- quantity per month ('yyyy-MM' prefix of order_time)
            SELECT
                substr(order_time, 1, 7) AS now_month,
                sum(order_num) AS now_num
            FROM saleorder
            GROUP BY substr(order_time, 1, 7)
        ) t1
    ) t2
) t3;

hive SQL实现占比、同比、环比计算_第7张图片

四、同比

与上年度数据对比称"同比",与上月数据对比称"环比"

相关公式如下:
同比增长率计算公式
(当年值-上年值)/上年值x100% 

环比增长率计算公式
(当月值-上月值)/上月值x100% 

同比的话,如果每个月的数据都齐全,用lag(num,12)就可以了。但我们的例子中只有2019年和2020年1-4月份的数据,这种特殊情况应该如何处理?

写法一:本案例进行单独处理

有4个月数据,我就lag(num,4)

-- Year-over-year growth for the sample data only.
-- The offset 4 is tied to this dataset, which holds exactly four months per
-- year: four rows back in month order is the same month of the previous year.
SELECT
    now_month,
    now_num,
    last_num,
    round((now_num - last_num) / last_num, 2) AS ratio
FROM (
    SELECT
        now_month,
        now_num,
        lag(monthly.now_num, 4) OVER (ORDER BY monthly.now_month) AS last_num
    FROM (
        -- quantity per month ('yyyy-MM' prefix of order_time)
        SELECT
            substr(order_time, 1, 7) AS now_month,
            sum(order_num) AS now_num
        FROM saleorder
        GROUP BY substr(order_time, 1, 7)
    ) monthly
) with_last_year;

hive SQL实现占比、同比、环比计算_第8张图片
优化:
对空值可以做一下优化处理,用到nvl()函数和lag()函数的第三个参数。

-- Year-over-year growth for the sample data, with NULLs replaced for display.
-- lag()'s third argument supplies 0 when there is no row four positions back,
-- and nvl() turns a NULL ratio into 0.
-- The offset 4 is tied to this dataset (four months of data per year).
SELECT
    now_month,
    now_num,
    last_num,
    nvl(round((now_num - last_num) / last_num, 2), 0) AS ratio
FROM (
    SELECT
        now_month,
        now_num,
        lag(monthly.now_num, 4, 0) OVER (ORDER BY monthly.now_month) AS last_num
    FROM (
        -- quantity per month ('yyyy-MM' prefix of order_time)
        SELECT
            substr(order_time, 1, 7) AS now_month,
            sum(order_num) AS now_num
        FROM saleorder
        GROUP BY substr(order_time, 1, 7)
    ) monthly
) with_last_year;

hive SQL实现占比、同比、环比计算_第9张图片

写法二:通用方法

基本思路:利用日期函数(如date_add()、add_months())把去年的日期平移到今年,生成跨年后的月份,再按月份与当年数据关联

-- Year-over-year growth that does not depend on how many months have data.
-- t1 holds the quantity per month. t2 holds the same quantities with every
-- order date shifted forward by one year, so a t2 row keyed 'yyyy-MM' carries
-- the quantity of that month in the previous year.
-- The shift uses add_months(order_time, 12) rather than date_add(order_time, 365):
-- 365 days is not a full year across a leap day (2019-03-01 + 365 days is
-- 2020-02-29), which would move orders into the wrong month.
SELECT
    t1.now_month,
    CASE
        WHEN now_num IS NULL OR now_num = 0 THEN 0
        ELSE now_num
    END AS now_num,
    CASE
        WHEN last_num IS NULL OR last_num = 0 THEN 0
        ELSE last_num
    END AS last_num,
    CASE
        -- no previous-year value (or zero): report 0 instead of dividing
        WHEN last_num IS NULL OR last_num = 0 THEN 0
        ELSE round((now_num - last_num) / last_num, 2)
    END AS ratio
FROM (
    SELECT
        date_format(order_time, 'yyyy-MM') AS now_month,
        sum(order_num) AS now_num
    FROM saleorder
    GROUP BY date_format(order_time, 'yyyy-MM')
) t1
LEFT JOIN (
    SELECT
        date_format(add_months(order_time, 12), 'yyyy-MM') AS now_month,
        sum(order_num) AS last_num
    FROM saleorder
    GROUP BY date_format(add_months(order_time, 12), 'yyyy-MM')
) AS t2
    ON t1.now_month = t2.now_month;

hive SQL实现占比、同比、环比计算_第10张图片
优化:
nvl()代替case…when

-- Year-over-year growth, same approach as the CASE-based query but with nvl()
-- supplying the 0 defaults.
-- t2 shifts every order date forward by one year so that its month key lines
-- up with the current-year month in t1.
-- The shift uses add_months(order_time, 12) rather than date_add(order_time, 365):
-- 365 days is not a full year across a leap day (2019-03-01 + 365 days is
-- 2020-02-29), which would move orders into the wrong month.
SELECT
    t1.now_month,
    nvl(now_num, 0) AS now_num,
    nvl(last_num, 0) AS last_num,
    nvl(round((now_num - last_num) / last_num, 2), 0) AS ratio
FROM (
    SELECT
        date_format(order_time, 'yyyy-MM') AS now_month,
        sum(order_num) AS now_num
    FROM saleorder
    GROUP BY date_format(order_time, 'yyyy-MM')
) t1
LEFT JOIN (
    SELECT
        date_format(add_months(order_time, 12), 'yyyy-MM') AS now_month,
        sum(order_num) AS last_num
    FROM saleorder
    GROUP BY date_format(add_months(order_time, 12), 'yyyy-MM')
) AS t2
    ON t1.now_month = t2.now_month;

效果是一样的
hive SQL实现占比、同比、环比计算_第11张图片

你可能感兴趣的:(Hive)