HiveQL学习笔记(五):Hive练习题

本系列是本人对Hive的学习进行一个整理,主要包括以下内容:
1.HiveQL学习笔记(一):Hive安装及Hadoop,Hive原理简介
2.HiveQL学习笔记(二):Hive基础语法与常用函数
3.HiveQL学习笔记(三):Hive表连接
4.HiveQL学习笔记(四):Hive窗口函数
5.HiveQL学习笔记(五):Hive练习题
接下来对第五个内容进行介绍。

HiveQL学习笔记(二)对应的练习题

HiveQL学习笔记(二):Hive基础语法与常用函数
这里没有原版的数据,只有字段名,所以只能靠脑补……主要是锻炼思考问题的方法,也可以上网找一些MySQL的题,然后用HiveSQL去完成。
HiveQL学习笔记(五):Hive练习题_第1张图片
在这里插入图片描述

-- Up to 10 user names from user_info where city is 'beijing' and sex is 'female'.
select
    user_name
from user_info
where city = 'beijing'
  and sex = 'female'
limit 10;

HiveQL学习笔记(五):Hive练习题_第2张图片
在这里插入图片描述

-- user_name, piece and price of every 'food' row in the 2019-04-09 partition.
select
    user_name,
    piece,
    price
from user_trade
where dt = '2019-04-09'
  and goods_category = 'food';

在这里插入图片描述

-- Per goods category, for dt between 2019-01-01 and 2019-04-30 (inclusive):
-- the number of distinct buyers and the total pay_amount.
-- user_sum uses count(distinct ...): sum() over user_name would try to add
-- names up as numbers instead of counting users.
select goods_category,
       count(distinct user_name) as user_sum,
       sum(pay_amount) as total_amount
from user_trade
where dt between '2019-01-01' and '2019-04-30'
group by goods_category;

在这里插入图片描述

-- Users whose summed pay_amount over 2019-04-01..2019-04-30 exceeds 50000.
select
    user_name,
    sum(pay_amount) as total_amount
from user_trade
where dt between '2019-04-01' and '2019-04-30'
group by
    user_name
having sum(pay_amount) > 50000;

在这里插入图片描述

-- The five users with the largest summed pay_amount over 2019-04-01..2019-04-30.
select
    user_name,
    sum(pay_amount) as total_amount
from user_trade
where dt between '2019-04-01' and '2019-04-30'
group by
    user_name
order by
    sum(pay_amount) desc
limit 5;

将user_trade中的时间戳转为以下时间格式
HiveQL学习笔记(五):Hive练习题_第3张图片

-- Render the unix timestamp pay_time as 'yyyy-MM-dd HH:mm:ss' for the 2019-04-09 partition.
-- 'HH' is the 24-hour field; 'hh' is the 12-hour field and, without an am/pm
-- marker, would print 13:00 and 01:00 identically.
select pay_time,
       from_unixtime(pay_time,'yyyy-MM-dd HH:mm:ss')
from user_trade
where dt='2019-04-09';
****************************
-- Render the unix timestamp pay_time down to the hour for the 2019-04-09 partition.
-- 'HH' (24-hour) is used; 'hh' is the 12-hour field and is ambiguous without am/pm.
select pay_time,
       from_unixtime(pay_time,'yyyy-MM-dd HH')
from user_trade
where dt='2019-04-09';
****************************
-- Render the unix timestamp pay_time down to the minute for the 2019-04-09 partition.
-- 'HH' (24-hour) is used; 'hh' is the 12-hour field and is ambiguous without am/pm.
select pay_time,
       from_unixtime(pay_time,'yyyy-MM-dd HH:mm')
from user_trade
where dt='2019-04-09';
****************************
-- Render the unix timestamp pay_time as a compact yyyyMMdd date
-- for rows in the 2019-04-09 partition.
select
    pay_time,
    from_unixtime(pay_time, 'yyyyMMdd')
from user_trade
where dt = '2019-04-09';

在这里插入图片描述

-- For up to 10 users: days between each user's firstactivetime (date part)
-- and 2019-05-01.
select
    user_name,
    datediff('2019-05-01', to_date(firstactivetime))
from user_info
limit 10;

在这里插入图片描述

-- Distinct user count per age band (<20, 20-30, 30-40, everything else).
-- The column alias is quoted with backticks: Hive treats a single-quoted
-- token as a string literal, which is not accepted as an alias.
select case when age<20 then '20岁以下'
            when age>=20 and age<30 then '20-30岁'
            when age>=30 and age<40 then '30-40岁'
            else '40岁以上'
            end as `年龄分组`,
       count(distinct user_id) as user_num
from user_info
-- The grouping expression repeats the CASE so it matches the selected column.
group by case when age<20 then '20岁以下'
              when age>=20 and age<30 then '20-30岁'
              when age>=30 and age<40 then '30-40岁'
              else '40岁以上'
              end;

在这里插入图片描述

-- Distinct user count per sex and level tier ('高级' when level > 5, else '低级').
select
    sex,
    if(level > 5, '高级', '低级'),
    count(distinct user_id) as user_num
from user_info
group by
    sex,
    if(level > 5, '高级', '低级');

在这里插入图片描述

-- Distinct user count per activation month, where the month key is the
-- first 7 characters of firstactivetime.
select
    substr(firstactivetime, 1, 7) as month,
    count(distinct user_id) as user_num
from user_info
group by
    substr(firstactivetime, 1, 7);

在这里插入图片描述
HiveQL学习笔记(五):Hive练习题_第4张图片

-- Distinct user count per phone brand, reading the brand from the
-- 'phonebrand' key of the JSON string column extra1.
select
    get_json_object(extra1, '$.phonebrand') as phone_brand,
    count(distinct user_id) as user_num
from user_info
group by
    get_json_object(extra1, '$.phonebrand');
************************************
-- Distinct user count per phone brand, reading the brand with a
-- key lookup on column extra2.
select
    extra2['phonebrand'] as phone_brand,
    count(distinct user_id) as user_num
from user_info
group by
    extra2['phonebrand'];

在这里插入图片描述

-- For user 'ELLA' in 2018: average pay_amount, and the number of days
-- between the earliest and latest pay_time.
select
    avg(pay_amount) as avg_mount,
    datediff(
        max(from_unixtime(pay_time, 'yyyy-MM-dd')),
        min(from_unixtime(pay_time, 'yyyy-MM-dd'))
    )
from user_trade
where year(dt) = 2018
  and user_name = 'ELLA';

在这里插入图片描述

-- Number of users who bought from more than two distinct goods categories in 2018.
-- The year filter uses the date column dt (as the other 2018 queries on
-- user_trade do); pay_time is a unix timestamp and cannot be passed to year() directly.
select count(a.user_name)
from
(select user_name,
        count(distinct goods_category) as num_goods
from user_trade
where year(dt) = 2018
group by user_name
having count(distinct goods_category)>2) as a;

在这里插入图片描述

-- Distinct user count by age band and marital status for users whose
-- firstactivetime falls in 2018, restricted to the 20-30 and 30-40 bands.
select
    u.age_type,
    if(u.marriage_status = 1, '已婚', '未婚'),
    count(distinct u.user_id)
from (
    -- Tag every 2018-activated user with an age band and the
    -- marriage_status value taken from the extra1 JSON.
    select
        case
            when age >= 20 and age < 30 then '20-30岁'
            when age >= 30 and age < 40 then '30-40岁'
            else '40岁以上'
        end as age_type,
        get_json_object(extra1, '$.marriage_status') as marriage_status,
        user_id
    from user_info
    where year(firstactivetime) = 2018
) as u
where u.age_type in ('20-30岁', '30-40岁')
group by
    u.age_type,
    if(u.marriage_status = 1, '已婚', '未婚');

在这里插入图片描述

-- Distinct user count per sex for users whose firstactivetime is more
-- than 300 days before the current date.
-- sex is selected as well as grouped on, so each count can be attributed.
select sex,
       count(distinct user_id)
from user_info
where datediff(from_unixtime(unix_timestamp(),'yyyy-MM-dd'),
               to_date(firstactivetime)) > 300
group by sex;

在这里插入图片描述

-- Distinct user count per sex and education, where education is read
-- with a key lookup on column extra2.
select
    sex,
    extra2['education'] as education,
    count(distinct user_id)
from user_info
group by
    sex,
    extra2['education'];

在这里插入图片描述

-- Total pay_amount per day and goods category for dt between
-- 2019-01-01 and 2019-04-30 (inclusive).
select
    dt,
    goods_category,
    sum(pay_amount) as total_amount
from user_trade
where dt between '2019-01-01' and '2019-04-30'
group by
    dt,
    goods_category;

HiveQL学习笔记(三)对应的练习题

HiveQL学习笔记(三):Hive表连接
HiveQL学习笔记(五):Hive练习题_第5张图片
HiveQL学习笔记(五):Hive练习题_第6张图片
在这里插入图片描述

-- Users who appear in both user_trade and user_refund for 2019.
select
    buyers.user_name
from (
    select distinct user_name
    from user_trade
    where year(dt) = 2019
) buyers
inner join (
    select distinct user_name
    from user_refund
    where year(dt) = 2019
) refunders
    on buyers.user_name = refunders.user_name;

在这里插入图片描述

-- Users with user_trade rows in both 2017 and 2018.
select
    y2017.user_name
from (
    select distinct user_name
    from user_trade
    where year(dt) = 2017
) y2017
inner join (
    select distinct user_name
    from user_trade
    where year(dt) = 2018
) y2018
    on y2017.user_name = y2018.user_name;

在这里插入图片描述

-- Users with user_trade rows in each of 2017, 2018 and 2019.
select
    y2017.user_name
from (
    select distinct user_name
    from user_trade
    where year(dt) = 2017
) y2017
inner join (
    select distinct user_name
    from user_trade
    where year(dt) = 2018
) y2018
    on y2017.user_name = y2018.user_name
inner join (
    select distinct user_name
    from user_trade
    where year(dt) = 2019
) y2019
    on y2018.user_name = y2019.user_name;

在这里插入图片描述

-- Users present in user_list_1 with no matching user_id in user_list_2
-- (left join, then keep only the unmatched rows).
select
    l1.user_id,
    l1.user_name
from user_list_1 l1
left join user_list_2 l2
    on l1.user_id = l2.user_id
where l2.user_id is null;

在这里插入图片描述

-- Users with 2019 rows in user_trade but none in user_refund for 2019
-- (left join, then keep only the unmatched rows).
select
    buyers.user_name
from (
    select distinct user_name
    from user_trade
    where year(dt) = 2019
) buyers
left join (
    select distinct user_name
    from user_refund
    where year(dt) = 2019
) refunders
    on buyers.user_name = refunders.user_name
where refunders.user_name is null;

在这里插入图片描述

-- Number of 2019 buyers per education value, where education is read
-- from the 'education' key of the extra1 JSON in user_info.
select
    profile.education,
    count(buyers.user_name)
from (
    select distinct user_name
    from user_trade
    where year(dt) = 2019
) buyers
left join (
    select distinct
        user_name,
        get_json_object(extra1, '$.education') as education
    from user_info
) profile
    on buyers.user_name = profile.user_name
group by
    profile.education;

HiveQL学习笔记(五):Hive练习题_第7张图片
在这里插入图片描述

-- Users present in trade_2017 and trade_2018 but absent from trade_2019.
-- The inner join keeps users found in both earlier tables; the left join to
-- trade_2019 followed by "is null" drops anyone who also appears in 2019.
select a.user_name
from
    (select distinct user_name
    from trade_2017) a
join
    (select distinct user_name
    from trade_2018) b
    on a.user_name = b.user_name
left join
    (select distinct user_name
    from trade_2019) c
    on b.user_name = c.user_name
where c.user_name is null;

在这里插入图片描述

-- User names found in either list: full join on user_id, then take the
-- name from whichever side is non-null.
select
    coalesce(l1.user_name, l2.user_name)
from user_list_1 as l1
full join user_list_2 as l2
    on l1.user_id = l2.user_id;

在这里插入图片描述

-- Number of distinct user names across trade_2017, trade_2018 and trade_2019.
-- The branches are stacked with union all; de-duplication happens once,
-- in the outer count(distinct ...).
select
    count(distinct all_years.user_name)
from (
    select user_name from trade_2017
    union all
    select user_name from trade_2018
    union all
    select user_name from trade_2019
) all_years;

在这里插入图片描述

-- Total paid and total refunded per user for 2019, built with union all.
-- Each branch supplies one real amount and a literal 0 for the other, so the
-- outer sum() adds 0 to the real value and yields one row per user.
-- Comments use "--" because Hive does not accept "#" as a comment marker.
select a.user_name,
       sum(a.pay_amount),
       sum(a.refund_amount)
from
(
    select user_name,
           sum(pay_amount) as pay_amount,
           0 as refund_amount  -- user_trade has no refund_amount; pad with 0
    from user_trade
    where year(dt)=2019
    group by user_name
union all
    select user_name,
           0 as pay_amount,    -- user_refund has no pay_amount; pad with 0
           sum(refund_amount) as refund_amount
    from user_refund
    where year(dt)=2019
    group by user_name
) a
group by a.user_name;

在这里插入图片描述

-- Total paid and total refunded per user for 2019, built with a left join.
-- Every 2019 buyer is kept; refund_amount is NULL for buyers with no 2019 refund.
select a.user_name,
       a.pay_amount,
       b.refund_amount
from
(select user_name,
        sum(pay_amount) as pay_amount
from user_trade
where year(dt)=2019
group by user_name) a
left join
(select user_name,
        sum(refund_amount) as refund_amount
from user_refund
where year(dt)=2019
group by user_name) b
on a.user_name=b.user_name;


在这里插入图片描述

-- User count per age band among users whose firstactivetime is in 2017.
-- NOTE(review): as written, the left join to buyers does not change the
-- counts (count(a.user_name) counts every activated user). If only buyers
-- should be counted, count b.user_name instead. Confirm against the exercise.
select a.age_type,
       count(a.user_name)
from
(
    select case when age<20 then '20岁以下'
                when age>=20 and age<30 then '20-30岁'
                when age>=30 and age<40 then '30-40岁'
                else '40岁以上'
                end as age_type,
           user_name
    from user_info
    where year(firstactivetime)=2017
) a
left join
(
    select distinct user_name
    from user_trade
    -- dt is compared with the string '0', as in the other queries in this file.
    -- Comparing a date string with the number 0 forces a numeric cast, which
    -- presumably turns every date into NULL and drops all rows.
    where dt > '0'
) b
on a.user_name=b.user_name
group by a.age_type;

HiveQL学习笔记(五):Hive练习题_第8张图片
在这里插入图片描述

-- Number of buyers (trade_2018 or trade_2019) per hour-of-day of firstactivetime.
-- union (not union all) de-duplicates users who appear in both tables.
-- The hour is computed once in subquery b and exposed as active_hour, so the
-- outer query can reference it by name.
select b.active_hour,
       count(a.user_name)
from
(select user_name
from trade_2018
union
select user_name
from trade_2019) a
left join
(select user_name,
        hour(firstactivetime) as active_hour
from user_info) b
on a.user_name=b.user_name
group by b.active_hour;

HiveQL学习笔记(四)对应的练习题

HiveQL学习笔记(四):Hive窗口函数
HiveQL学习笔记(五):Hive练习题_第9张图片
在这里插入图片描述

-- Monthly pay_amount for 2019 together with its running (cumulative) total.
-- 1. Subquery a: sum pay_amount per month.
-- 2. Outer query: accumulate those monthly sums with a window ordered by month.
select a.month,
       a.pay_amount,
       sum(a.pay_amount) over(order by a.month)
from
(select month(dt) month,
        sum(pay_amount) pay_amount
from user_trade
where year(dt)=2019
group by month(dt)) a;

按照季度:

-- Quarterly pay_amount for 2019 together with its running (cumulative) total.
-- Months 7, 8 and 9 map to Q3; everything not in Q1-Q3 falls through to Q4.
select a.season,
       a.pay_amount,
       sum(a.pay_amount) over(order by a.season)
from
(select case when month(dt) in (1,2,3) then 'Q1'
             when month(dt) in (4,5,6) then 'Q2'
             when month(dt) in (7,8,9) then 'Q3'
             else 'Q4'
             end as season,
        sum(pay_amount) pay_amount
 from user_trade
 where year(dt)=2019
 -- The grouping expression repeats the CASE so it matches the selected column.
 group by case when month(dt) in (1,2,3) then 'Q1'
               when month(dt) in (4,5,6) then 'Q2'
               when month(dt) in (7,8,9) then 'Q3'
               else 'Q4'
               end) a;

在这里插入图片描述

-- Monthly pay_amount for 2017 and 2018 with a running total that restarts
-- each year (the window is partitioned by year and ordered by month).
select a.year,
       a.month,
       a.pay_amount,
       sum(a.pay_amount) over(partition by a.year order by a.month)
from
(
select sum(pay_amount) pay_amount,
       month(dt) month,
       year(dt) year
from user_trade
where year(dt) in (2017,2018)
group by year(dt),month(dt)
) as a;

在这里插入图片描述

-- Monthly pay_amount for 2018 plus a moving average over the current month
-- and the two months before it.
select
    monthly.month,
    monthly.pay_amount,
    avg(monthly.pay_amount) over (
        order by monthly.month
        rows between 2 preceding and current row
    )
from (
    select
        month(dt) month,
        sum(pay_amount) pay_amount
    from user_trade
    where year(dt) = 2018
    group by
        month(dt)
) as monthly;

HiveQL学习笔记(五):Hive练习题_第10张图片
在这里插入图片描述

-- For January 2019: distinct goods categories bought per user, ranked three
-- ways (row_number, rank, dense_rank) in ascending order of that count.
select user_name,
       count(distinct goods_category),
       row_number() over(order by count(distinct goods_category)),
       rank() over(order by count(distinct goods_category)),
       dense_rank() over(order by count(distinct goods_category))
from user_trade
-- substr(dt,1,7) takes 7 characters starting at position 1, i.e. the yyyy-MM part.
where substr(dt,1,7) = '2019-01'
group by user_name;

在这里插入图片描述
一次查询无法完成时,考虑使用子查询:先在子查询中用where筛选年份并进行排名,然后在外层查询中用where选出符合条件的排名。

-- Users ranked 10th, 20th and 30th by total 2019 pay_amount.
-- The ranking is computed in a subquery because a window result cannot be
-- filtered in the same query level that produces it.
select a.user_name,
       a.pay_amount,
       a.rank
from
(
select user_name,
       sum(pay_amount) pay_amount,
       -- Rank on the per-user total; alias a is not visible inside its own subquery.
       rank() over(order by sum(pay_amount) desc) rank
from user_trade
where substr(dt,1,4) = '2019'
group by user_name
) as a
where a.rank in (10,20,30);

在这里插入图片描述

-- Split January 2019 buyers into 5 tiers by total pay_amount (tier 1 = highest).
-- The window orders by sum(pay_amount): in a grouped query the bare
-- pay_amount column is not available to the window.
select user_name,
       sum(pay_amount) pay_amount,
       ntile(5) over(order by sum(pay_amount) desc) level
from user_trade
where substr(dt,1,7) = '2019-01'
group by user_name;

取前x%的数据时,用ntile(n),其中n=100/x,再筛选第1组。例如取前10%,则n=10。
在这里插入图片描述

-- Top 10% of users by total 2019 refund_amount: ntile(10) splits users into
-- ten buckets ordered by the refunded total, and bucket 1 is kept.
select a.user_name,
       a.refund_amount,
       a.level
from
(
select user_name,
       sum(refund_amount) refund_amount,
       -- Order by the per-user total; the bare refund_amount column is not
       -- available to the window in a grouped query.
       ntile(10) over(order by sum(refund_amount) desc) level
from user_refund
where year(dt)=2019
group by user_name
) as a
where a.level = 1;

在这里插入图片描述

-- Number of users with at least one gap of more than 100 days between two
-- consecutive purchase dates in user_trade.
select count(distinct user_name)
from
(
select user_name,
       dt,
       -- Next purchase date of the same user, in dt order.
       lead(dt) over(partition by user_name order by dt) lead_dt
from user_trade
-- Drop rows with abnormal dt values.
where dt > '0'
) as a
-- A user's last purchase has no following row (lead_dt is NULL), so no gap
-- can be computed for it.
where a.lead_dt is not null
and datediff(a.lead_dt,a.dt) > 100;

HiveQL学习笔记(五):Hive练习题_第11张图片
HiveQL学习笔记(五):Hive练习题_第12张图片

在这里插入图片描述

-- Top 3 spenders for each (city, sex) combination.
-- 1. a: total pay_amount per user; b: each user's city and sex.
-- 2. c: rank users inside every city/sex partition by pay_amount, descending.
-- 3. Outer query: keep ranks 1 to 3.
-- NOTE(review): the original notes mention 2018 while the filter uses 2019.
-- Confirm the intended year.
select c.user_name,
       c.city,
       c.sex,
       c.pay_amount,
       c.rank
from
(
    select a.user_name,
           b.city,
           b.sex,
           a.pay_amount,
           rank() over(partition by b.city,b.sex order by a.pay_amount desc) rank
    from
        (
            -- group by already yields one row per user; Hive rejects
            -- select distinct combined with group by in the same query.
            select user_name,
                   sum(pay_amount) pay_amount
            from user_trade
            where year(dt)=2019
            group by user_name
        ) a
    left join
        (
            select user_name,city,sex
            from user_info
        ) b
    on a.user_name = b.user_name
) c
where c.rank <= 3;

HiveQL学习笔记(五):Hive练习题_第13张图片
在这里插入图片描述

-- Top 25% of refunders within each phone brand.
-- 1. a: total refund_amount per user.
-- 2. c: attach the phone brand from user_info and split each brand's users
--    into 4 buckets by refund_amount, descending.
-- 3. Outer query: keep bucket 1.
select c.user_name,
       c.phonebrand,
       c.refund_amount,
       c.level
from
(
    select a.user_name,
           b.extra2['phonebrand'] as phonebrand,
           a.refund_amount,
           ntile(4) over(partition by b.extra2['phonebrand'] order by a.refund_amount desc) level
    from
        (
            select user_name,
                   sum(refund_amount) refund_amount
            from user_refund
            -- Drop rows with abnormal dt values.
            where dt > '0'
            group by user_name
        ) a
        left join
            user_info b
        on a.user_name = b.user_name
) c
where c.level=1;

你可能感兴趣的:(Hive)