Hive综合应用案例 — 用户搜索日志分析

头歌实践平台

Hive综合应用案例 — 用户搜索日志分析_第1张图片

-- Reset step supplied by the exercise template (the marker lines below mean do not modify).
-- Drops the mydb database, including any tables inside it, when it already exists.
---------- 禁止修改 ----------
 drop database if exists mydb cascade;
---------- 禁止修改 ----------


---------- begin ----------
-- Create the mydb database.
create database mydb;

-- Make mydb the current database for the statements that follow.
use mydb;

-- Create db_search as a delimited text table whose fields are separated by one space.
create table db_search (
    id      int,
    key1    string,
    ranking int,
    or_der  int,
    url     string,
    time1   string
)
row format delimited
    fields terminated by ' ';

-- Load the local file /root/data.txt into db_search (appends, no overwrite).
load data local inpath '/root/data.txt'
into table db_search;

-- Top 10 url values by number of rows whose time1 falls in 2018.
-- year() yields an integer, so it is compared with the integer literal 2018
-- instead of a string, which avoids an implicit type conversion.
-- NOTE(review): rows are grouped by the whole url column - this assumes url already
-- holds a bare site domain. TODO confirm against the data file.
-- NOTE(review): the sort uses the count only, so rows with equal counts come back
-- in no guaranteed order.
select
    url,
    count(*) as a
from db_search
where year(time1) = 2018
group by url
order by a desc
limit 10;
---------- end ----------

Hive综合应用案例 — 用户搜索日志分析_第2张图片

-- Reset step supplied by the exercise template (the marker lines below mean do not modify).
-- Drops the mydb database, including any tables inside it, when it already exists.
---------- 禁止修改 ----------
 drop database if exists mydb cascade;
---------- 禁止修改 ----------


---------- begin ----------
-- Create the mydb database.
create database mydb;

-- Make mydb the current database for the statements that follow.
use mydb;

-- Create db_search as a delimited text table whose fields are separated by one space.
create table db_search (
    id      int,
    key1    string,
    ranking int,
    or_der  int,
    url     string,
    time1   string
)
row format delimited
    fields terminated by ' ';

-- Load the local file /root/data.txt into db_search (appends, no overwrite).
load data local inpath '/root/data.txt'
into table db_search;


-- For each search term (key1), pick the url with the most rows, then return the ten
-- such (key1, url, cnt) results with the highest counts, largest first.
-- row_number() keeps exactly one url per key1 - when counts tie, which url wins is
-- not determined by the query.
with url_rank as (
    select
        key1,
        url,
        count(*) as cnt,
        row_number() over (partition by key1 order by count(*) desc) as rk
    from db_search
    group by key1, url
)
select
    t.key1,
    t.url,
    t.cnt
from url_rank t
where t.rk = 1
order by t.cnt desc
limit 10;
---------- end ----------

Hive综合应用案例 — 用户搜索日志分析_第3张图片

-- Reset step supplied by the exercise template (the marker lines below mean do not modify).
-- Drops the mydb database, including any tables inside it, when it already exists.
---------- 禁止修改 ----------
 drop database if exists mydb cascade;
---------- 禁止修改 ----------


---------- begin ----------
-- Create the mydb database.
create database mydb;

-- Make mydb the current database for the statements that follow.
use mydb;

-- Create db_search as a delimited text table whose fields are separated by one space.
create table db_search (
    id      int,
    key1    string,
    ranking int,
    or_der  int,
    url     string,
    time1   string
)
row format delimited
    fields terminated by ' ';

-- Load the local file /root/data.txt into db_search (appends, no overwrite).
load data local inpath '/root/data.txt'
into table db_search;

-- For every (year, month) taken from time1, report the search term (key1) that has
-- the highest row count in that month.
-- The first output column is built with concat from the year, a hyphen and the month.
-- row_number() keeps one key1 per month - when counts tie, which term wins is not
-- determined by the query. There is no order by, so output row order is unspecified.
with monthly_rank as (
    select
        year(time1)  as y1,
        month(time1) as m,
        key1,
        count(*)     as cnt,
        row_number() over (
            partition by year(time1), month(time1)
            order by count(*) desc
        ) as rk
    from db_search
    group by year(time1), month(time1), key1
)
select
    concat(t.y1, '-', t.m),
    t.key1,
    t.cnt
from monthly_rank t
where t.rk = 1;




---------- end ----------

你可能感兴趣的:(hive,数据库,hadoop)