Hive综合应用案例——用户学历查询

Hive综合应用案例——用户学历查询

Hive综合应用案例——用户学历查询_第1张图片

---------- do not modify ----------
 drop database if exists mydb cascade;
---------- do not modify ----------


---------- begin ----------
-- Exercise 1: load the user records and print, for every user, the number
-- of days between the stored birth date and 2019-06-10.

-- Create the mydb database. The statement above drops it first, so each run
-- starts from an empty database.
create database if not exists mydb;

-- Make mydb the current database for the statements below.
use mydb;

-- Create the usertab table. The input is a comma-separated text file and
-- every field is kept as a raw string.
-- Column meanings, taken from the reference DDL kept after the end marker:
--   id               user id
--   sex              f = female, m = male
--   time             birth date
--   education        education level
--   occupation       occupation
--   income           income
--   area             birth region
--   desired_area     region the user would like to live in
--   city_countryside presumably city vs. countryside (inferred from the name)
-- The time column is back-quoted because newer Hive releases list TIME as a
-- reserved keyword. The quoted form is accepted by older releases as well.
create table usertab (
    id               string,
    sex              string,
    `time`           string,
    education        string,
    occupation       string,
    income           string,
    area             string,
    desired_area     string,
    city_countryside string
)
row format delimited
fields terminated by ',';

-- Load the data from the local file /root/data.txt
load data local inpath '/root/data.txt' into table usertab;

-- Days lived by each user up to 2019-06-10.
-- Slashes in the stored birth date are rewritten to dashes so that datediff
-- receives a dash-separated date string.
select
    id,
    datediff('2019-06-10', regexp_replace(`time`, '/', '-'))
from usertab;




---------- end ----------
-- create table  if not exists usertab(
--     id string comment '用户id',
--     sex string comment '性别,f:女性,m:男性',
--     time string comment '出生日期',
--     education string comment '学历',
--     occupation string comment '职业',
--     income string comment '收入',
--     area string comment '出生地区',
--     desired_area string comment '向往地区',
--     city_countryside string comment '城市/农村'
-- )row format delimited fields terminated by ','; 


Hive综合应用案例——用户学历查询_第2张图片

---------- do not modify ----------
 drop database if exists mydb cascade;
---------- do not modify ----------


---------- begin ----------
-- Exercise 2: for every (area, education) pair, report the highest income.

-- Create the mydb database. The statement above drops it first, so each run
-- starts from an empty database.
create database if not exists mydb;

-- Make mydb the current database for the statements below.
use mydb;

-- Create the usertab1 table. The input is a comma-separated text file.
-- The time column is back-quoted because newer Hive releases list TIME as a
-- reserved keyword. The quoted form is accepted by older releases as well.
create table usertab1 (
    id               int,
    sex              string,
    `time`           string,
    education        string,
    occupation       string,
    income           string,
    area             string,
    desired_area     string,
    city_countryside string
)
row format delimited
fields terminated by ',';

-- Load the data from the local file /root/data1.txt
load data local inpath '/root/data1.txt' into table usertab1;

-- Highest income within each (area, education) group.
-- income is stored as a string, so ordering by the raw column compares text
-- and would rank 9000 above 10000. The rows are therefore ranked by the
-- numeric value first. The raw string is kept as a second sort key so that
-- values that do not parse as numbers (cast yields NULL) still rank in the
-- same text order as before.
-- NOTE(review): this relies on Hive placing NULL last in a descending sort,
-- confirm for the Hive version in use.
select
    ranked.area,
    ranked.education,
    ranked.income
from (
    select
        area,
        education,
        income,
        row_number() over (
            partition by area, education
            order by cast(income as double) desc, income desc
        ) as rn
    from usertab1
) ranked
where ranked.rn = 1;





---------- end ----------
-- select area,education,income from(
--     select area,education,income,
--     row_number() over(partition by area,education order by income desc) as rn
--     from usertab1
-- ) a where a.rn=1;

Hive综合应用案例——用户学历查询_第3张图片

---------- do not modify ----------

 drop database if exists mydb cascade;
 set hive.mapred.mode=nonstrict;
---------- do not modify ----------


---------- begin ----------
-- Exercise 3: percentage of all users that falls into each education level,
-- rounded to two decimal places.

-- Create the mydb database. The statement above drops it first, so each run
-- starts from an empty database.
create database if not exists mydb;

-- Make mydb the current database for the statements below.
use mydb;

-- Create the usertab2 table. The input is a comma-separated text file.
-- The time column is back-quoted because newer Hive releases list TIME as a
-- reserved keyword. The quoted form is accepted by older releases as well.
create table usertab2 (
    id               int,
    sex              string,
    `time`           string,
    education        string,
    occupation       string,
    income           string,
    area             string,
    desired_area     string,
    city_countryside string
)
row format delimited
fields terminated by ',';

-- Load the data from the local file /root/data.txt
load data local inpath '/root/data.txt' into table usertab2;

-- Share of each education level in percent.
-- per_level holds one row per education level with its head count, and
-- overall holds a single row with the total head count. The cross join is
-- intentional: it attaches that single total to every per-level row, and it
-- is written explicitly instead of as a comma join so the Cartesian product
-- is visible to the reader.
select
    concat(round(per_level.cnted * 100 / overall.cnt, 2), '%'),
    per_level.education
from (
    select
        count(*) as cnted,
        education
    from usertab2
    group by education
) per_level
cross join (
    select count(*) as cnt
    from usertab2
) overall
order by per_level.education;



-- select concat(round(a.cnt*100/a.cnt_total,2),'%') as ct,a.education from(
--     select count(*) over(partition by education) as cnt,
--     count(*) over() as cnt_total,education,
--     row_number() over(partition by education) as rn
--     from usertab2
-- )a where a.rn=1 order by a.education;

---------- end ----------

你可能感兴趣的:(Hive,hadoop)