-- Extract the salary column of three job families from the job table,
-- one table per family, matching on a keyword in the posting name.

-- sjfx: postings whose name contains "数据分析" (data analysis).
-- The filter previously used '%数据采集%', which duplicated sjcj below and
-- filled the data-analysis table with data-collection postings.
create table sjfx as
select salary
from job
where name like '%数据分析%';

select salary from sjfx;

-- bigdata: postings whose name contains "大数据开发工程师".
create table bigdata as
select salary
from job
where name like '%大数据开发工程师%';

select salary from bigdata;

-- sjcj: postings whose name contains "数据采集" (data collection).
create table sjcj as
select salary
from job
where name like '%数据采集%';

select salary from sjcj;
-- 2、分析“数据分析”岗位的平均工资、最高工资、最低工资
-- 2.1 去掉数据分析表 sjfx 薪资字段中的冒号(:),结果存入 fx1。
-- Build fx1 from sjfx with every ':' character removed from the salary text.
create table fx1 as
select regexp_replace(salary, ':', '') as salary
from sjfx;
-- 2.2 去除表里的所有空值。
-- Drop rows whose salary is NULL.
-- Read from fx1 itself rather than from sjfx: overwriting fx1 with the raw
-- sjfx rows would throw away the cleanup applied when fx1 was created.
insert overwrite table fx1
select salary
from fx1
where salary is not null;

select salary from fx1;

-- fx2 keeps each distinct salary string exactly once.
create table fx2 as
select salary
from fx1
group by salary;
-- 5.1 得到按照年份结算的薪资。
-- xz_year: salaries quoted per year (salary text ends with '年').
create table xz_year as
select salary
from fx2
where salary like '%年';
-- 查看一下表里面的数据:
select salary from xz_year;

-- xz_month: salaries quoted per month (salary text ends with '月').
create table xz_month as
select salary
from fx2
where salary like '%月';
-- 查看一下表里面的数据:
select salary from xz_month;

-- Strip the period suffix so only the amount and its unit remain.
-- xz_year1: yearly salaries without the '/年' suffix.
create table xz_year1 as
select regexp_replace(salary, '/年', '') as salary
from xz_year;

-- xz_month1: monthly salaries without the '/月' suffix.
create table xz_month1 as
select regexp_replace(salary, '/月', '') as salary
from xz_month;
-- 查看一下表里面的数据:
-- Inspect the suffix-stripped yearly and monthly salary tables.
select salary from xz_year1;
select salary from xz_month1;
-- Separate the rows by unit character and remove that character from the text.

-- xz_month2: monthly salaries ending in '千', with '千' removed.
create table xz_month2 as
select regexp_replace(salary, '千', '') as salary
from xz_month1
where salary like '%千';

select salary from xz_month2;

-- xz_month3: monthly salaries ending in '万', with '万' removed.
create table xz_month3 as
select regexp_replace(salary, '万', '') as salary
from xz_month1
where salary like '%万';

select salary from xz_month3;

-- xz_year2: yearly salaries ending in '万', with '万' removed.
create table xz_year2 as
select regexp_replace(salary, '万', '') as salary
from xz_year1
where salary like '%万';

select salary from xz_year2;
-- Split each "low-high" salary string on '-' into two columns:
-- element 0 becomes min, element 1 becomes max.

-- xz_month4: bounds of the monthly salaries from xz_month2.
create table xz_month4 as
select
    parts[0] as min,
    parts[1] as max
from (
    select split(salary, '-') as parts
    from xz_month2
) s;

select min, max from xz_month4;

-- xz_month5: bounds of the monthly salaries from xz_month3.
create table xz_month5 as
select
    parts[0] as min,
    parts[1] as max
from (
    select split(salary, '-') as parts
    from xz_month3
) s;

select min, max from xz_month5;

-- xz_year3: bounds of the yearly salaries from xz_year2.
create table xz_year3 as
select
    parts[0] as min,
    parts[1] as max
from (
    select split(salary, '-') as parts
    from xz_year2
) s;

select min, max from xz_year3;
-- Scale the split bounds to plain amounts and add the midpoint as avg.

-- xz_month6: xz_month4 bounds multiplied by 1000.
create table xz_month6 as
select
    min * 1000 as min,
    max * 1000 as max,
    (min + max) / 2 * 1000 as avg
from xz_month4;

select min, max, avg from xz_month6;

-- xz_month7: xz_month5 bounds multiplied by 10000.
create table xz_month7 as
select
    min * 10000 as min,
    max * 10000 as max,
    (min + max) / 2 * 10000 as avg
from xz_month5;

select min, max, avg from xz_month7;

-- xz_year4: xz_year3 bounds multiplied by 10000 and divided by 12,
-- i.e. the yearly figure expressed per month.
create table xz_year4 as
select
    min * 10000 / 12 as min,
    max * 10000 / 12 as max,
    (min + max) / 2 * 10000 / 12 as avg
from xz_year3;

select min, max, avg from xz_year4;

-- Append both monthly tables so xz_year4 holds every converted row.
insert into table xz_year4
select min, max, avg
from xz_month6;

insert into table xz_year4
select min, max, avg
from xz_month7;
-- 5.9 创建薪资总表,得到数据。
-- sjfx_all: one summary row over xz_year4 holding the smallest min,
-- the largest max and the mean of avg.
create table sjfx_all as
select
    min(min) as min,
    max(max) as max,
    avg(avg) as avg
from xz_year4;

select min, max, avg from sjfx_all;
-- 5.10、利用 sqoop 将 hive 数据导出到 mysql(以下命令在 shell 中执行,而不是在 hive 中)
# Export the Hive table sjfx_all (read from its warehouse directory) into the
# MySQL table fx in database test. Run this from a shell, not the Hive CLI.
# -P prompts for the MySQL password on the console instead of embedding it in
# the script, where it would leak through the file, shell history and process list.
# '\001' is the field delimiter expected in the exported files.
sqoop export \
  --connect "jdbc:mysql://localhost:3306/test?characterEncoding=UTF-8" \
  --username root \
  -P \
  --table fx \
  --fields-terminated-by '\001' \
  --export-dir '/user/hive/warehouse/test.db/sjfx_all'