重 点:
内 容:
基站编号 | 省份 | 城市 | 纬度 | 经度
58015 | 安徽 | 砀山 | 34.27 | 116.2
# hadoop fs -mkdir /china_stn
# hadoop fs -put /home/data/China_stn_city.csv /china_stn
# hive --service metastore &
-- Create the working database and make it the current one.
-- "if not exists" lets the walkthrough be re-run without an
-- "already exists" error.
hive> create database if not exists china_all;
hive> use china_all;
-- External table over the comma-delimited observation files under the HDFS
-- directory /china_all (the directory is expected to be populated already).
-- Every field is declared as string; the aggregate queries later in this
-- file compare several of them against -9999, which looks like the
-- missing-value marker (NOTE(review): confirm against the data set).
-- Fix: the column list is now closed with ")" before the row-format clause;
-- without it the statement does not parse.
hive> create external table if not exists china_all.china_all(
    stn string,
    year string,
    month string,
    day string,
    hour string,
    temp string,
    dew_point_temp string,
    pressure string,
    wind_direction string,
    wind_speed string,
    clouds string,
    precipitation_1 string,
    precipitation_6 string
)
row format delimited
fields terminated by ','
stored as textfile
location '/china_all';
-- Preview a few rows to confirm the files are being parsed into columns.
hive> select * from china_all limit 10;
-- External lookup table mapping a station id (stn) to its province, city
-- and coordinates. It reads the comma-delimited files in the HDFS directory
-- /china_stn, where China_stn_city.csv was uploaded by the hadoop fs -put
-- step earlier in this file.
-- Fix: the column list is now closed with ")" before the row-format clause;
-- without it the statement does not parse.
hive> create external table if not exists china_all.stn_city(
    stn string,
    province string,
    city string,
    latitude string,
    longitude string
)
row format delimited
fields terminated by ','
stored as textfile
location '/china_stn';
-- Preview a few rows to confirm the lookup file is being parsed correctly.
hive> select * from stn_city limit 10;
-- Managed table holding every observation column from china_all plus the
-- province and city of the reporting station.
-- Fix: the column list is now closed with ")" before the row-format clause;
-- without it the statement does not parse.
hive> create table if not exists china_all.tmp_city(
    stn string,
    year string,
    month string,
    day string,
    hour string,
    temp string,
    dew_point_temp string,
    pressure string,
    wind_direction string,
    wind_speed string,
    clouds string,
    precipitation_1 string,
    precipitation_6 string,
    province string,
    city string
)
row format delimited
fields terminated by ','
stored as textfile;
-- Keep only observations whose station has both a province and a city in
-- stn_city. The previous "left join ... where sc.province is not null and
-- sc.city is not null" discarded every unmatched row, i.e. it behaved as an
-- inner join, so it is written as one here; the result set is the same.
-- Columns are listed explicitly because insert maps values to the target
-- table by position, which "c2.*" would silently break if china_all changed.
hive> insert overwrite table tmp_city
select
    obs.stn,
    obs.year,
    obs.month,
    obs.day,
    obs.hour,
    obs.temp,
    obs.dew_point_temp,
    obs.pressure,
    obs.wind_direction,
    obs.wind_speed,
    obs.clouds,
    obs.precipitation_1,
    obs.precipitation_6,
    sc.province,
    sc.city
from china_all as obs
inner join stn_city as sc
    on obs.stn = sc.stn
where sc.province is not null
  and sc.city is not null;
-- Preview a few joined rows.
hive> select * from tmp_city limit 10;
-- Materialize the 2022 slice of tmp_city as its own table (create-table-as-
-- select), keeping all fifteen columns in their original order.
-- year is stored as a string; the cast spells out the numeric comparison
-- that Hive applies when a string column is compared with an integer literal.
hive> create table china_all.tmp_city_2022 as
select
    stn,
    year,
    month,
    day,
    hour,
    temp,
    dew_point_temp,
    pressure,
    wind_direction,
    wind_speed,
    clouds,
    precipitation_1,
    precipitation_6,
    province,
    city
from tmp_city
where cast(year as double) = 2022;
-- Preview a few rows of the 2022 slice.
hive> select * from tmp_city_2022 limit 10;
-- Per-month, per-province averages of temperature and wind speed for 2022.
-- Fix: the column list is now closed with ")" before the row-format clause;
-- without it the statement does not parse.
hive> create table if not exists china_map(
    month string,
    province string,
    temp string,
    wind_speed string
)
row format delimited
fields terminated by ','
stored as textfile;
-- Rows where either measurement equals -9999 are excluded before averaging.
-- The exclusion is done numerically, like the other aggregate queries in
-- this file, instead of comparing against the string '-9999': a string
-- comparison would let differently formatted values such as '-9999.0'
-- through. Rows whose value is not numeric at all cast to NULL and are
-- excluded as well.
-- The casts inside avg() make explicit the string-to-double conversion that
-- avg() performs on string columns.
hive> insert overwrite table china_map
select
    month,
    province,
    avg(cast(temp as double)) as temp,
    avg(cast(wind_speed as double)) as wind_speed
from tmp_city_2022
where cast(temp as double) <> -9999
  and cast(wind_speed as double) <> -9999
group by month, province;
-- Show the full result.
hive> select * from china_map;
-- For each month of 2022, the ten cities with the highest average
-- precipitation_6 value.
-- Fix: the column list is now closed with ")" before the row-format clause;
-- without it the statement does not parse.
hive> create table if not exists city_precipitation_top10(
    month string,
    city string,
    precipitation_6 string
)
row format delimited
fields terminated by ','
stored as textfile;
-- Inner query (monthly): average precipitation_6 per month and city, using
--   only non-negative readings. The ">= 0" filter also removes the -9999
--   rows that were previously excluded by a separate, redundant condition.
-- Middle query (ranked): number the cities inside each month from highest
--   to lowest average. city is a secondary sort key so that ties get a
--   repeatable order; row_number() alone picks arbitrarily among ties.
-- Outer query: keep ranks 1 through 10.
-- The "order by month, pre6 desc" that used to sit in the inner query was
-- removed: it forced a global sort and had no effect on the ranking, which
-- is defined entirely by the window's own order by.
hive> insert overwrite table city_precipitation_top10
select
    ranked.month,
    ranked.city,
    ranked.pre6
from (
    select
        monthly.month,
        monthly.city,
        monthly.pre6,
        row_number() over (
            partition by monthly.month
            order by monthly.pre6 desc, monthly.city
        ) as rn
    from (
        select
            month,
            city,
            avg(cast(precipitation_6 as double)) as pre6
        from tmp_city_2022
        where cast(precipitation_6 as double) >= 0
        group by month, city
    ) as monthly
) as ranked
where ranked.rn <= 10;
-- Show the full result.
hive> select * from city_precipitation_top10;
-- Average temperature per month and city for 2022.
-- Fix: the column list is now closed with ")" before the row-format clause;
-- without it the statement does not parse.
hive> create table if not exists city_temp(
    month string,
    city string,
    temp string
)
row format delimited
fields terminated by ','
stored as textfile;
-- Readings equal to -9999 are left out of the average. temp is a string
-- column; the casts spell out the numeric conversion Hive already applies
-- for the comparison and for avg().
hive> insert overwrite table city_temp
select
    month,
    city,
    avg(cast(temp as double)) as tmp
from tmp_city_2022
where cast(temp as double) <> -9999
group by month, city;
-- Preview the first rows.
hive> select * from city_temp limit 30;
-- Average temperature per province and month for 2022, plus a forecast
-- column.
-- Fix: the column list is now closed with ")" before the row-format clause;
-- without it the statement does not parse.
hive> create table if not exists province_temp(
    province string,
    month string,
    temp string,
    forecast string
)
row format delimited
fields terminated by ','
stored as textfile;
-- Readings equal to -9999 are left out of the average. forecast is filled
-- with the constant '0' for every row (NOTE(review): presumably a flag or
-- placeholder used by a later step -- confirm).
-- month is a string, so "order by province, month" sorts it as text.
hive> insert overwrite table province_temp
select
    province,
    month,
    avg(cast(temp as double)),
    '0'
from tmp_city_2022
where cast(temp as double) <> -9999
group by province, month
order by province, month;
-- Show the full result.
hive> select * from province_temp;
-- Average pressure per month and province for 2022.
-- Fix: the column list is now closed with ")" before the row-format clause;
-- without it the statement does not parse.
hive> create table if not exists province_pressure(
    month string,
    province string,
    pressure string
)
row format delimited
fields terminated by ','
stored as textfile;
-- Readings equal to -9999 are left out of the average. pressure is a string
-- column; the casts spell out the numeric conversion Hive already applies
-- for the comparison and for avg().
hive> insert overwrite table province_pressure
select
    month,
    province,
    avg(cast(pressure as double)) as pressure
from tmp_city_2022
where cast(pressure as double) <> -9999
group by month, province;
-- Show the full result.
hive> select * from province_pressure;
-- Average temperature per year, province and month across every year
-- present in tmp_city (not only 2022).
-- Fix: the column list is now closed with ")" before the row-format clause;
-- without it the statement does not parse.
hive> create table if not exists province_temp_all(
    year string,
    province string,
    month string,
    temp string
)
row format delimited
fields terminated by ','
stored as textfile;
-- Readings equal to -9999 are left out of the average.
-- year and month are strings, so the order by sorts them as text.
hive> insert overwrite table province_temp_all
select
    year,
    province,
    month,
    avg(cast(temp as double))
from tmp_city
where cast(temp as double) <> -9999
group by year, province, month
order by year, province, month;
-- Preview the first rows.
hive> select * from province_temp_all limit 30;