下方有数据可免费下载
原始数据为csv格式,列数比较多
create database if not exists car;
use car;
create external table cars(
province string, --省份
month int, --月
city string, --市
district string, --区县
year int, --年
model string,--车辆型号
manufacturer string,--制造商
brand string, --品牌
vehicletype string,--车辆类型
ownership string,--所有权
nature string, --使用性质
quantity int,--数量
enginemodel string,--发动机型号
displacement int,--排量
power double, --功率
fuel string,--燃料种类
length1 int,--车长
width1 int,--车宽
height1 int,--车高
length2 int,--厢长
width2 int,--厢宽
height2 int,--厢高
numberofaxles int,--轴数
wheelbase int,--轴距
frontwheelbase int,--前轮距
tirespecification string,--轮胎规格
tirenumber int,--轮胎数
totalquality int,--总质量
completequality int,--整备质量
approvedquality int,--核定载质量
approvedpassenger string,--核定载客
tractionquality int,--准牵引质量
chassisenterprise string,--底盘企业
chassisbrand string,--底盘品牌
chassismodel string,--底盘型号
engineenterprise string,--发动机企业
vehiclename string,--车辆名称
age int,--年龄
gender string --性别
)
row format delimited
fields terminated by ','
location '/cars'
tblproperties("skip.header.line.count"="1"); --跳过文件行首1行;
desc cars;
[hadoop@hadoop000 hive_data]$ hadoop fs -put ./cars.csv /cars
select * from cars limit 10;
select '非营运',sum(if(a.nature='非营运',a.cnt,0)),'营运',sum(if(a.nature!='非营运',a.cnt,0))
from
(select nature,count(*) as cnt
from cars group by nature having nature is not null and nature!='') a;
Total MapReduce CPU Time Spent: 6 seconds 810 msec
OK
非营运 66478 营运 3884
Time taken: 32.365 seconds, Fetched: 1 row(s)
select month,c1.ss/c2.sum
from
(select month,sum(quantity) as ss
from cars where province = '山西省' and year = '2013' group by month ) c1,
(select sum(quantity) as sum
from cars where province = '山西省' and year = '2013') c2;
Total MapReduce CPU Time Spent: 9 seconds 150 msec
OK
1 0.14799181376311077
2 0.05831272561894204
3 0.09306159574770473
4 0.06587362496802251
5 0.0732071288479577
6 0.05547028225462608
7 0.06323015263920867
8 0.06378442909525028
9 0.06948352804070379
10 0.1044882180722549
11 0.10053722179585571
12 0.1045592791563628
Time taken: 53.486 seconds, Fetched: 12 row(s)
select '男性',B.man/(B.man+B.woman),'女性', B.woman/(B.man+B.woman)
from
(select '男性',sum(if(A.gender='男性',A.cnt,0)) as man,'女性',sum(if(A.gender='女性',A.cnt,0)) as woman
from
(select gender,count(*) as cnt
from cars where gender is not null and gender != '' group by gender) A) B;
Total MapReduce CPU Time Spent: 5 seconds 880 msec
OK
男性 0.7010659323952227 女性 0.29893406760477725
Time taken: 34.488 seconds, Fetched: 1 row(s)
select gender,brand,count(*) as cnt
from cars
where gender is not null and gender != '' and age is not null
group by gender,brand
having brand is not null and brand !=' '
order by cnt desc
limit 5;
Total MapReduce CPU Time Spent: 5 seconds 330 msec
OK
男性 五菱 28208
女性 五菱 12004
男性 长安 3679
男性 东风 3214
男性 五菱宏光 2331
Time taken: 33.615 seconds, Fetched: 5 row(s)
select a.cnt,count(*)
from
(select concat(model,ownership,vehicletype) as cnt from cars) a
group by a.cnt;
ZK6726DX3单位大型专用校车 1
ZK6726DXA9单位大型专用校车 4
ZK6729D2单位大型普通客车 2
ZK6729DB单位大型普通客车 16
ZK6731DG1单位大型普通客车 6
ZK6731NG1单位大型普通客车 24
ZK6750D2单位大型普通客车 17
......
select month,vehicletype,count(*) from cars group by vehicletype,month having month is not null and vehicletype is null and vehicletype != '';
select brand,enginemodel,fuel,count(*) from cars group by brand,enginemodel,fuel;
select brand,month,count(*) from cars group by brand,month having brand='五菱';
Total MapReduce CPU Time Spent: 3 seconds 940 msec
OK
五菱 1 5589
五菱 2 2226
五菱 3 3557
五菱 4 2389
五菱 5 3351
五菱 6 2302
五菱 7 2893
五菱 8 2980
五菱 9 3422
五菱 10 5278
五菱 11 4809
五菱 12 4963
Time taken: 16.416 seconds, Fetched: 12 row(s)