hive-学习汽车销售分析

–汽车销售日志分析–
show databases;
create database if not exists sale_db;
use sale_db ;
show tables;

–创建数据仓库
create external table car_table(
province string, --省份
months int, --月
city string, --市
district string, --区县
years int, --年
model string,–车辆型号
manufacturer string,–制造商
brand string, --品牌
vehicletype string,–车辆类型
ownership string,–所有权
nature string, --使用性质
quantity int,–数量
enginemodel string,–发动机型号
displacement int,–排量
powers double, --功率
fuel string,–燃料种类
length1 int,–车长
width1 int,–车宽
height1 int,–车高
length2 int,–厢长
width2 int,–厢宽
height2 int,–厢高
numberofaxles int,–轴数
wheelbase int,–轴距
frontwheelbase int,–前轮距
tirespecification string,–轮胎规格
tirenumber int,–轮胎数
totalquality int,–总质量
completequality int,–整备质量
approvedquality int,–核定载质量
approvedpassenger string,–核定载客
tractionquality int,–准牵引质量
chassisenterprise string,–底盘企业
chassisbrand string,–底盘品牌
chassismodel string,–底盘型号
engineenterprise string,–发动机企业
vehiclename string,–车辆名称
age int,–年龄
gender string --性别
)
row format delimited
fields terminated by ‘,’
location ‘/data/sale’
tblproperties(“skip.header.line.count”=“1”); --跳过文件行首1行;

SELECT * from car_table;
–处理空串与null
alter table car_table set serdeproperties(‘serialization.null.format’=‘’);
desc formatted car_table ;

–数据分析
–数据记录数 70640
SELECT count(*) from car_table ;

–乘用车辆(非营运)和商用车辆(非营运以外的)的销售数量和比例
SELECT
sum(if(t1.nature=‘非营运’,t1.sal_nums,0)) sycar_nums – 66478
, sum(if(t1.nature!=‘非营运’,t1.sal_nums,0)) cycar_nums – 3884
from (
SELECT
t.nature
, COUNT(*) sal_nums
from car_table t
where t.nature is not null
group by t.nature)t1 ;

–山西省2013年每个月的汽车销售数量比例
–1\10\11\12月份的销量比较高
SELECT
t.months
, count() months_nums
, (select count(
) from car_table where months is not null) years_nums
, round(count()/(select count() from car_table where months is not null),4)*100||‘%’ months_raits
from car_table t
where t.months is not NULL
group by t.months;

–买车的男女比例和男女对车的品牌的选择
SELECT
t.gender
, count()
, (select count(
) from car_table where gender = ‘男性’ or gender = ‘女性’) nv_nums
, round(count()/(select count() from car_table where gender = ‘男性’ or gender = ‘女性’),4)*100||‘%’ raite
from car_table t
where t.gender = ‘男性’ or t.gender = ‘女性’
group by t.gender;

SELECT
t.brand
, t.gender
, count()
from car_table t
where (t.gender = ‘男性’ or t.gender = ‘女性’) and t.brand is not null
group by t.brand , t.gender
order by count(
) desc;

–车的所有权,车辆型号,车辆类型
SELECT
t1.concat_str
, count()
from (
SELECT
concat(model,ownership,vehicletype) concat_str–t.model||t.ownership||t.vehicletype
from car_table t) t1
group by t1.concat_str
order by count(
) desc;

–不同车型在每个月的销售量
SELECT
t.months
, t.vehicletype
, count()
from car_table t
where t.vehicletype is not null and t.months is not null
group by t.months ,t.vehicletype
order by t.months ,count(
) desc;

–不同品牌车销售情况,统计发动机和燃料种类 前三全部是五菱
SELECT
t.brand, t.enginemodel, t.fuel,count()
from car_table t
where t.brand is not null and t.enginemodel is not null and t.fuel is not null
group by t.brand, t.enginemodel, t.fuel
order by count(
) desc;

–统计五菱某一年每月的销售量 仍然是1\10\11\12这几个月的销量比较好
SELECT
t.brand
, t.months
, count(*)
from car_table t
where t.brand = ‘五菱’
group by t.brand ,t.months

你可能感兴趣的:(hive,hive,学习,汽车)