一、生成测试数据
import random
import pandas as pd
def generate_car_sales_data(rng=None):
    """Build one randomly generated car-sales record.

    Every field is drawn independently from a small fixed pool of values,
    so the combinations are synthetic test data and not guaranteed to be
    realistic (e.g. any manufacturer can be paired with any model).

    Args:
        rng: Optional ``random.Random``-like object providing ``choice`` and
            ``randint``. When omitted, the module-level ``random`` functions
            are used, so ``random.seed(...)`` still controls the output.

    Returns:
        dict: One record keyed by the Chinese column names. The value under
        '车辆长宽高' is a list of three integers ``[length, width, height]``;
        all other values are strings or integers.
    """
    source = random if rng is None else rng
    pick = source.choice

    # The draw order below is kept fixed so that a seeded generator always
    # produces the same record.
    sale_time = pick(['2023-12-01', '2023-12-02', '2023-12-03', '2023-12-04', '2023-12-05'])
    sale_location = pick(['beijing', 'shanghai', 'guangzhou', 'shenzhen', 'hangzhou'])
    vehicle_type = pick(['CAR', 'SUV', 'MPV', 'Pickup'])
    vehicle_model = pick(['Model 3', 'Model Y', 'Model S', 'Model X'])
    manufacturer = pick(['tesla', 'BYD', 'BC', 'BMW', 'AODI'])
    displacement = pick(['1.5L', '2.0L', '2.5L', '3.0L', '3.5L'])
    power = pick(['100kW', '150kW', '200kW', '250kW', '300kW'])
    engine_model = pick(['L4', 'V6', 'V8', 'W12'])
    fuel_type = pick(['gasoline', 'diesel', 'GAS', 'electric'])
    # NOTE(review): length, width and height all share the 4000-5000 range;
    # presumably placeholder values rather than realistic dimensions.
    length, width, height = [source.randint(4000, 5000) for _ in range(3)]
    wheelbase = source.randint(2500, 3000)
    drive_type = pick(['Front-wheel', 'Rear-wheel', 'Four-wheel'])
    tire_specification = pick(['205/55R16', '215/60R17', '225/65R18', '235/70R19', '245/75R20'])
    tire_number = pick([4, 5])
    passenger_capacity = pick([4, 5, 6, 7])
    ownership = pick(['personal', 'company', 'lease'])
    purchaser = pick(['zhangsan', 'lisi', 'wangwu', 'zhaoliu'])

    return {
        '销售时间': sale_time,
        '销售地点': sale_location,
        '车辆类型': vehicle_type,
        '车辆型号': vehicle_model,
        '制造商': manufacturer,
        '排量': displacement,
        '功率': power,
        '发动机型号': engine_model,
        '燃料种类': fuel_type,
        '车辆长宽高': [length, width, height],
        '轴距': wheelbase,
        '驱动方式': drive_type,
        '轮胎规格': tire_specification,
        '轮胎数量': tire_number,
        '载客数量': passenger_capacity,
        '所有权': ownership,
        '购买人': purchaser,
    }
def _flatten_dimensions(record):
    """Return a copy of *record* with the '车辆长宽高' list split into three fields.

    Key order is preserved; the three new keys take the place of the
    original list-valued key.
    """
    flat = {}
    for key, value in record.items():
        if key == '车辆长宽高':
            flat['车辆长度'], flat['车辆宽度'], flat['车辆高度'] = value
        else:
            flat[key] = value
    return flat


# Generate 1000 synthetic records and write them to a CSV file.
car_sales_data = [generate_car_sales_data() for _ in range(1000)]

# A list-valued cell would be written as one quoted "[l, w, h]" string whose
# embedded commas break readers that split purely on ',' (such as the
# comma-delimited Hive table defined in this file, which expects separate
# length / width / height columns). Flatten it into three scalar columns.
df = pd.DataFrame([_flatten_dimensions(record) for record in car_sales_data])

# The header row is kept in the file; whatever loads the CSV has to skip
# the first line.
df.to_csv('car_sales_data.csv', index=False)
二、创建HIVE数据库
1.创建数据库表
-- The load statement in this document targets stutest.car_sales_data, so
-- make sure that database exists and is the current one before creating
-- the table.
CREATE DATABASE IF NOT EXISTS stutest;
USE stutest;

-- External table over comma-delimited text files stored under /car_sales.
-- Every column is declared as string; cast in queries where numbers are needed.
CREATE EXTERNAL TABLE car_sales_data (
    sale_time string comment '销售时间',
    sale_location string comment '销售地点',
    vehicle_type string comment '车辆类型',
    vehicle_model string comment '车辆型号',
    manufacturer string comment '制造商',
    displacement string comment '排量',
    power string comment '功率',
    engine_model string comment '发动机型号',
    fuel_type string comment '燃料种类',
    vehicle_length string comment '车辆长度',
    vehicle_width string comment '车辆宽度',
    vehicle_height string comment '车辆高度',
    wheelbase string comment '轴距',
    drive_type string comment '驱动方式',
    tire_specification string comment '轮胎规格',
    tire_number string comment '轮胎数量',
    passenger_capacity string comment '载客数量',
    ownership string comment '所有权',
    purchaser string comment '购买人'
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LOCATION '/car_sales'
-- The generated CSV starts with a header line; skip it so the column names
-- are not returned as a data row.
TBLPROPERTIES ('skip.header.line.count'='1');
2.装载数据
-- Load the CSV from the client's local filesystem into the table, replacing
-- any data loaded earlier (OVERWRITE).
-- NOTE(review): '/资源路径/' looks like a placeholder directory; substitute the
-- real location of car_sales_data.csv before running.
LOAD DATA LOCAL INPATH '/资源路径/car_sales_data.csv'
OVERWRITE INTO TABLE stutest.car_sales_data;
3.查看数据
-- Preview the first 10 rows. The table name is qualified with the stutest
-- database so the query reads the same table the load statement wrote to,
-- regardless of which database is currently selected.
SELECT * FROM stutest.car_sales_data LIMIT 10;