# Use regression analysis to study which factors are related to car sales.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Set matplotlib's sans-serif font list to 'stxiHei'
# (presumably a CJK-capable font so Chinese labels render; confirm it is installed).
plt.rcParams['font.sans-serif'] = ['stxiHei']
# This import was fused onto the end of the line above, which is a SyntaxError.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import seaborn as sns
from sklearn.metrics import r2_score, mean_squared_error
# Load the car-sales CSV, drop rows where '公路里程数' is missing (the original
# author notes only one such row exists), then keep only the columns of interest.
data = (
    pd.read_csv(r'C:\Users\jxx\Desktop\13\汽车销售数据.csv', encoding='utf-8')
    .dropna(subset=['公路里程数'])
    [[
        '传统汽车销量',
        '国内生产总值当季值(亿元)x1',
        '汽油价格(元/吨)x2',
        '人民币贷款基准利率%x3',
        '汽车总产量(万辆)x4',
        '公路里程数',
        '汽车整车股票指数',
        '消费者信心指数',
    ]]
)
# Preview the first rows (only displayed when run as a notebook cell).
data.head()
# Compute pairwise correlations with pandas, then zero out the diagonal and the
# lower triangle so each variable pair appears once (strictly upper triangle).
cormatrix = data.corr()
cormatrix *= np.triu(np.ones(cormatrix.shape), k=1)
# Display the result (only shown when run as a notebook cell).
cormatrix
# Draw a heatmap of the correlation matrix.
# Compute the correlation coefficients.
corr_all = data.corr()
# Boolean mask that is True on the diagonal and the upper triangle; seaborn
# does not draw masked cells, so only the lower triangle is shown.
# Uses builtin `bool`: the `np.bool` alias was removed in NumPy 1.24.
mask = np.triu(np.ones(corr_all.shape, dtype=bool))
# Create the figure and draw the heatmap on its axes.
fig, ax = plt.subplots(figsize=(9, 7))
sns.heatmap(
    corr_all,
    mask=mask,
    square=True,
    linewidths=.5,
    ax=ax,
    cmap='BuPu',
)
plt.title('Correlation of Feactures')
plt.show()