- 第一次直播:对赛题的理解以及数据分析步骤,需要用到的一些方法、工具的总结:
- 第一次直播:探索性分析(EDA):
评价标准为MAE(Mean Absolute Error):
1.3.1 数据读取pandas
#1.3.1 数据读取pandas
import pandas as pd
import numpy as np
## 1) Load the training set and the test set (both files are space-separated)
path = 'C:/Users/OY/Desktop/GAME/'
Train_data = pd.read_csv(path + 'used_car_train_20200313.csv', sep=' ')
Text_date = pd.read_csv(path + 'used_car_testA_20200313.csv', sep=' ')
# Report the (rows, columns) shape of each loaded frame.
print('Train_data shape:', Train_data.shape)
print('TextA_date shape:', Text_date.shape)
# Preview the first rows of the training data.
print(Train_data.head())
# 1.3.2 Worked examples of classification metrics
import numpy as np
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score

## Accuracy
y_pred = [0, 1, 0, 1]
y_true = [0, 1, 1, 1]
print('ACC:', accuracy_score(y_true, y_pred))

## Precision, Recall, F1-score (computed on a second toy example)
y_pred = [0, 1, 0, 0]
y_true = [0, 1, 0, 1]
print('Precision', metrics.precision_score(y_true, y_pred))
print('Recall', metrics.recall_score(y_true, y_pred))
print('F1-score:', metrics.f1_score(y_true, y_pred))

## AUC (takes continuous scores rather than hard 0/1 predictions)
y_true = np.array([0, 0, 1, 1])
y_scores = np.array([0.1, 0.4, 0.35, 0.8])
print('AUC score:', roc_auc_score(y_true, y_scores))
#1.3.3 回归指标评价计算示例
# coding=utf-8
import numpy as np
from sklearn import metrics
# MAPE has to be implemented by hand.
def mape(y_true, y_pred):
    """Return the mean absolute percentage error as a fraction (not x100).

    Computes ``mean(|(y_pred - y_true) / y_true|)`` element-wise.

    Args:
        y_true: Ground-truth values; any array-like (list, tuple, ndarray).
        y_pred: Predicted values; array-like, broadcastable against y_true.

    Returns:
        The mean of the element-wise absolute relative errors.

    Note:
        y_true must not contain zeros: the division by y_true then makes
        the result inf or nan.
    """
    # Coerce to float arrays so plain Python sequences support the
    # element-wise subtraction and division below.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_pred - y_true) / y_true))
# Toy targets and predictions shared by the error metrics below.
y_true = np.array([1.0, 5.0, 4.0, 3.0, 2.0, 5.0, -3.0])
y_pred = np.array([1.0, 4.5, 3.8, 3.2, 3.0, 4.8, -2.2])

# MSE, and RMSE as the square root of that same value.
mse_value = metrics.mean_squared_error(y_true, y_pred)
print('MSE:', mse_value)
print('RMSE:', np.sqrt(mse_value))

# MAE
mae_value = metrics.mean_absolute_error(y_true, y_pred)
print('MAE:', mae_value)

# MAPE, using the mape() helper defined in this file.
mape_value = mape(y_true, y_pred)
print('MAPE:', mape_value)

## R2-score, evaluated on a separate small example.
from sklearn.metrics import r2_score
y_true = [3, -0.5, 2, 7]
y_pred = [2.5, 0.0, 2, 8]
r2_value = r2_score(y_true, y_pred)
print('R2-score:', r2_value)