评价指标主要包括:
R-Square (R^2)决定系数、线性相关系数
取值通常在 [0,1] 之间(当模型效果比直接预测均值还差时,R2 也可能为负),越接近1,模型效果越好,越接近0,效果越差;但是 R2 值会随着预测数据量的增加而变化(增大或降低),因此只能大致评估模型
# Coefficient of determination (R^2) of the predictions against the true targets.
# NOTE(review): y_test / result_prediction are not defined in this snippet --
# presumably the held-out targets and the model's predictions; confirm.
from sklearn.metrics import r2_score
print(r2_score(y_test,result_prediction))
# Mean Squared Error (MSE) of the predictions against the true targets.
from sklearn.metrics import mean_squared_error
print(mean_squared_error(y_test,result_prediction))
Root Mean Squared Error (RMSE) 均方根误差
# RMSE is obtained here as the square root of the MSE returned by scikit-learn.
import numpy as np
from sklearn.metrics import mean_squared_error
print(np.sqrt(mean_squared_error(y_test,result_prediction)))
Mean Absolute Error(MAE) 平均绝对误差
相当于求预测值与真实值的差值的绝对值,累加后求平均
# Mean Absolute Error (MAE) of the predictions against the true targets.
from sklearn.metrics import mean_absolute_error
print(mean_absolute_error(y_test,result_prediction))
Mean Absolute Percentage Error (MAPE)
指标值域是 [0, +∞)。特别地,当真实值中“0”或接近 0 的值较多时,这个指标会特别大,因此可以替换为 sMAPE
# Mean Absolute Percentage Error (MAPE); the result is kept in `mape` and,
# unlike the other snippets, is not printed.
# NOTE(review): scikit-learn documents this value as a fraction rather than a
# 0-100 percentage -- confirm against the installed version.
from sklearn.metrics import mean_absolute_percentage_error
mape = mean_absolute_percentage_error(y_test, result_prediction)
symmetric Mean Absolute Percentage Error (sMAPE)
# sMAPE scaled by 100: the mean over all samples of
# 2 * |prediction - truth| / (|truth| + |prediction|).
# NOTE(review): assumes y_test and result_prediction are NumPy arrays of equal
# length; a sample where both values are 0 gives 0/0, which presumably turns
# the whole sum into nan -- confirm.
import numpy as np
smape = 100 / len(y_test) * np.sum(
2 * np.abs(result_prediction - y_test) / (np.abs(y_test) + np.abs(result_prediction)))
def regression_evaluation(true_value: np.ndarray, pred_value: np.ndarray) -> pd.DataFrame:
    """Compute common regression metrics for a set of predictions.

    :param true_value: ground-truth target values
    :param pred_value: predicted values, aligned element-wise with ``true_value``
    :return: one-row DataFrame with the columns ``r2``, ``MAE``, ``RMSE``,
        ``MSE`` and ``SMAPE``; sMAPE is scaled by 100, so it lies in [0, 200]
    """
    from sklearn.metrics import mean_absolute_error
    from sklearn.metrics import mean_squared_error
    from sklearn.metrics import r2_score

    r2 = r2_score(true_value, pred_value)
    mae = mean_absolute_error(true_value, pred_value)
    mse = mean_squared_error(true_value, pred_value)
    # RMSE is derived from the MSE already computed instead of scoring twice.
    rmse = np.sqrt(mse)

    # Plain lists do not support element-wise subtraction, so normalise the
    # inputs to float arrays before doing the sMAPE arithmetic by hand.
    actual = np.asarray(true_value, dtype=float)
    predicted = np.asarray(pred_value, dtype=float)
    abs_error = np.abs(predicted - actual)
    scale = np.abs(actual) + np.abs(predicted)
    # scale is 0 only when truth and prediction are both 0, i.e. a perfect
    # prediction. Count that sample as 0 error instead of letting 0/0 turn
    # the whole metric into nan.
    ratios = np.divide(2 * abs_error, scale, out=np.zeros_like(scale), where=scale != 0)
    smape = 100 / len(actual) * np.sum(ratios)

    # MAPE is deliberately left out of the table. To add it, use
    # sklearn.metrics.mean_absolute_percentage_error(true_value, pred_value).
    return pd.DataFrame(
        [[r2, mae, rmse, mse, smape]],
        columns=['r2', 'MAE', 'RMSE', 'MSE', "SMAPE"],
    )
import pandas as pd
import numpy as np
def regression_evaluation(true_value: np.ndarray, pred_value: np.ndarray) -> pd.DataFrame:
    """Compute common regression metrics for a set of predictions.

    :param true_value: ground-truth target values
    :param pred_value: predicted values, aligned element-wise with ``true_value``
    :return: one-row DataFrame with the columns ``r2``, ``MAE``, ``RMSE``,
        ``MSE`` and ``SMAPE``; sMAPE is scaled by 100, so it lies in [0, 200]
    """
    from sklearn.metrics import mean_absolute_error
    from sklearn.metrics import mean_squared_error
    from sklearn.metrics import r2_score

    r2 = r2_score(true_value, pred_value)
    mae = mean_absolute_error(true_value, pred_value)
    mse = mean_squared_error(true_value, pred_value)
    # RMSE is derived from the MSE already computed instead of scoring twice.
    rmse = np.sqrt(mse)

    # Plain lists do not support element-wise subtraction, so normalise the
    # inputs to float arrays before doing the sMAPE arithmetic by hand.
    actual = np.asarray(true_value, dtype=float)
    predicted = np.asarray(pred_value, dtype=float)
    abs_error = np.abs(predicted - actual)
    scale = np.abs(actual) + np.abs(predicted)
    # scale is 0 only when truth and prediction are both 0, i.e. a perfect
    # prediction. Count that sample as 0 error instead of letting 0/0 turn
    # the whole metric into nan.
    ratios = np.divide(2 * abs_error, scale, out=np.zeros_like(scale), where=scale != 0)
    smape = 100 / len(actual) * np.sum(ratios)

    # MAPE is deliberately left out of the table. To add it, use
    # sklearn.metrics.mean_absolute_percentage_error(true_value, pred_value).
    return pd.DataFrame(
        [[r2, mae, rmse, mse, smape]],
        columns=['r2', 'MAE', 'RMSE', 'MSE', "SMAPE"],
    )
def get_data(size: int = 1000):
    """Generate a synthetic (truth, prediction) pair for trying out the metrics.

    :param size: number of samples to draw; defaults to 1000
    :return: tuple ``(true_value, pred)`` of 1-D arrays of length ``size``.
        ``true_value`` is a uniform draw from [0, 1) multiplied by a randomly
        chosen -100 or 100; ``pred`` is ``true_value`` plus a uniform draw
        from [0, 1) multiplied by a randomly chosen -1 or 1.
    """
    # The four draws happen in this fixed order so that seeding the global
    # NumPy generator reproduces the same sample.
    true_value = np.random.random(size=size) * np.random.choice([-100, 100], size=size)
    noise = np.random.random(size=size) * np.random.choice([-1, 1], size=size)
    pred = true_value + noise
    return true_value, pred
def main():
    """Score one synthetic sample and print the resulting metric table."""
    actual, predicted = get_data()
    print(regression_evaluation(actual, predicted))


# Run the demo only when the file is executed as a script.
if __name__ == '__main__':
    main()