【机器学习实战】使用XGBoost、RandomForest、线性回归实现波士顿房价回归预测

1. 代码

from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score, cross_validate, train_test_split
from xgboost import XGBRegressor


# Load the Boston housing dataset.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the installed scikit-learn version still provides it.
data = load_boston()
# Separate the feature matrix from the regression target.
x, y = data.data, data.target
# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=420,
)
# Instantiate the estimator and fit it on the training split.
reg = XGBRegressor(n_estimators=100)
reg.fit(x_train, y_train)
# Predict on the held-out split.
y_predict = reg.predict(x_test)
# Evaluate on the held-out split: R2 via the estimator's own score method,
# MSE from the predictions computed above.
test_r2 = reg.score(x_test, y_test)
test_mse = mean_squared_error(y_test, y_predict)
print('XGBoost的R2决定系数的值为:', test_r2)
print('XGBoost的MSE的值为:', test_mse)
# One advantage of tree models is that their feature-importance scores can be
# inspected; these can drive embedded feature selection (SelectFromModel).
feature_scores = reg.feature_importances_
print('XGBoost的重要性分数的值为:', feature_scores)
print('----------------------------')

# 2. Cross-validated comparison of random forest, XGBoost and linear regression
reg = XGBRegressor(n_estimators=100)  # pass in a fresh, unfitted model
# Score both metrics in a single 5-fold pass so every fold is fitted once
# instead of once per metric.
xgb_cv = cross_validate(
    reg,
    x_train,
    y_train,
    cv=5,
    scoring=('r2', 'neg_mean_squared_error'),
)
print('XGBoost交叉验证的R2结果为:', xgb_cv['test_r2'].mean())
print('XGBoost交叉验证的负MSE的值为:', xgb_cv['test_neg_mean_squared_error'].mean())
print('----------------------------')

# Random forest
rfr = RandomForestRegressor(n_estimators=100)
# The forest is not seeded, so two separate cross-validation runs would fit
# different forests. Scoring both metrics in one 5-fold pass makes the R2 and
# negative-MSE figures describe the same fitted models and halves the work.
rfr_cv = cross_validate(
    rfr,
    x_train,
    y_train,
    cv=5,
    scoring=('r2', 'neg_mean_squared_error'),
)
print('随机森林交叉验证的R2结果为:', rfr_cv['test_r2'].mean())
print('随机森林交叉验证的负MSE结果为:', rfr_cv['test_neg_mean_squared_error'].mean())
print('----------------------------')

# Linear regression
lr = LinearRegression()
# Score both metrics in a single 5-fold pass so every fold is fitted once
# instead of once per metric.
lr_cv = cross_validate(
    lr,
    x_train,
    y_train,
    cv=5,
    scoring=('r2', 'neg_mean_squared_error'),
)
print('线性回归交叉验证的R2结果为:', lr_cv['test_r2'].mean())
print('线性回归交叉验证的负MSE结果为:', lr_cv['test_neg_mean_squared_error'].mean())

2. 结果

图1:【机器学习实战】使用XGBoost、RandomForest、线性回归实现波士顿房价回归预测的运行结果(原文此处为截图)

你可能感兴趣的:(机器学习,线性回归,波士顿房价,回归预测,XGBoost)