from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import xgboost as xgb,numpy as np
from sklearn.metrics import mean_squared_error
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 over ethical concerns with the dataset; on modern scikit-learn the
# import at the top of this file fails. fetch_california_housing is the
# suggested drop-in replacement — confirm before upgrading.
boston = load_boston()
X = boston.data    # feature matrix (the pasted output below shows 13 columns)
y = boston.target  # regression target (median house value)

# Split into 80% training and 20% test data. Seed the split so results are
# reproducible — the model below already fixes random_state=0.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)
print(X_train.shape)
print(X_train)
# Example console output (previously pasted inline as bare code, which is a
# syntax error in a .py file — kept here as a comment instead):
# (404, 13)
# [[2.59406e+01 0.00000e+00 1.81000e+01 ... 2.02000e+01 1.27360e+02 2.66400e+01]
#  ...
#  [5.64600e-02 0.00000e+00 1.28300e+01 ... 1.87000e+01 3.86400e+02 1.23400e+01]]
print(y_train.shape)
print(y_train)
# Example console output:
# (404,)
# [10.4 20. 19.7 27.5 18.7 17.5 35.4 30.7 17.8 23.8 ...]
# Build an XGBoost regressor. All hyperparameters are spelled out explicitly
# even where they match the defaults, so the configuration is self-documenting.
model_r = xgb.XGBRegressor(
    max_depth=3,
    learning_rate=0.1,
    n_estimators=100,
    # 'reg:linear' is a deprecated alias; 'reg:squarederror' is the same
    # squared-error objective under its current name. (This default differs
    # from XGBClassifier, which defaults to a classification objective.)
    objective='reg:squarederror',
    booster='gbtree',
    gamma=0,
    min_child_weight=1,
    subsample=1,
    colsample_bytree=1,
    reg_alpha=0,
    reg_lambda=1,
    random_state=0)
model_r.fit(X_train, y_train)       # train on the training split
model_r.save_model('xgb100.model')  # persist the trained booster to disk
# Reload the saved file as a low-level Booster (demonstrates save/load
# round-tripping; predictions come from the reloaded model, not model_r).
model = xgb.Booster(model_file='xgb100.model')
print(X_test)
# A raw Booster.predict (unlike the sklearn wrapper) requires a DMatrix input.
X_test1 = xgb.DMatrix(X_test)
print(X_test1)
# Example console output (previously pasted inline as bare code, which is a
# syntax error — kept here as a comment instead):
# [[2.11240e-01 1.25000e+01 7.87000e+00 ... 1.52000e+01 3.86630e+02 2.99300e+01]
#  ...
#  [1.25179e+00 0.00000e+00 8.14000e+00 ... 2.10000e+01 3.76570e+02 2.10200e+01]]
fit_pred1 = model.predict(X_test1)  # predict on the held-out test split
print(fit_pred1)
print(y_test)
# Example console output (predictions, then ground truth):
# [17.918705 27.60571 42.924686 22.536045 13.672787 ...]
# [16.5 28.4 50. 21.6 15.2 25.1 20.9 18.6 ...]
# Mean squared error of the reloaded model's predictions on the test split.
mse = mean_squared_error(y_test, fit_pred1)
print(mse)
# Example console output (previously pasted inline as bare code, which is a
# syntax error — kept as a comment): 9.22245242181814