# Download link (Baidu Netdisk): https://pan.baidu.com/s/17ou-yYTlUKACnrbmq1TV-Q
# Extraction code: gsjy
import lightgbm as lgb
# Load the training and test tables from the current working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Build the label vector and the feature matrix from the training table.
# 'SalePrice' is the label; 'Id' is not a feature. Both are excluded from X.
y = train['SalePrice'].values
X = train.drop(columns=['Id', 'SalePrice']).values
# K-fold cross-validation of a default LightGBM regressor.
kf = KFold(n_splits=10)
rmse_scores = []  # one RMSE value per fold
for train_indices, test_indices in kf.split(X):
    # Slice out this fold's training rows and held-out rows.
    X_train, X_test = X[train_indices], X[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]

    # Fit a fresh base model (default hyperparameters) on this fold.
    LGBR = lgb.LGBMRegressor()
    LGBR.fit(X_train, y_train)

    # Predict on the held-out rows.
    y_pred = LGBR.predict(X_test)

    # mean_squared_error returns the MSE; take the square root so the stored
    # value really is the RMSE that the variable names and printed labels
    # claim it is.
    rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))
    rmse_scores.append(rmse)

# Report the per-fold scores and their mean.
# NOTE: X_train / X_test / y_train / y_test stay bound to the final fold's
# split after the loop; the code further down the script reuses them.
print("rmse scores : ", rmse_scores)
print(f'average rmse scores : {np.mean(rmse_scores)}')
# Wrap the training and validation splits as LightGBM datasets.
# X_train / y_train / X_test / y_test are whatever the cross-validation loop
# earlier in the script left bound, i.e. the split of its final fold.
train_data = lgb.Dataset(X_train, label=y_train)  # training set
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)  # validation set

# Training parameters.
params = {
    'objective': 'regression',  # task: regression
    'metric': 'rmse',           # evaluation metric reported on the validation set
    'learning_rate': 0.1,       # learning rate
    'max_depth': 15,            # tree depth limit
    'num_leaves': 20,           # number of leaves per tree
}

# Train the booster. Early stopping is requested through the callback API:
# the `early_stopping_rounds=` keyword of lgb.train() was removed in
# LightGBM 4.0 and raises TypeError there, while the callback works on both
# older and newer releases.
model = lgb.train(
    params=params,
    train_set=train_data,
    num_boost_round=300,
    valid_sets=[test_data],
    valid_names=['test'],
    callbacks=[lgb.early_stopping(stopping_rounds=30)],
)

# Best validation RMSE recorded during training. A bare `score` expression
# shows nothing when run as a script, so print it explicitly.
score = model.best_score['test']['rmse']
print(f'validation rmse : {score}')

# Predict on the test table; 'Id' is dropped because it is not a feature.
test_pred = model.predict(test.drop(columns=['Id']).values)