import lightgbm as lgb
# Hyper-parameters passed to LightGBM for the regression model.
lgb_params = {
    # --- task ---
    'objective': 'mse',  # L2 regression loss (alias of regression / regression_l2 / mean_squared_error)
    'metric': 'rmse',  # evaluation metric: root mean squared error
    # --- tree shape and step size ---
    'num_leaves': 128,  # maximum number of leaves in one tree (2**7)
    'min_data_in_leaf': 128,  # minimum number of samples in a leaf (2**7); can be used to limit over-fitting
    'learning_rate': 0.03,  # shrinkage rate
    # --- random subsampling ---
    'feature_fraction': 0.75,  # fraction of features randomly selected for each tree (default 1.0)
    'bagging_fraction': 0.75,  # like feature_fraction, but randomly selects a fraction of the rows (data), not features
    'bagging_freq': 1,  # perform bagging every k iterations; 0 disables bagging
    'bagging_seed': 128,  # random seed for bagging (2**7)
    # --- runtime ---
    'nthread': 1,  # number of threads used by LightGBM
    'verbose': 1,  # log verbosity: <0 fatal, 0 error/warning, 1 info, >1 debug
}
# Train a LightGBM booster for 300 boosting rounds on the training features
# and labels, then predict on both the training and the test features.
estimator = lgb.train(
    params=lgb_params,
    train_set=lgb.Dataset(train_set.values, label=train_value.values),
    num_boost_round=300,
)
pred_train = estimator.predict(train_set.values)
pred_test = estimator.predict(test_set.values)
from sklearn.metrics import mean_squared_error
from math import sqrt
# Report the training RMSE for LightGBM. Both the targets and the predictions
# are clipped to the range [0, 20] before the error is computed.
lgb_train_mse = mean_squared_error(
    train_value.clip(0, 20).values,
    pred_train.clip(0, 20),
)
print('Train RMSE for %s is %f' % ('lightgbm', sqrt(lgb_train_mse)))
# Test-set counterpart, left disabled (it needs `test_value`):
# print('Test RMSE for %s is %f' % ('lightgbm', sqrt(mean_squared_error(test_value.clip(0,20).values, pred_test.clip(0,20)))))
from sklearn.linear_model import LinearRegression
# Fit a scikit-learn LinearRegression baseline on the training data, predict on
# the training and test features, and report the training RMSE with targets
# and predictions clipped to [0, 20].
lr = LinearRegression()
estimator = lr  # rebinds the shared `estimator` name to the linear model
estimator.fit(train_set, train_value)
pred_test = estimator.predict(test_set)
pred_train = estimator.predict(train_set)
# The label names the model that was actually fitted (LinearRegression); it
# previously said SGDRegressor, which mislabelled the logged score.
print('Train RMSE for %s is %f' % ('LinearRegression模型', sqrt(mean_squared_error(train_value.clip(0, 20), pred_train.clip(0, 20)))))
# Score obtained when using the LinearRegression model alone: 1.25011
# Score obtained when using the KerasRegressor model alone: 1.24982
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
def baseline_model():
    """Build and compile the small feed-forward regression network.

    The network has one hidden ``Dense`` layer of 20 softplus units whose
    input width is ``train_set.shape[1]`` (``train_set`` is read from module
    scope), followed by a single ReLU output unit. Both layers use the
    ``'uniform'`` kernel initializer. The model is compiled with MSE loss,
    the Nadam optimizer, and MSE as the reported metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Create the model: one hidden layer and one output unit.
    model = Sequential()
    model.add(Dense(20, input_dim=train_set.shape[1], kernel_initializer='uniform', activation='softplus'))
    model.add(Dense(1, kernel_initializer='uniform', activation='relu'))
    # Compile the model.
    model.compile(loss='mse', optimizer='Nadam', metrics=['mse'])
    # Alternative compile call kept for reference:
    # model.compile(loss='mean_squared_error', optimizer='adam')
    return model
# Wrap the Keras network in the scikit-learn style KerasRegressor, fit it for
# 5 epochs with a batch size of 55000, predict on the training and test
# features, and report the training RMSE with targets and predictions clipped
# to [0, 20]. verbose=1 turns on Keras progress output (0 is silent).
estimator = KerasRegressor(build_fn=baseline_model, verbose=1, epochs=5, batch_size=55000)
estimator.fit(train_set, train_value)
pred_test = estimator.predict(test_set)
pred_train = estimator.predict(train_set)
# The label names the model that was actually fitted (KerasRegressor); it
# previously said SGDRegressor, which mislabelled the logged score.
print('Train RMSE for %s is %f' % ('KerasRegressor模型', sqrt(mean_squared_error(train_value.clip(0, 20), pred_train.clip(0, 20)))))
# Test-set counterpart, left disabled (it needs `test_value`):
# print('Test RMSE for %s is %f' % ('KerasRegressor模型', sqrt(mean_squared_error(test_value.clip(0,20).values, pred_test.clip(0,20)))))