以下代码使用KerasClassifier构建简单神经网络,对iris多分类问题进行训练,最后加入10折交叉验证,计算模型准确率的均值和标准差。
from sklearn import datasets
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
# Fix NumPy's random seed; the same value is reused for the K-fold shuffle
seed = 7
np.random.seed(seed)
# Load the iris dataset and split it into the feature matrix and the labels
dataset = datasets.load_iris()
x, Y = dataset.data, dataset.target
# Model-building function
def create_model(optimizer='adam', init='glorot_uniform'):
    """Build and compile the small classification network.

    The network takes 4 input features, passes them through two ReLU
    hidden layers (4 and 6 units) and ends in a 3-unit softmax layer.

    Args:
        optimizer: Optimizer handed to ``compile``.
        init: Kernel initializer applied to every ``Dense`` layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Declare the whole layer stack up front instead of adding layer by layer
    layers = [
        Dense(units=4, activation='relu', input_dim=4, kernel_initializer=init),
        Dense(units=6, activation='relu', kernel_initializer=init),
        Dense(units=3, activation='softmax', kernel_initializer=init),
    ]
    network = Sequential(layers)
    # Compile with cross-entropy loss and track accuracy
    network.compile(
        loss='categorical_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy'],
    )
    return network
# Wrap the model-building function so scikit-learn can treat it as an estimator
model = KerasClassifier(
    build_fn=create_model,
    epochs=200,
    batch_size=5,
    verbose=0,
)
# Shuffled 10-fold cross-validation, seeded for a repeatable split
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator=model, X=x, y=Y, cv=kfold)
# Mean is printed as a percentage; the standard deviation is printed unscaled
print('Accuracy: %.2f%% (%.2f)' % (results.mean()*100, results.std()))
接下来,利用Boston数据集演示KerasRegressor的使用。
from sklearn import datasets
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
# Fix NumPy's random seed; the same value is reused for the K-fold shuffle
seed = 7
np.random.seed(seed)
# Load the Boston housing dataset and split it into features and targets
# NOTE(review): load_boston is not available in recent scikit-learn
# releases -- confirm the installed version still provides it.
dataset = datasets.load_boston()
x, Y = dataset.data, dataset.target
构建模型函数。需要注意:回归问题不需要对预测结果进行分类,因此输出层不设置激活函数,直接输出预测值。
# Model-building function
def creat_model(units_list=(13,), optimizer='adam', init='normal'):
    """Build and compile a fully connected regression network.

    Args:
        units_list: Sequence of hidden-layer widths. One ReLU ``Dense``
            layer is created per entry, in order; the first entry is the
            layer that receives the 13 input features.
        optimizer: Optimizer handed to ``compile``.
        init: Kernel initializer applied to every ``Dense`` layer.

    Returns:
        The compiled ``Sequential`` model, ending in a single output unit
        with no activation and trained with mean squared error.
    """
    model = Sequential()
    # First hidden layer; it also declares the input dimension
    first_units = units_list[0]
    model.add(Dense(units=first_units, activation='relu', input_dim=13,
                    kernel_initializer=init))
    # Remaining hidden layers. Slice with [1:] -- indexing with [1] would
    # pick a single int (or fail outright when only one width is given).
    for hidden_units in units_list[1:]:
        model.add(Dense(units=hidden_units, activation='relu',
                        kernel_initializer=init))
    # Output layer: regression predicts a raw value, so no activation
    model.add(Dense(units=1, kernel_initializer=init))
    # Mean squared error is the loss minimised by the chosen optimizer
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model
KerasRegressor调用模型函数,并设置对应的迭代次数和每次更新权重的每一批次的实例个数。最后运用k折交叉验证看结果。
# Wrap the model-building function so scikit-learn can treat it as an estimator
model = KerasRegressor(
    build_fn=creat_model,
    epochs=200,
    batch_size=5,
    verbose=0,
)
# Baseline evaluation: shuffled 10-fold cross-validation with a fixed seed
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator=model, X=x, y=Y, cv=kfold)
# NOTE(review): the values come from the wrapper's own score method --
# confirm its sign convention before reading them as raw MSE.
print('Baseline: %.2f (%.2f) MSE' % (results.mean(), results.std()))
接下来对以上模型进行优化。
第一,上述Boston的输入数据尺度不同,即各指标间数据差值较大,可能会导致模型偏差,因此需要事先对输入数据(即x)进行标准化处理。此处在模型评估过程中使用scikit-learn的pipeline框架,首先对数据集标准化,之后创建和评估基线神经网络。使用pipeline框架可以在交叉验证的每一个折中单独执行数据标准化处理,从而避免验证折的信息泄漏到训练过程中。
# Standardize the inputs to improve the baseline
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
# Chain scaling and the network so both are fitted together on each fold
steps = [
    ('standardized', StandardScaler()),
    ('mlp', model),
]
pipeline = Pipeline(steps)
# Same shuffled 10-fold split as the baseline evaluation
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator=pipeline, X=x, y=Y, cv=kfold)
print('StandardScaler: %.2f (%.2f) MSE' % (results.mean(), results.std()))
第二、可对基准神经网络进行优化,即使用网格搜索方法,找到最优超参数。
# Optimize the model with a grid search over hyper-parameters
from sklearn.model_selection import GridSearchCV
# Candidate values for every tuned parameter
param_grid = {
    # Each candidate lists the hidden-layer widths in order, e.g. [20] is
    # one hidden layer of 20 units and [13, 6] is two layers of 13 and 6.
    'units_list': [[20], [13, 2], [13, 6]],
    'optimizer': ['rmsprop', 'adam'],
    'init': ['glorot_uniform', 'normal'],
    'epochs': [100, 200],
    'batch_size': [5, 20],
}
# Standardize the inputs once, then search over the scaled data
scaler = StandardScaler()
scaler_x = scaler.fit_transform(x)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
results = grid.fit(scaler_x, Y)
# Pull the per-combination scores out of the fitted search; these names
# were previously used without ever being defined.
means = results.cv_results_['mean_test_score']
stds = results.cv_results_['std_test_score']
params = results.cv_results_['params']
for mean, std, param in zip(means, stds, params):
    print('%f (%f) with: %r' % (mean, std, param))
该文代码源自魏贞原《深度学习:基于Keras的Python实践》