本项目主要介绍贝叶斯优化的Python实现(在PyCharm中完成),分为机器学习和深度学习两个小案例,并附有相应的数据及代码文件。
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier as KNN # KNN模型
from sklearn.model_selection import cross_val_score # 交叉验证评估
from sklearn.metrics import accuracy_score # 计算准确率
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import scale # 标准化
from hyperopt import hp
from hyperopt import fmin # 最小化目标函数
from hyperopt import tpe # 搜索算法
from hyperopt import Trials
from hyperopt import STATUS_OK
hyperopt是一个贝叶斯优化库。下面使用的数据为iris数据集,调整的超参数有两个:是否标准化(scale)和KNN模型的近邻数(n_neighbors)。
# Load the iris table from a local CSV file (the path is machine-specific).
iris = pd.read_csv('E:/Jupyter/Mojin/HyperparameterOpt/data/iris.csv')
print(
    'iris shape: {0} \n iris tail(10): \n {1}'.format(
        iris.shape,
        iris.tail(10),
    )
)

# Convert the Species column to a pandas categorical ...
Species = iris['Species'].astype('category')
# ... and store each row's integer category code in place of the label.
iris['Species'] = Species.cat.codes
print('iris tail(10): \n{0}'.format(iris.tail(10)))
[输出:]
可以看到:总共有150个样本,其中Species为我们的目标变量,
其余各列为特征变量;Species已经被编码为数值。
# Feature columns fed to the classifier; Species is the target.
features = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']

# Hold out 25% of the rows as the test set, with a fixed seed for the split.
trainX, testX, trainY, testY = train_test_split(
    iris[features],
    iris['Species'],
    test_size=0.25,
    random_state=1234,
)
print('trainX shape: {0} \n trainY shape: {1}'.format(trainX.shape, trainY.shape))
def hyperopt_train_test(params, X=trainX, Y=trainY):
    """Score one KNN hyperparameter setting with 3-fold cross-validation.

    Args:
        params: Mapping holding ``'n_neighbors'`` and, optionally,
            ``'scale'``. The features are standardized only when
            ``params['scale'] == 1``. The mapping is left unmodified.
        X: Feature table; defaults to the module-level ``trainX``.
        Y: Target labels; defaults to the module-level ``trainY``.

    Returns:
        The mean of the three per-fold scores from ``cross_val_score``.
    """
    # Read the flag with .get() instead of deleting the key, so the caller's
    # dict is not mutated; scale() is called on X directly, so no defensive
    # copy of X is made.
    X_ = scale(X) if params.get('scale') == 1 else X
    model = KNN(n_neighbors=params['n_neighbors'])
    return cross_val_score(model, X_, Y, cv=3).mean()
# Search space for the hyperparameters being tuned:
#   scale        - 0 or 1, whether to standardize the features
#   n_neighbors  - one of the integers in range(1, 70)
space = {
    'scale': hp.choice('scale', [0, 1]),
    'n_neighbors': hp.choice('n_neighbors', range(1, 70)),
}
def f(params):
    """Objective for ``fmin``: the negated cross-validation score.

    Args:
        params: One sampled point of the search space, forwarded to
            ``hyperopt_train_test``.

    Returns:
        Dict with ``'loss'`` (minus the cross-validation score) and
        ``'status'`` set to ``STATUS_OK``.
    """
    cv_score = hyperopt_train_test(params=params)
    # fmin minimizes the loss, so a higher score must map to a lower loss.
    outcome = {'loss': -cv_score, 'status': STATUS_OK}
    return outcome
# Trials object handed to fmin to collect the evaluation history.
trials = Trials()

# Run 100 evaluations of the objective with the TPE suggestion algorithm.
best = fmin(fn=f, space=space, algo=tpe.suggest, max_evals=100, trials=trials)

# NOTE(review): for hp.choice dimensions hyperopt is documented to report the
# index of the chosen option here, not the option's value - confirm, and
# decode with hyperopt.space_eval before interpreting the numbers.
print('best hyperparameter:\n', best)
[输出:]
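可以看到,最优参数组合为:近邻数为8,不需要进行数据标准化,其准确率约为:97.30%!注意:对于用hp.choice定义的超参数,fmin返回的是所选选项的索引,而不是取值本身,需要用hyperopt.space_eval(space, best)还原为真实取值(例如n_neighbors的索引k对应的近邻数为k+1)。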
# Local imports: these names are not part of the file's top-level imports.
from hyperopt import space_eval
from sklearn.preprocessing import StandardScaler

# fmin reports hp.choice dimensions as option indices; space_eval maps them
# back to the actual hyperparameter values (index k of range(1, 70) is k + 1).
best_params = space_eval(space, best)

# Honor the selected 'scale' setting. The scaler is fitted on the training
# features only and then applied to both splits, so the test set is
# transformed with the same statistics the model was trained under.
if best_params['scale'] == 1:
    scaler = StandardScaler().fit(trainX)
    fitX, evalX = scaler.transform(trainX), scaler.transform(testX)
else:
    fitX, evalX = trainX, testX

model = KNN(n_neighbors=best_params['n_neighbors']).fit(fitX, trainY)
predY = model.predict(evalX)
testAcc = accuracy_score(testY, predY)
print('test accuracy: {0}'.format(testAcc))  # the dataset is rather small
[输出:]
可以看到测试集的准确率约为:97.37%!
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import RMSprop
from keras.datasets import mnist
from keras.utils import np_utils
hyperas是将hyperopt与keras相结合的库,下面使用的数据为:mnist数据集!
def data():
    """Load MNIST and give back (X_train, Y_train, X_test, Y_test).

    Images are flattened to 784-element float32 rows divided by 255, and
    the labels are expanded to 10 classes with np_utils.to_categorical.
    """
    # NOTE(review): hyperas is understood to inline this function's source
    # text into a generated script - keep the body flat and confirm before
    # restructuring it.
    (X_train, y_train), (X_test, y_test) = mnist.load_data('F:/DL-data/mnist.npz')
    # -1 lets NumPy infer the sample count instead of hard-coding 60000 and
    # 10000, so the function also works on a differently sized split.
    X_train = X_train.reshape(-1, 784).astype('float32') / 255
    X_test = X_test.reshape(-1, 784).astype('float32') / 255
    nb_classes = 10
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)
    return X_train, Y_train, X_test, Y_test
def model(X_train, Y_train, X_test, Y_test):
    """Build, train and evaluate one candidate network for the search.

    The arguments are the four arrays produced by data(). The result is a
    dict with the negated test accuracy as 'loss', STATUS_OK as 'status'
    and the fitted network as 'model'.
    """
    # NOTE(review): the double-curly-brace expressions below are not plain
    # Python values; hyperas is understood to substitute a sampled value for
    # each one by rewriting this function's source text - confirm before
    # reordering or restructuring anything here.
    model = Sequential()
    # Input layer: 784 features into 512 units.
    model.add(Dense(512, input_shape=(784,)))
    model.add(Activation('relu'))
    # Tunable dropout rate for the first hidden layer.
    model.add(Dropout({{uniform(0, 1)}}))
    # Tunable width of the second hidden layer.
    model.add(Dense({{choice([256, 512, 1024])}}))
    model.add(Activation('relu'))
    # Tunable dropout rate for the second hidden layer.
    model.add(Dropout({{uniform(0, 1)}}))
    # Output layer: 10 units with softmax.
    model.add(Dense(10))
    model.add(Activation('softmax'))
    rms = RMSprop()
    model.compile(loss='categorical_crossentropy', optimizer=rms, metrics=['accuracy'])
    # A single epoch per candidate; the batch size is tunable. The test
    # arrays are passed as validation data.
    model.fit(X_train, Y_train,
              batch_size={{choice([64, 128])}},
              epochs=1,
              verbose=2,
              validation_data=(X_test, Y_test))
    score, acc = model.evaluate(X_test, Y_test, verbose=0)
    print('Test accuracy:', acc)
    # The loss is the negated accuracy, so a lower loss means a better model.
    return {'loss': -acc, 'status': STATUS_OK, 'model': model}
if __name__ == '__main__':
    # Load the data once here so the best model can be re-evaluated below.
    X_train, Y_train, X_test, Y_test = data()

    # Five search evaluations with TPE; minimize gives back the selected
    # hyperparameters together with the corresponding fitted model.
    best_run, best_model = optim.minimize(
        model=model,
        data=data,
        algo=tpe.suggest,
        max_evals=5,
        trials=Trials(),
    )
    print("Evalutation of best performing model:")
    print(best_model.evaluate(X_test, Y_test))
    print(best_model)
    # Also report the hyperparameters that were selected; previously best_run
    # was computed but never shown.
    print("Best performing model chosen hyper-parameters:")
    print(best_run)
[输出:]
测试集的准确率为:96.06%!