1. 关闭GPU,只使用CPU
在 import tensorflow 之前,加上:
import os

# Enumerate devices in PCI bus order so the indices are stable.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
# An empty device list hides every GPU from CUDA, so only the CPU is used.
os.environ["CUDA_VISIBLE_DEVICES"] = ""
指定使用哪一个GPU
import os

# Expose only the GPU with index 2 to this process.
os.environ.update({"CUDA_VISIBLE_DEVICES": "2"})
2.设置keras占用GPU内存的比例:
import os
import tensorflow as tf
import keras.backend.tensorflow_backend as K
def get_session(gpu_fraction=0.3):
    """Create a TensorFlow session that claims only part of the GPU memory.

    For example, on a GPU with 6GB of memory the default fraction of 0.3
    allocates roughly 2GB.

    Args:
        gpu_fraction: Value passed to ``per_process_gpu_memory_fraction``.

    Returns:
        A ``tf.Session`` configured with the memory limit and, when the
        ``OMP_NUM_THREADS`` environment variable is set, with that many
        intra-op threads.

    Raises:
        ValueError: If ``OMP_NUM_THREADS`` is set but is not an integer.
    """
    num_threads = os.environ.get('OMP_NUM_THREADS')
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)

    if num_threads:
        # Environment values are strings, but the ConfigProto field is an
        # integer, so convert before handing it to TensorFlow.
        return tf.Session(config=tf.ConfigProto(
            gpu_options=gpu_options,
            intra_op_parallelism_threads=int(num_threads)))
    return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))


# Make Keras use the memory-limited session.
K.set_session(get_session())
同时指定GPU和内存 (转载自点击打开链接)
import os

# Restrict the process to the GPU with index 2. This is done before
# TensorFlow is imported so the restriction is in place before any CUDA
# initialisation can happen.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

# Let this process claim at most 30% of the GPU's memory.
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.3
set_session(tf.Session(config=config))
3.Callback中设置learning rate等Schedule,使用Early Stop:
# Callback that implements learning rate schedule
schedule = Step([20], [1e-4, 1e-6])

history = model.fit(
    X_train, Y_train,
    batch_size=batch_size, nb_epoch=nb_epoch,
    validation_data=(X_test, Y_test),
    callbacks=[
        schedule,
        # Checked after every epoch; writes the model to `filepath`
        # (save_best_only=True keeps only the best val_loss seen so far).
        keras.callbacks.ModelCheckpoint(
            filepath, monitor='val_loss', verbose=0,
            save_best_only=True, mode='auto'),
        # Optional early stopping: once the monitored value stops improving
        # (e.g. the loss did not decrease compared with the previous epoch),
        # training is aborted after `patience` more epochs.
        # keras.callbacks.EarlyStopping(
        #     monitor='val_loss', patience=5, verbose=0, mode='auto'),
    ],
    verbose=2, shuffle=True)
Step()定义如下:
class Step(Callback):
    """Piecewise-constant learning-rate schedule applied at each epoch start.

    ``steps`` lists epoch boundaries (expected in ascending order) and
    ``learning_rates`` lists one rate per interval, so it needs
    ``len(steps) + 1`` entries. For example ``Step([20], [1e-4, 1e-6])``
    uses 1e-4 while ``epoch < 20`` and 1e-6 afterwards.
    """

    def __init__(self, steps, learning_rates, verbose=0):
        """Store the schedule.

        Args:
            steps: Epoch indices at which the learning rate changes.
            learning_rates: Rates for the intervals delimited by ``steps``.
            verbose: When 1, print the rate every time it is applied.
        """
        # Let the base callback initialise its own attributes.
        super(Step, self).__init__()
        self.steps = steps
        self.lr = learning_rates
        self.verbose = verbose

    def change_lr(self, new_lr):
        """Write ``new_lr`` into the optimizer of the attached model."""
        K.set_value(self.model.optimizer.lr, new_lr)
        if self.verbose == 1:
            print('Learning rate is %g' % new_lr)

    def on_epoch_begin(self, epoch, logs=None):
        """Apply the rate of the interval that ``epoch`` falls into."""
        for i, step in enumerate(self.steps):
            if epoch < step:
                self.change_lr(self.lr[i])
                return
        # Past the last boundary (or no boundaries at all): use the rate
        # that follows the final step. Indexing by len(self.steps) instead of
        # the loop variable keeps this valid when `steps` is empty.
        self.change_lr(self.lr[len(self.steps)])

    def get_config(self):
        """Return the constructor arguments as a plain dict."""
        config = {'class': type(self).__name__,
                  'steps': self.steps,
                  'learning_rates': self.lr,
                  'verbose': self.verbose}
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild a ``Step`` from a config dict.

        An optional ``'epoch_offset'`` entry is subtracted from every step.
        """
        offset = config.get('epoch_offset', 0)
        steps = [step - offset for step in config['steps']]
        return cls(steps, config['learning_rates'],
                   verbose=config.get('verbose', 0))
附:github上的方法示例 (https://github.com/fchollet/keras/issues/888)
自定义schedule: learning rate是epoch的函数
def scheduler(epoch):
    """Return the learning rate for ``epoch``, switching to 0.02 at epoch 5.

    Relies on the module-level ``model`` and on the Keras backend being
    available as ``K``.
    """
    if epoch == 5:
        K.set_value(model.optimizer.lr, .02)
    # LearningRateScheduler expects a plain Python float, not a backend or
    # NumPy scalar.
    return float(K.get_value(model.optimizer.lr))
# Wrap the schedule function in a callback and pass it to training.
change_lr = LearningRateScheduler(scheduler)
model.fit(x_embed, y, nb_epoch=1, batch_size=batch_size, show_accuracy=True,
          callbacks=[change_lr])
keras文档的函数: LearningRateScheduler
keras.callbacks.LearningRateScheduler(schedule)
Learning rate scheduler.
Arguments
schedule: a function that takes an epoch index as input (integer, indexed from 0) and returns a new learning rate as output (float). 注意:schedule 的返回值必须是 float 类型!
ReduceLROnPlateau
keras.callbacks.ReduceLROnPlateau(monitor='val_loss',factor=0.1, patience=10,
verbose=0, mode='auto', epsilon=0.0001,cooldown=0, min_lr=0)
Reduce learning rate when a metric has stopped improving.
Models often benefit from reducing the learning rate by a factor of 2-10 once learning stagnates. This callback monitors a quantity and if no improvement is seen for a 'patience' number of epochs, the learning rate is reduced.
Example
# Shrink the learning rate by a factor of 0.2 when val_loss has not improved
# for 5 epochs, with 0.001 as the lower bound.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=0.001,
)
model.fit(X_train, Y_train, callbacks=[reduce_lr])
4.使用scikit-learn GridSearchCV和keras的scikit-learn的classification,regression接口进行网格超参数寻优
注意:keras/tensorflow下使用GPU时,进行多进程的gridsearch会报错,这时设置默认状态只用单进程或者将后台修改成为theano。
也可以切换到CPU模式下进行多进程的参数网格搜索,实验发现在CPU上进行scikit-learn的GridSearch搜索速度更快(因为使用了所有CPU内核)
# Use scikit-learn to grid search the dropout rate
import os

# Hide all GPUs before Keras (and with it the TensorFlow backend) is imported,
# so the grid search below runs on the CPU.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import numpy
from sklearn.grid_search import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm
# Model factory handed to KerasClassifier as build_fn
def create_model(dropout_rate=0.0, weight_constraint=0):
    """Build and compile the two-layer binary classifier.

    Args:
        dropout_rate: Rate of the Dropout layer after the hidden layer.
        weight_constraint: Max-norm limit applied to the hidden layer weights.

    Returns:
        The compiled ``Sequential`` model.
    """
    layers = [
        Dense(12, input_dim=8, init='uniform', activation='linear',
              W_constraint=maxnorm(weight_constraint)),
        Dropout(dropout_rate),
        Dense(1, init='uniform', activation='sigmoid'),
    ]
    net = Sequential(layers)
    net.compile(loss='binary_crossentropy', optimizer='adam',
                metrics=['accuracy'])
    return net
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

# load dataset
dataset = numpy.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:, 0:8]
Y = dataset[:, 8]

# create model
model = KerasClassifier(build_fn=create_model, nb_epoch=100, batch_size=10, verbose=2)

# define the grid search parameters
weight_constraint = [1, 2, 3, 4, 5]
dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
param_grid = dict(dropout_rate=dropout_rate, weight_constraint=weight_constraint)
# n_jobs=-1 asks scikit-learn to use all available CPU cores
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X, Y)

# summarize results: best combination first, then every grid point
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
for params, mean_score, scores in grid_result.grid_scores_:
    print("%f (%f) with: %r" % (scores.mean(), scores.std(), params))
有趣的地方:
笔者切换不同后台,分别在CPU,GPU下运行搜索程序
tensorflow时GPU下使用单进程,CPU下使用多进程,理由上述已说明,发现使用tensorflow为后端,在CPU下多进程搜索速度最快(4.0s),GPU单进程搜索需要32s。
而当后端为theano时,CPU,GPU下搜索速度几乎一样(6.2s),慢一点。笔者使用GPU:Nvidia GTX1080,CPU:Intel i7-6700K
参考:
http://www.cnblogs.com/dmzhuo/p/6214486.html
https://groups.google.com/forum/#!topic/keras-users/MFUEY9P1sc8