拿来主义:看我的代码,我是在模型acc和验证数据集val_acc都达到99.8%时候才终止训练。
from __future__ import print_function

import sys

import numpy as np
import tflearn
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from tflearn.data_utils import to_categorical
from tflearn.layers.core import dropout
from tflearn.layers.normalization import batch_normalization


class EarlyStoppingCallback(tflearn.callbacks.Callback):
    """Stop training once training AND validation accuracy reach a threshold.

    Training is interrupted by raising ``StopIteration`` from
    ``on_epoch_end``; the caller is expected to catch it around ``fit()``.
    """

    def __init__(self, val_acc_thresh):
        """Store the accuracy threshold.

        Args:
            val_acc_thresh: accuracy that both ``training_state.acc_value``
                and ``training_state.val_acc`` must reach before stopping.
        """
        self.val_acc_thresh = val_acc_thresh

    def on_epoch_end(self, training_state):
        """Check both accuracies at the end of an epoch and stop if reached.

        Raises:
            StopIteration: when both accuracies are >= the threshold.
        """
        val_acc = training_state.val_acc
        train_acc = training_state.acc_value
        # Either metric may not have been computed for this epoch; there is
        # nothing to compare yet, so keep training.
        if val_acc is None or train_acc is None:
            return
        if val_acc >= self.val_acc_thresh and train_acc >= self.val_acc_thresh:
            # Only announce termination when we actually terminate.
            print("Terminating training at the end of epoch", training_state.epoch)
            raise StopIteration

    def on_train_end(self, training_state):
        """Report the final training accuracy once training has ended."""
        print("Successfully left training! Final model accuracy:", training_state.acc_value)


if __name__ == "__main__":
    # NOTE(review): parse_line is not defined in this snippet; it must be
    # provided elsewhere. Each parsed row is used as [label, feature...].
    with open("feature_with_dnn_todo.dat") as f:
        training_data = [parse_line(line) for line in f]
    if not training_data:
        sys.exit("feature_with_dnn_todo.dat contains no samples")

    # Rows whose first field is 2 form the positive class (1); all others 0.
    org_labels = [1 if int(row[0]) == 2 else 0 for row in training_data]
    labels = to_categorical(org_labels, nb_classes=2)
    data = [row[1:] for row in training_data]
    input_dim = len(data[0])

    X = data
    Y = labels
    print("X len:", len(X), "Y len:", len(Y))

    trainX, testX, trainY, testY = train_test_split(X, Y, test_size=0.2, random_state=42)
    print(trainX[0])
    print(trainY[0])
    print(testX[-1])
    print(testY[-1])

    # Build neural network
    net = tflearn.input_data(shape=[None, input_dim])

    # RMSProp | epoch: 100 | loss: 0.25209 - acc: 0.9109 | val_loss: 0.19742 - val_acc: 0.9392 -- iter: 14084/14084 remove unwanted_cols 2
    # | RMSProp | epoch: 100 | loss: 0.29420 - acc: 0.9075 | val_loss: 0.14464 - val_acc: 0.9551 -- iter: 14084/14084
    net = batch_normalization(net)
    dense1 = tflearn.fully_connected(net, 64, activation='tanh',
                                     regularizer='L2', weight_decay=0.001)
    dropout1 = tflearn.dropout(dense1, 0.8)
    dense2 = tflearn.fully_connected(dropout1, 64, activation='tanh',
                                     regularizer='L2', weight_decay=0.001)
    dropout2 = tflearn.dropout(dense2, 0.8)
    softmax = tflearn.fully_connected(dropout2, 2, activation='softmax')

    # Regression using RMSProp and categorical cross-entropy.
    net = tflearn.regression(softmax, optimizer="rmsprop", learning_rate=0.001,
                             loss='categorical_crossentropy')

    # Alternative architecture kept for reference (not executed):
    # | Adam | epoch: 100 | loss: 0.15578 - acc: 0.9419 | val_loss: 0.16620 - val_acc: 0.9392 -- iter: 14084/14084
    #   net = batch_normalization(net)
    #   net = tflearn.fully_connected(net, input_dim)
    #   net = tflearn.fully_connected(net, 128, activation='tanh')
    #   net = dropout(net, 0.5)
    #   net = tflearn.fully_connected(net, 2, activation='softmax')
    #   net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
    #                            loss='categorical_crossentropy', name='target')

    # Define model
    model = tflearn.DNN(net)

    # Start training (apply gradient descent algorithm)
    # Initialize our callback with desired accuracy threshold.
    early_stopping_cb = EarlyStoppingCallback(val_acc_thresh=0.998)
    try:
        model.fit(trainX, trainY, validation_set=(testX, testY), n_epoch=500,
                  batch_size=8, show_metric=True, callbacks=early_stopping_cb)
    except StopIteration:
        # Raised by EarlyStoppingCallback: the accuracy target was reached.
        print("pass")

    filename = 'dns_tunnel998.tflearn'
    model.save(filename)
    model.load(filename)

    # Column 0 of each prediction is the probability of class 0.
    y_predict = [0 if probs[0] >= 0.5 else 1 for probs in model.predict(X)]

    print(classification_report(org_labels, y_predict))
    print(confusion_matrix(org_labels, y_predict))
The EarlyStoppingCallback Class
I show a proof-of-concept version of early stopping below. This is the simplest possible case: just stop training after the first epoch no matter what. It is up to the user to decide the conditions they want to trigger the stopping on.
class EarlyStoppingCallback(tflearn.callbacks.Callback):
    """Proof-of-concept callback: always stops at the end of the first epoch.

    The threshold is stored for later use but is deliberately not consulted
    here; this version only demonstrates the stop mechanism.
    """

    def __init__(self, val_acc_thresh):
        """Remember the validation-accuracy threshold passed by the caller."""
        # Kept so a real stopping rule can compare against it per epoch,
        # per batch step, etc.
        self.val_acc_thresh = val_acc_thresh

    def on_train_end(self, training_state):
        """Report the final training accuracy.

        Per the original notes, tflearn calls this right after training is
        terminated (or when it ends normally), which makes it a good place
        to record anything tflearn does not already store.
        """
        final_acc = training_state.acc_value
        print("Successfully left training! Final model accuracy:", final_acc)

    def on_epoch_end(self, training_state):
        """Announce the epoch and abort training by raising StopIteration.

        Per the original notes, this is the last hook run inside the epoch
        loop, so raising here leaves training without losing information.
        """
        finished_epoch = training_state.epoch
        print("Terminating training at the end of epoch", finished_epoch)
        raise StopIteration


# Initialize our callback with desired accuracy threshold.
early_stopping_cb = EarlyStoppingCallback(val_acc_thresh=0.5)
Result: Train the Model and Stop Early
try:
    # Give it to our trainer and let it fit the data.
    trainer.fit(feed_dicts={X: trainX, Y: trainY},
                val_feed_dicts={X: testX, Y: testY},
                n_epoch=1,
                show_metric=True,  # Calculate accuracy and display at every step.
                callbacks=early_stopping_cb)
except StopIteration:
    # The callback stops training by raising StopIteration out of fit();
    # catching it here hands control back to this script.
    print("Caught callback exception. Returning control to user program.")
Training Step: 860 | total loss: 1.73372
| Optimizer | epoch: 002 | loss: 1.73372 - acc: 0.8196 | val_loss: 1.87058 - val_acc: 0.8011 -- iter: 55000/55000
Training Step: 860 | total loss: 1.73372
| Optimizer | epoch: 002 | loss: 1.73372 - acc: 0.8196 | val_loss: 1.87058 - val_acc: 0.8011 -- iter: 55000/55000
--
Terminating training at the end of epoch 2
Successfully left training! Final model accuracy: 0.8196054697036743
Caught callback exception. Returning control to user program.
Appendix
For my own reference, this is the code I started with before tinkering with the early stopping solution above.
from __future__ import division, print_function, absolute_import

import os
import sys
import tempfile
import urllib
import collections
import math

import numpy as np
import tensorflow as tf
from scipy.io import arff
import tflearn
from sklearn.utils import shuffle
from sklearn.metrics import roc_auc_score
# NOTE: this import rebinds `shuffle`; the tflearn version is the one
# actually used below, the sklearn one imported above is shadowed.
from tflearn.data_utils import shuffle, to_categorical
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization, batch_normalization
from tflearn.layers.estimator import regression
import tflearn.datasets.mnist as mnist

# Load the data and handle any preprocessing here.
X, Y, testX, testY = mnist.load_data(one_hot=True)
X, Y = shuffle(X, Y)
X = X.reshape([-1, 28, 28, 1])
testX = testX.reshape([-1, 28, 28, 1])

# Define our network architecture: a simple 2-layer network of the form
# InputImages -> Fully Connected -> Softmax
out_readin1 = input_data(shape=[None, 28, 28, 1])
out_fully_connected2 = fully_connected(out_readin1, 10)
out_softmax3 = fully_connected(out_fully_connected2, 10, activation='softmax')

# Identifier for this run. Named `script_hash` rather than `hash` so the
# builtin hash() is not shadowed for the rest of the module.
script_hash = 'f0c188c3777519fb93f1a825ca758a0c'
scriptid = 'MNIST-' + script_hash

# Define our training metrics.
network = regression(out_softmax3,
                     optimizer='adam',
                     learning_rate=0.01,
                     loss='categorical_crossentropy',
                     name='target')
model = tflearn.DNN(network, tensorboard_verbose=3)

try:
    # early_stopping_cb is the callback instance created earlier in the post.
    model.fit(X, Y, n_epoch=1, validation_set=(testX, testY),
              snapshot_epoch=False, show_metric=True,
              run_id=scriptid, callbacks=early_stopping_cb)
except StopIteration:
    # Raised by the callback to end training early.
    print("Caught callback exception. Returning control to user program.")

# Score the model on the held-out test split.
prediction = model.predict(testX)
auc = roc_auc_score(testY, prediction, average='macro', sample_weight=None)
accuracy = model.evaluate(testX, testY)
print("Accuracy:", accuracy)
print("ROC AUC Score:", auc)
Training Step: 860 | total loss: 0.30941
| Adam | epoch: 001 | loss: 0.30941 - acc: 0.9125 -- iter: 55000/55000
Terminating training at the end of epoch 1
Successfully left training! Final model accuracy: 0.9125033020973206
Caught callback exception. Returning control to user program.
Accuracy: [0.90410000000000001]
ROC AUC Score: 0.992379719297
参考:http://mckinziebrandon.me/TensorflowNotebooks/2016/11/19/tflearn-only.html
TFLearn
19 Nov 2016
Examples::Extending Tensorflow::Trainer
import tensorflow as tf
import tflearn
import tflearn.datasets.mnist as mnist

# Load the MNIST train/test splits through tflearn's dataset helper.
# one_hot=True is passed so labels come back one-hot encoded (they are later
# fed to a [None, 10] placeholder).
trainX, trainY, testX, testY = mnist.load_data(one_hot=True)
hdf5 not supported (please install/reinstall h5py)
Extracting mnist/train-images-idx3-ubyte.gz
Extracting mnist/train-labels-idx1-ubyte.gz
Extracting mnist/t10k-images-idx3-ubyte.gz
Extracting mnist/t10k-labels-idx1-ubyte.gz
Define the Architecture (Basic Tensorflow)
# Because I don't feel like retyping stuff.
def tfp(shape):
    """Return a float placeholder with the given shape."""
    return tf.placeholder("float", shape)


def tfrn(shape, name):
    """Return a named Variable initialised from a random normal of `shape`."""
    return tf.Variable(tf.random_normal(shape), name=name)


# Define the inputs/outputs/weights as usual.
X, Y = tfp([None, 784]), tfp([None, 10])
W1, W2, W3 = tfrn([784, 256], 'W1'), tfrn([256, 256], 'W2'), tfrn([256, 10], 'W3')
b1, b2, b3 = tfrn([256], 'b1'), tfrn([256], 'b2'), tfrn([10], 'b3')


# Multilayer perceptron.
def dnn(x):
    """Apply two tanh hidden layers and a linear output layer to `x`.

    Returns the un-normalised logits; softmax is applied inside the loss.
    """
    x = tf.tanh(tf.add(tf.matmul(x, W1), b1))
    x = tf.tanh(tf.add(tf.matmul(x, W2), b2))
    x = tf.add(tf.matmul(x, W3), b3)
    return x


net = dnn(X)
# Pass logits/labels by keyword: TensorFlow >= 1.0 rejects positional
# arguments here, and keywords also work on earlier releases.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=net, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
# Fraction of samples whose arg-max prediction matches the arg-max label.
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(net, 1), tf.argmax(Y, 1)), tf.float32),
    name='acc')
Using a TFLearn Trainer
# Bundle the loss, optimizer and accuracy metric defined above into a TrainOp
# (mini-batches of 128), then hand that op to a Trainer.
trainop = tflearn.TrainOp(loss=loss, optimizer=optimizer,
                          metric=accuracy, batch_size=128)
trainer = tflearn.Trainer(train_ops=trainop, tensorboard_verbose=1)
# Train for two epochs, feeding the placeholders X/Y with the training split
# and supplying the test split as validation data.
trainer.fit({X: trainX, Y: trainY}, val_feed_dicts={X: testX, Y: testY},
            n_epoch=2, show_metric=True)
Training Step: 860 | total loss: 1.73376
| Optimizer | epoch: 002 | loss: 1.73376 - acc: 0.8053 | val_loss: 1.78279 - val_acc: 0.8015 -- iter: 55000/55000
Training Step: 860 | total loss: 1.73376
| Optimizer | epoch: 002 | loss: 1.73376 - acc: 0.8053 | val_loss: 1.78279 - val_acc: 0.8015 -- iter: 55000/55000
--
Training Callbacks
One suggestion for early stopping with tflearn (made by owner of tflearn repository) is to define a custom callback that raises an exception when we want to stop training. I’ve written a small snippet below as an example.
class EarlyStoppingCallback(tflearn.callbacks.Callback):
    """Stop training once validation accuracy exceeds a threshold.

    Also records ``training_state.global_acc`` at the end of every epoch in
    ``self.accs`` so it can be inspected after training.
    """

    def __init__(self, acc_thresh):
        """
        Args:
            acc_thresh - if our accuracy > acc_thresh, terminate training.
        """
        self.acc_thresh = acc_thresh
        self.accs = []

    def on_epoch_end(self, training_state):
        """Record the epoch's global accuracy and stop if the goal is met.

        Raises:
            StopIteration: when validation accuracy is available and is
                greater than ``acc_thresh``.
        """
        self.accs.append(training_state.global_acc)
        # val_acc can be None when no validation pass ran for this epoch.
        # Stop when accuracy EXCEEDS the threshold, as documented in
        # __init__ (the comparison was previously inverted).
        if training_state.val_acc is not None and training_state.val_acc > self.acc_thresh:
            raise StopIteration
# Create the callback and pass it to fit(). StopIteration is intentionally
# not caught here: the traceback printed below shows how the exception
# raised inside on_epoch_end propagates out of trainer.fit().
cb = EarlyStoppingCallback(acc_thresh=0.5)
trainer.fit({X: trainX, Y: trainY}, val_feed_dicts={X: testX, Y: testY},
            n_epoch=3, show_metric=True, snapshot_epoch=False,
            callbacks=cb)
Training Step: 3965 | total loss: 0.33810
| Optimizer | epoch: 010 | loss: 0.33810 - acc: 0.9455 -- iter: 55000/55000
GOODBYE
---------------------------------------------------------------------------
StopIteration Traceback (most recent call last)
in ()
2 trainer.fit({X: trainX, Y: trainY}, val_feed_dicts={X: testX, Y: testY},
3 n_epoch=3, show_metric=True, snapshot_epoch=False,
----> 4 callbacks=cb)
/usr/local/lib/python3.5/dist-packages/tflearn/helpers/trainer.py in fit(self, feed_dicts, n_epoch, val_feed_dicts, show_metric, snapshot_step, snapshot_epoch, shuffle_all, dprep_dict, daug_dict, excl_trainops, run_id, callbacks)
315
316 # Epoch end
--> 317 caller.on_epoch_end(self.training_state)
318
319 finally:
/usr/local/lib/python3.5/dist-packages/tflearn/callbacks.py in on_epoch_end(self, training_state)
67 def on_epoch_end(self, training_state):
68 for callback in self.callbacks:
---> 69 callback.on_epoch_end(training_state)
70
71 def on_train_end(self, training_state):
in on_epoch_end(self, training_state)
13 if True:
14 print("GOODBYE")
---> 15 raise StopIteration
StopIteration:
# Inspect the per-epoch global_acc values the callback collected.
cb.accs
[None]
参考:
Early Stopping with TensorFlow and TFLearn
20 Nov 2016
import tensorflow as tf
import tflearn
import tflearn.datasets.mnist as mnist

# Load the MNIST train/test splits through tflearn's dataset helper.
# one_hot=True is passed so labels come back one-hot encoded (they are later
# fed to a [None, n_classes] placeholder).
trainX, trainY, testX, testY = mnist.load_data(one_hot=True)
hdf5 not supported (please install/reinstall h5py)
Extracting mnist/train-images-idx3-ubyte.gz
Extracting mnist/train-labels-idx1-ubyte.gz
Extracting mnist/t10k-images-idx3-ubyte.gz
Extracting mnist/t10k-labels-idx1-ubyte.gz
# Layer sizes: flattened input, hidden width, and number of output classes.
n_features = 784
n_hidden = 256
n_classes = 10

# Define the inputs/outputs/weights as usual.
X = tf.placeholder("float", [None, n_features])
Y = tf.placeholder("float", [None, n_classes])

# Define the connections/weights and biases between layers.
W1 = tf.Variable(tf.random_normal([n_features, n_hidden]), name='W1')
W2 = tf.Variable(tf.random_normal([n_hidden, n_hidden]), name='W2')
W3 = tf.Variable(tf.random_normal([n_hidden, n_classes]), name='W3')

b1 = tf.Variable(tf.random_normal([n_hidden]), name='b1')
b2 = tf.Variable(tf.random_normal([n_hidden]), name='b2')
b3 = tf.Variable(tf.random_normal([n_classes]), name='b3')

# Define the operations throughout the network: two tanh hidden layers
# followed by a linear output layer that produces the logits.
net = tf.tanh(tf.add(tf.matmul(X, W1), b1))
net = tf.tanh(tf.add(tf.matmul(net, W2), b2))
net = tf.add(tf.matmul(net, W3), b3)

# Define the optimization problem.
# Pass logits/labels by keyword: TensorFlow >= 1.0 rejects positional
# arguments here, and keywords also work on earlier releases.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=net, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
# Fraction of samples whose arg-max prediction matches the arg-max label.
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(net, 1), tf.argmax(Y, 1)), tf.float32),
    name='acc')
Early Stopping
Training Setup
In tflearn, we can train our model with a tflearn.Trainer object: “Generic class to handle any TensorFlow graph training. It requires the use of TrainOp to specify all optimization parameters.”
-
TrainOp represents a set of operation used for optimizing a network.
-
Example: Time to initialize our trainer to work with our MNIST network. Below we create a TrainOp object that is then used to tell our trainer:
- Our loss function. (softmax cross entropy with logits)
- Our optimizer. (GradientDescentOptimizer)
- Our evaluation [tensor] metric. (classification accuracy)
# Bundle the loss, optimizer and accuracy metric defined above into a TrainOp
# (mini-batches of 128), then hand that op to a Trainer.
trainop = tflearn.TrainOp(loss=loss, optimizer=optimizer,
                          metric=accuracy, batch_size=128)
trainer = tflearn.Trainer(train_ops=trainop, tensorboard_verbose=1)
Callbacks
The Callbacks interface describes a set of methods that we can implement ourselves and that will be called at runtime. Below are our options; here we will be primarily concerned with the on_epoch_end() method.
Methods:
def on_train_begin(self, training_state): def on_epoch_begin(self, training_state): def on_batch_begin(self, training_state): def on_sub_batch_begin(self, training_state): def on_sub_batch_end(self, training_state, train_index=0): def on_batch_end(self, training_state, snapshot=False): def on_epoch_end(self, training_state): def on_train_end(self, training_state):
- TrainingState: Notice that each method requires us to pass a training_state object as an argument. These useful helpers will be able to provide us with the information we need to determine when to stop training. Below is a list of the instance variables we can access with a training_state object:
- self.epoch
- self.step
- self.current_iter
- self.acc_value
- self.loss_value
- self.val_acc
- self.val_loss
- self.best_accuracy
- self.global_acc
- self.global_loss
- Implementing our Callback: Let’s say we want to stop training when the validation accuracy reaches a certain threshold. Below, we implement the code required to define such a callback and fit the MNIST data.
class EarlyStoppingCallback(tflearn.callbacks.Callback):
    """Abort training as soon as validation accuracy passes a threshold."""

    def __init__(self, val_acc_thresh):
        """Keep the validation-accuracy threshold for later comparison.

        Note: We are free to define our init function however we please.
        """
        self.val_acc_thresh = val_acc_thresh

    def on_epoch_end(self, training_state):
        """Raise StopIteration when validation accuracy exceeds the threshold.

        Does nothing when no validation accuracy is available for the epoch
        (apparently this can happen).
        """
        current_val_acc = training_state.val_acc
        should_stop = (current_val_acc is not None
                       and current_val_acc > self.val_acc_thresh)
        if should_stop:
            raise StopIteration
# Initialize our callback.
early_stopping_cb = EarlyStoppingCallback(val_acc_thresh=0.5)

# Give it to our trainer and let it fit the data.
try:
    trainer.fit(feed_dicts={X: trainX, Y: trainY},
                val_feed_dicts={X: testX, Y: testY},
                n_epoch=2,
                show_metric=True,  # Calculate accuracy and display at every step.
                # Validation metrics only appear in the training log when
                # end-of-epoch snapshots are enabled; with
                # snapshot_epoch=False val_acc stays None and the callback
                # could never trigger.
                snapshot_epoch=True,
                callbacks=early_stopping_cb)
except StopIteration:
    # The callback ends training by raising StopIteration out of fit().
    print("Caught callback exception. Returning control to user program.")
Training Step: 1720 | total loss: 0.81290
| Optimizer | epoch: 004 | loss: 0.81290 - acc_2: 0.8854 -- iter: 55000/55000
Using tf.contrib.learn instead
Iris data loading/tutorial prep
Note: can also load via:
```python
import csv
import random

import numpy as np
from sklearn import datasets
# sklearn.cross_validation was removed from scikit-learn; train_test_split
# lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

# Load the dataset BEFORE splitting it (iris was previously used before
# it was assigned).
iris = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.33, random_state=42)

print(iris.data.shape)
print("Xt", X_train.shape, "Yt", y_train.shape)
```
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Suppress the massive amount of warnings.
tf.logging.set_verbosity(tf.logging.ERROR)

# Data sets
IRIS_TRAINING = "iris_training.csv"
IRIS_TEST = "iris_test.csv"

# Load datasets.
# target_dtype uses the builtin int: np.int was only an alias for it and has
# been removed from current NumPy releases.
training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=IRIS_TRAINING, target_dtype=int, features_dtype=np.float32)
test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=IRIS_TEST, target_dtype=int, features_dtype=np.float32)

# Specify that all features have real-value data
feature_columns = [tf.contrib.layers.real_valued_column("", dimension=4)]

# Build 3 layer DNN with 10, 20, 10 units respectively.
classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns,
                                            hidden_units=[10, 20, 10],
                                            n_classes=3,
                                            model_dir="/tmp/iris_model")

# Fit model.
# X_train / y_train / X_test / y_test come from the train_test_split snippet
# shown just before this cell, not from the CSV files loaded above.
classifier.fit(x=X_train, y=y_train, steps=2000)

# Evaluate accuracy.
accuracy_score = classifier.evaluate(x=X_test, y=y_test)["accuracy"]
print('Accuracy: {0:f}'.format(accuracy_score))

# Classify two new flower samples.
new_samples = np.array(
    [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=np.float32)
y = classifier.predict(new_samples)
print('Predictions: {}'.format(str(y)))
Accuracy: 0.980000
Predictions: [1 1]
Validation Monitors
# Vanilla version
# Monitor that evaluates on the test set every 50 training steps.
validation_monitor = tf.contrib.learn.monitors.ValidationMonitor(test_set.data,
                                                                 test_set.target,
                                                                 every_n_steps=50)

# Rebuild the classifier with a RunConfig that checkpoints every second.
# NOTE(review): presumably the frequent checkpoints are what the monitor
# evaluates against — confirm against the tf.contrib.learn docs.
classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns,
                                            hidden_units=[10, 20, 10],
                                            n_classes=3,
                                            model_dir="/tmp/iris_model",
                                            config=tf.contrib.learn.RunConfig(
                                                save_checkpoints_secs=1))

# Train on the CSV-loaded training set with the monitor attached.
classifier.fit(x=training_set.data,
               y=training_set.target,
               steps=2000,
               monitors=[validation_monitor])
Estimator(params={'dropout': None, 'hidden_units': [10, 20, 10], 'weight_column_name': None, 'feature_columns': [_RealValuedColumn(column_name='', dimension=4, default_value=None, dtype=tf.float32, normalizer=None)], 'optimizer': 'Adagrad', 'n_classes': 3, 'activation_fn': , 'num_ps_replicas': 0, 'gradient_clip_norm': None, 'enable_centered_bias': True})
Customizing the Evaluation Metrics and Stopping Early
If we run the code below, it stops early! Warning: You’re going to see a lot of WARNING print outputs from tf. I guess this tutorial is a bit out of date. But that’s not what we care about here; we just want that early stopping! The important output to notice is
INFO:tensorflow:Validation (step 22556): accuracy = 0.966667, global_step = 22535, loss = 0.2767 INFO:tensorflow:Stopping. Best step: 22356 with loss = 0.2758353650569916.
# Extra evaluation metrics. NOTE: this dict is currently unused because the
# `metrics=` argument below is commented out.
validation_metrics = {"accuracy": tf.contrib.metrics.streaming_accuracy,
                      "precision": tf.contrib.metrics.streaming_precision,
                      "recall": tf.contrib.metrics.streaming_recall}

# Monitor that evaluates on the test set every 50 steps and is configured for
# early stopping on the 'loss' metric (lower is better).
# NOTE(review): early_stopping_rounds=200 is presumably the number of steps
# without improvement tolerated before stopping — confirm in the API docs.
validation_monitor = tf.contrib.learn.monitors.ValidationMonitor(
    test_set.data,
    test_set.target,
    every_n_steps=50,
    #metrics=validation_metrics,
    early_stopping_metric='loss',
    early_stopping_metric_minimize=True,
    early_stopping_rounds=200)

# Only log errors while fitting.
tf.logging.set_verbosity(tf.logging.ERROR)
classifier.fit(x=training_set.data,
               y=training_set.target,
               steps=2000,
               monitors=[validation_monitor])
Estimator(params={'dropout': None, 'hidden_units': [10, 20, 10], 'weight_column_name': None, 'feature_columns': [_RealValuedColumn(column_name='', dimension=4, default_value=None, dtype=tf.float32, normalizer=None)], 'optimizer': 'Adagrad', 'n_classes': 3, 'activation_fn': , 'num_ps_replicas': 0, 'gradient_clip_norm': None, 'enable_centered_bias': True})