博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
TFLearn 在给定模型精度时候提前终止训练
阅读量:7060 次
发布时间:2019-06-28

本文共 21982 字,大约阅读时间需要 73 分钟。

拿来主义:看我的代码,我是在模型acc和验证数据集val_acc都达到99.8%时候才终止训练。

import numpy as np
import tflearn
from tflearn.layers.core import dropout
from tflearn.layers.normalization import batch_normalization
from tflearn.data_utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import sys


class EarlyStoppingCallback(tflearn.callbacks.Callback):
    """Stop training once training AND validation accuracy reach a threshold."""

    def __init__(self, val_acc_thresh):
        """Store the accuracy threshold checked at the end of every epoch.

        Args:
            val_acc_thresh: value that both ``training_state.acc_value`` and
                ``training_state.val_acc`` must reach before training stops.
        """
        self.val_acc_thresh = val_acc_thresh

    def on_epoch_end(self, training_state):
        """Raise StopIteration when both accuracies have reached the threshold.

        Args:
            training_state: state object handed over by the trainer; only
                ``epoch``, ``acc_value`` and ``val_acc`` are read here.

        Raises:
            StopIteration: signals the caller of ``fit`` that training should
                end; it must be caught around the ``fit`` call.
        """
        acc = training_state.acc_value
        val_acc = training_state.val_acc
        # A metric can still be None (e.g. no validation snapshot was taken);
        # comparing None with a float raises TypeError on Python 3.
        if acc is None or val_acc is None:
            return
        if val_acc >= self.val_acc_thresh and acc >= self.val_acc_thresh:
            # Only announce termination when we really terminate.
            print("Terminating training at the end of epoch {}".format(training_state.epoch))
            raise StopIteration

    def on_train_end(self, training_state):
        """Report the final training accuracy once training is over.

        Args:
            training_state: state object handed over by the trainer; only
                ``acc_value`` is read here.
        """
        print("Successfully left training! Final model accuracy: {}".format(training_state.acc_value))


if __name__ == "__main__":
    # NOTE(review): parse_line is not defined in this snippet; it has to be
    # provided before running. From its use below, each parsed row presumably
    # holds the label at index 0 followed by the feature values -- confirm.
    with open("feature_with_dnn_todo.dat") as f:
        training_data = [parse_line(line) for line in f]

    # Rows whose first field equals 2 become class 1, everything else class 0.
    org_labels = [1 if int(row[0]) == 2.0 else 0 for row in training_data]
    labels = to_categorical(org_labels, nb_classes=2)
    data = [row[1:] for row in training_data]
    input_dim = len(data[0])
    X = data
    Y = labels
    print("X len: {} Y len: {}".format(len(X), len(Y)))

    trainX, testX, trainY, testY = train_test_split(X, Y, test_size=0.2, random_state=42)
    for sample in (trainX[0], trainY[0], testX[-1], testY[-1]):
        print(sample)

    # Build neural network
    net = tflearn.input_data(shape=[None, input_dim])
    # Results previously logged for this architecture:
    #  RMSProp | epoch: 100 | loss: 0.25209 - acc: 0.9109 | val_loss: 0.19742 - val_acc: 0.9392 -- iter: 14084/14084 remove unwanted_cols 2
    #  RMSProp | epoch: 100 | loss: 0.29420 - acc: 0.9075 | val_loss: 0.14464 - val_acc: 0.9551 -- iter: 14084/14084
    net = batch_normalization(net)
    dense1 = tflearn.fully_connected(net, 64, activation='tanh',
                                     regularizer='L2', weight_decay=0.001)
    dropout1 = tflearn.dropout(dense1, 0.8)
    dense2 = tflearn.fully_connected(dropout1, 64, activation='tanh',
                                     regularizer='L2', weight_decay=0.001)
    dropout2 = tflearn.dropout(dense2, 0.8)
    softmax = tflearn.fully_connected(dropout2, 2, activation='softmax')
    # RMSProp optimizer with categorical cross-entropy loss.
    net = tflearn.regression(softmax, optimizer="rmsprop", learning_rate=0.001,
                             loss='categorical_crossentropy')

    # Alternative architecture that was tried (kept for reference, disabled):
    #  Adam | epoch: 100 | loss: 0.15578 - acc: 0.9419 | val_loss: 0.16620 - val_acc: 0.9392 -- iter: 14084/14084
    #   net = batch_normalization(net)
    #   net = tflearn.fully_connected(net, input_dim)
    #   net = tflearn.fully_connected(net, 128, activation='tanh')
    #   net = dropout(net, 0.5)
    #   net = tflearn.fully_connected(net, 2, activation='softmax')
    #   net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
    #                            loss='categorical_crossentropy', name='target')

    # Define model
    model = tflearn.DNN(net)

    # Start training (apply gradient descent algorithm).
    # Initialize our callback with desired accuracy threshold.
    early_stopping_cb = EarlyStoppingCallback(val_acc_thresh=0.998)
    try:
        model.fit(trainX, trainY, validation_set=(testX, testY), n_epoch=500,
                  batch_size=8, show_metric=True, callbacks=early_stopping_cb)
    except StopIteration:
        # Raised by EarlyStoppingCallback.on_epoch_end: training stopped early.
        print("pass")

    filename = 'dns_tunnel998.tflearn'
    model.save(filename)
    model.load(filename)
    # model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True, batch_size=1024, n_epoch=5)

    # Predictions are made on the full data set X (training and test rows
    # together). Column 0 of each prediction is the score of class 0.
    y_predict = [0 if scores[0] >= 0.5 else 1 for scores in model.predict(X)]
    print(classification_report(org_labels, y_predict))
    print(confusion_matrix(org_labels, y_predict))

 

The EarlyStoppingCallback Class

I show a proof-of-concept version of early stopping below. This is the simplest possible case: just stop training after the first epoch no matter what. It is up to the user to decide the conditions they want to trigger the stopping on.

class EarlyStoppingCallback(tflearn.callbacks.Callback):
    """Proof-of-concept callback: ends training at the first epoch end it sees."""

    def __init__(self, val_acc_thresh):
        """Remember a validation-accuracy threshold.

        Note: We are free to define our init function however we please.
        The threshold is stored so it could be compared against the current
        validation accuracy (per epoch, per batch step, ...); this
        proof-of-concept version never reads it.
        """
        self.val_acc_thresh = val_acc_thresh

    def on_epoch_end(self, training_state):
        """Announce termination and unconditionally raise StopIteration.

        The exception lets us leave training without losing any information;
        the caller of ``fit`` is expected to catch it.
        """
        finished_epoch = training_state.epoch
        print("Terminating training at the end of epoch", finished_epoch)
        raise StopIteration

    def on_train_end(self, training_state):
        """Print the final training accuracy after training has ended.

        A good place to store any additional information that tflearn does
        not store already.
        """
        final_acc = training_state.acc_value
        print("Successfully left training! Final model accuracy:", final_acc)


# Initialize our callback with desired accuracy threshold.
early_stopping_cb = EarlyStoppingCallback(val_acc_thresh=0.5)

Result: Train the Model and Stop Early

try:
    # Hand the callback to our trainer and let it fit the data.
    trainer.fit(
        feed_dicts={X: trainX, Y: trainY},
        val_feed_dicts={X: testX, Y: testY},
        n_epoch=1,
        show_metric=True,  # Calculate accuracy and display at every step.
        callbacks=early_stopping_cb,
    )
except StopIteration:
    # The callback ended training by raising; resume normal program flow.
    print("Caught callback exception. Returning control to user program.")
Training Step: 860  | total loss: [1m[32m1.73372[0m[0m| Optimizer | epoch: 002 | loss: 1.73372 - acc: 0.8196 | val_loss: 1.87058 - val_acc: 0.8011 -- iter: 55000/55000Training Step: 860  | total loss: [1m[32m1.73372[0m[0m| Optimizer | epoch: 002 | loss: 1.73372 - acc: 0.8196 | val_loss: 1.87058 - val_acc: 0.8011 -- iter: 55000/55000--Terminating training at the end of epoch 2Successfully left training! Final model accuracy: 0.8196054697036743Caught callback exception. Returning control to user program.

Appendix

For my own reference, this is the code I started with before tinkering with the early stopping solution above.

from __future__ import division, print_function, absolute_import import os import sys import tempfile import urllib import collections import math import numpy as np import tensorflow as tf from scipy.io import arff import tflearn from sklearn.utils import shuffle from sklearn.metrics import roc_auc_score from tflearn.data_utils import shuffle, to_categorical from tflearn.layers.core import input_data, dropout, fully_connected from tflearn.layers.conv import conv_2d, max_pool_2d from tflearn.layers.normalization import local_response_normalization, batch_normalization from tflearn.layers.estimator import regression import tflearn.datasets.mnist as mnist # Load the data and handle any preprocessing here. X, Y, testX, testY = mnist.load_data(one_hot=True) X, Y = shuffle(X, Y) X = X.reshape([-1, 28, 28, 1]) testX = testX.reshape([-1, 28, 28, 1]) # Define our network architecture: a simple 2-layer network of the form # InputImages -> Fully Connected -> Softmax out_readin1 = input_data(shape=[None,28,28,1]) out_fully_connected2 = fully_connected(out_readin1, 10) out_softmax3 = fully_connected(out_fully_connected2, 10, activation='softmax') hash='f0c188c3777519fb93f1a825ca758a0c' scriptid='MNIST-f0c188c3777519fb93f1a825ca758a0c' # Define our training metrics. network = regression(out_softmax3, optimizer='adam', learning_rate=0.01, loss='categorical_crossentropy', name='target') model = tflearn.DNN(network, tensorboard_verbose=3) try: model.fit(X, Y, n_epoch=1, validation_set=(testX, testY), snapshot_epoch=False, show_metric=True, run_id=scriptid,callbacks=early_stopping_cb) except StopIteration: print("Caught callback exception. Returning control to user program.") prediction = model.predict(testX) auc=roc_auc_score(testY, prediction, average='macro', sample_weight=None) accuracy=model.evaluate(testX,testY) print("Accuracy:", accuracy) print("ROC AUC Score:", auc)
Training Step: 860  | total loss: [1m[32m0.30941[0m[0m| Adam | epoch: 001 | loss: 0.30941 - acc: 0.9125 -- iter: 55000/55000Terminating training at the end of epoch 1Successfully left training! Final model accuracy: 0.9125033020973206Caught callback exception. Returning control to user program.Accuracy: [0.90410000000000001]ROC AUC Score: 0.992379719297

参考:http://mckinziebrandon.me/TensorflowNotebooks/2016/11/19/tflearn-only.html

TFLearn

Examples::Extending Tensorflow::Trainer

import tensorflow as tf
import tflearn
import tflearn.datasets.mnist as mnist

# Load MNIST with one-hot encoded labels, already split into train and test.
trainX, trainY, testX, testY = mnist.load_data(one_hot=True)
hdf5 not supported (please install/reinstall h5py)Extracting mnist/train-images-idx3-ubyte.gzExtracting mnist/train-labels-idx1-ubyte.gzExtracting mnist/t10k-images-idx3-ubyte.gzExtracting mnist/t10k-labels-idx1-ubyte.gz

Define the Architecture (Basic Tensorflow)

# Because I don't feel like retyping stuff.
def tfp(shape):
    """Return a float ``tf.placeholder`` of the given shape."""
    return tf.placeholder("float", shape)


def tfrn(shape, name):
    """Return a ``tf.Variable`` called ``name``, initialised with ``tf.random_normal(shape)``."""
    return tf.Variable(tf.random_normal(shape), name=name)


# Define the inputs/outputs/weights as usual.
X, Y = tfp([None, 784]), tfp([None, 10])
W1, W2, W3 = tfrn([784, 256], 'W1'), tfrn([256, 256], 'W2'), tfrn([256, 10], 'W3')
b1, b2, b3 = tfrn([256], 'b1'), tfrn([256], 'b2'), tfrn([10], 'b3')


# Multilayer perceptron.
def dnn(x):
    """Apply two tanh layers (W1/b1, W2/b2) and a linear output layer (W3/b3).

    Args:
        x: input tensor that is matrix-multiplied with ``W1``.

    Returns:
        The output of the last layer; no activation is applied to it, so it
        is fed to the loss below as logits.
    """
    x = tf.tanh(tf.add(tf.matmul(x, W1), b1))
    x = tf.tanh(tf.add(tf.matmul(x, W2), b2))
    x = tf.add(tf.matmul(x, W3), b3)
    return x


net = dnn(X)
# Named arguments: TensorFlow >= 1.0 rejects the positional form of this call,
# and naming them rules out swapping logits and labels.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=net, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
# Mean of the per-sample "predicted class == true class" indicator.
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(net, 1), tf.argmax(Y, 1)), tf.float32),
    name='acc')

Using a TFLearn Trainer

# Bundle loss, optimizer and metric into a TrainOp and give it to a Trainer.
trainop = tflearn.TrainOp(
    loss=loss,
    optimizer=optimizer,
    metric=accuracy,
    batch_size=128,
)
trainer = tflearn.Trainer(train_ops=trainop, tensorboard_verbose=1)

# Train for two epochs, validating against the test split.
trainer.fit(
    {X: trainX, Y: trainY},
    val_feed_dicts={X: testX, Y: testY},
    n_epoch=2,
    show_metric=True,
)
Training Step: 860  | total loss: [1m[32m1.73376[0m[0m| Optimizer | epoch: 002 | loss: 1.73376 - acc: 0.8053 | val_loss: 1.78279 - val_acc: 0.8015 -- iter: 55000/55000Training Step: 860  | total loss: [1m[32m1.73376[0m[0m| Optimizer | epoch: 002 | loss: 1.73376 - acc: 0.8053 | val_loss: 1.78279 - val_acc: 0.8015 -- iter: 55000/55000--

Training Callbacks

One suggestion for early stopping with tflearn (made by owner of tflearn repository) is to define a custom callback that raises an exception when we want to stop training. I’ve written a small snippet below as an example.

class EarlyStoppingCallback(tflearn.callbacks.Callback):
    """Stop training when validation accuracy exceeds a threshold; record accuracies."""

    def __init__(self, acc_thresh):
        """
        Args:
            acc_thresh - if our accuracy > acc_thresh, terminate training.
        """
        self.acc_thresh = acc_thresh
        # One entry of training_state.global_acc per finished epoch.
        self.accs = []

    def on_epoch_end(self, training_state):
        """Record ``global_acc`` and stop once ``val_acc`` exceeds the threshold.

        Args:
            training_state: state object handed over by the trainer; only
                ``global_acc`` and ``val_acc`` are read here.

        Raises:
            StopIteration: when ``val_acc`` is available and greater than
                ``acc_thresh``.
        """
        self.accs.append(training_state.global_acc)
        val_acc = training_state.val_acc
        # val_acc may be None when no validation value is available.
        # The comparison is `>` to match the documented contract above
        # (the previous `<` stopped training while accuracy was still LOW).
        if val_acc is not None and val_acc > self.acc_thresh:
            raise StopIteration
cb = EarlyStoppingCallback(acc_thresh=0.5)
# No try/except here: a StopIteration raised by the callback propagates out
# of fit() to the caller.
trainer.fit(
    {X: trainX, Y: trainY},
    val_feed_dicts={X: testX, Y: testY},
    n_epoch=3,
    show_metric=True,
    snapshot_epoch=False,
    callbacks=cb,
)
Training Step: 3965  | total loss: [1m[32m0.33810[0m[0m| Optimizer | epoch: 010 | loss: 0.33810 - acc: 0.9455 -- iter: 55000/55000GOODBYE---------------------------------------------------------------------------StopIteration                             Traceback (most recent call last)
in
() 2 trainer.fit({X: trainX, Y: trainY}, val_feed_dicts={X: testX, Y: testY}, 3 n_epoch=3, show_metric=True, snapshot_epoch=False,----> 4 callbacks=cb)/usr/local/lib/python3.5/dist-packages/tflearn/helpers/trainer.py in fit(self, feed_dicts, n_epoch, val_feed_dicts, show_metric, snapshot_step, snapshot_epoch, shuffle_all, dprep_dict, daug_dict, excl_trainops, run_id, callbacks) 315 316 # Epoch end--> 317 caller.on_epoch_end(self.training_state) 318 319 finally:/usr/local/lib/python3.5/dist-packages/tflearn/callbacks.py in on_epoch_end(self, training_state) 67 def on_epoch_end(self, training_state): 68 for callback in self.callbacks:---> 69 callback.on_epoch_end(training_state) 70 71 def on_train_end(self, training_state):
in on_epoch_end(self, training_state) 13 if True: 14 print("GOODBYE")---> 15 raise StopIterationStopIteration:
cb.accs
[None] 参考:

Early Stopping with TensorFlow and TFLearn

import tensorflow as tf
import tflearn
import tflearn.datasets.mnist as mnist

# Load MNIST with one-hot encoded labels, already split into train and test.
trainX, trainY, testX, testY = mnist.load_data(one_hot=True)
hdf5 not supported (please install/reinstall h5py)Extracting mnist/train-images-idx3-ubyte.gzExtracting mnist/train-labels-idx1-ubyte.gzExtracting mnist/t10k-images-idx3-ubyte.gzExtracting mnist/t10k-labels-idx1-ubyte.gz
n_features = 784
n_hidden = 256
n_classes = 10

# Define the inputs/outputs/weights as usual.
X = tf.placeholder("float", [None, n_features])
Y = tf.placeholder("float", [None, n_classes])

# Define the connections/weights and biases between layers.
W1 = tf.Variable(tf.random_normal([n_features, n_hidden]), name='W1')
W2 = tf.Variable(tf.random_normal([n_hidden, n_hidden]), name='W2')
W3 = tf.Variable(tf.random_normal([n_hidden, n_classes]), name='W3')

b1 = tf.Variable(tf.random_normal([n_hidden]), name='b1')
b2 = tf.Variable(tf.random_normal([n_hidden]), name='b2')
b3 = tf.Variable(tf.random_normal([n_classes]), name='b3')

# Define the operations throughout the network:
# two tanh layers followed by a linear output layer.
net = tf.tanh(tf.add(tf.matmul(X, W1), b1))
net = tf.tanh(tf.add(tf.matmul(net, W2), b2))
net = tf.add(tf.matmul(net, W3), b3)

# Define the optimization problem.
# Named arguments: TensorFlow >= 1.0 rejects the positional form of this call,
# and naming them rules out swapping logits and labels.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=net, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
# Mean of the per-sample "predicted class == true class" indicator.
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(net, 1), tf.argmax(Y, 1)), tf.float32),
    name='acc')

Early Stopping

Training Setup

In tflearn, we can train our model with a `tflearn.Trainer` object: “Generic class to handle any TensorFlow graph training. It requires the use of TrainOp to specify all optimization parameters.”

  • `TrainOp` represents a set of operations used for optimizing a network.

  • Example: Time to initialize our trainer to work with our MNIST network. Below we create a TrainOp object that is then used for the purpose of telling our trainer

    1. Our loss function. (softmax cross entropy with logits)
    2. Our optimizer. (GradientDescentOptimizer)
    3. Our evaluation [tensor] metric. (classification accuracy)
# Tell the trainer about our loss, optimizer and evaluation metric.
trainop = tflearn.TrainOp(
    loss=loss,
    optimizer=optimizer,
    metric=accuracy,
    batch_size=128,
)
trainer = tflearn.Trainer(train_ops=trainop, tensorboard_verbose=1)

Callbacks

The `tflearn.callbacks.Callback` interface describes a set of methods that we can implement ourselves and that will be called during runtime. Below are our options; here we will be primarily concerned with the on_epoch_end() method.

  • Methods:

def on_train_begin(self, training_state):
def on_epoch_begin(self, training_state):
def on_batch_begin(self, training_state):
def on_sub_batch_begin(self, training_state):
def on_sub_batch_end(self, training_state, train_index=0):
def on_batch_end(self, training_state, snapshot=False):
def on_epoch_end(self, training_state):
def on_train_end(self, training_state):
  • TrainingState: Notice that each method requires us to pass a `TrainingState` object as an argument. These useful helpers provide the information we need to determine when to stop training. Below is a list of the instance variables we can access on a training_state object:
    • self.epoch
    • self.step
    • self.current_iter
    • self.acc_value
    • self.loss_value
    • self.val_acc
    • self.val_loss
    • self.best_accuracy
    • self.global_acc
    • self.global_loss
  • Implementing our Callback: Let’s say we want to stop training when the validation accuracy reaches a certain threshold. Below, we implement the code required to define such a callback and fit the MNIST data.
class EarlyStoppingCallback(tflearn.callbacks.Callback):
    """Stop training once validation accuracy exceeds a threshold."""

    def __init__(self, val_acc_thresh):
        """Keep the validation-accuracy threshold on the instance.

        Note: We are free to define our init function however we please.
        """
        self.val_acc_thresh = val_acc_thresh

    def on_epoch_end(self, training_state):
        """Raise StopIteration when ``val_acc`` is above the stored threshold."""
        current_val_acc = training_state.val_acc
        # val_acc can be None (apparently this can happen); nothing to compare then.
        if current_val_acc is not None and current_val_acc > self.val_acc_thresh:
            raise StopIteration
# Initialize our callback.
early_stopping_cb = EarlyStoppingCallback(val_acc_thresh=0.5)

# Give it to our trainer and let it fit the data.
# The callback stops training by raising StopIteration, so catch it here;
# otherwise the exception propagates out of fit() as an error.
try:
    trainer.fit(feed_dicts={X: trainX, Y: trainY},
                val_feed_dicts={X: testX, Y: testY},
                n_epoch=2,
                show_metric=True,  # Calculate accuracy and display at every step.
                snapshot_epoch=False,
                callbacks=early_stopping_cb)
except StopIteration:
    print("Caught callback exception. Returning control to user program.")
Training Step: 1720  | total loss: [1m[32m0.81290[0m[0m| Optimizer | epoch: 004 | loss: 0.81290 - acc_2: 0.8854 -- iter: 55000/55000

Using tf.contrib.learn instead

Iris data loading/tutorial prep

Note: can also load via:

```python
import csv
import random
import numpy as np
from sklearn import datasets
# sklearn.cross_validation was removed in newer scikit-learn releases;
# train_test_split lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

# Load the data set first: the split below needs iris.data / iris.target.
iris = datasets.load_iris()
print(iris.data.shape)

X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.33, random_state=42)
print("Xt", X_train.shape, "Yt", y_train.shape)
```

from __future__ import absolute_importfrom __future__ import division from __future__ import print_function # Suppress the massive amount of warnings. tf.logging.set_verbosity(tf.logging.ERROR) # Data sets IRIS_TRAINING = "iris_training.csv" IRIS_TEST = "iris_test.csv" # Load datasets. training_set = tf.contrib.learn.datasets.base.load_csv_with_header(filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float32) test_set = tf.contrib.learn.datasets.base.load_csv_with_header(filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float32) # Specify that all features have real-value data feature_columns = [tf.contrib.layers.real_valued_column("", dimension=4)] # Build 3 layer DNN with 10, 20, 10 units respectively. classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns, hidden_units=[10, 20, 10], n_classes=3, model_dir="/tmp/iris_model") # Fit model. classifier.fit(x=X_train, y=y_train, steps=2000) # Evaluate accuracy. accuracy_score = classifier.evaluate(x=X_test, y=y_test)["accuracy"] print('Accuracy: {0:f}'.format(accuracy_score)) # Classify two new flower samples. new_samples = np.array([[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=np.float32) y = classifier.predict(new_samples) print('Predictions: {}'.format(str(y)))
Accuracy: 0.980000Predictions: [1 1]

Validation Monitors

# Vanilla version
validation_monitor = tf.contrib.learn.monitors.ValidationMonitor(
    test_set.data,
    test_set.target,
    every_n_steps=50,
)

# Same network as before, now with an explicit checkpointing RunConfig.
classifier = tf.contrib.learn.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[10, 20, 10],
    n_classes=3,
    model_dir="/tmp/iris_model",
    config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1),
)

classifier.fit(
    x=training_set.data,
    y=training_set.target,
    steps=2000,
    monitors=[validation_monitor],
)
Estimator(params={'dropout': None, 'hidden_units': [10, 20, 10], 'weight_column_name': None, 'feature_columns': [_RealValuedColumn(column_name='', dimension=4, default_value=None, dtype=tf.float32, normalizer=None)], 'optimizer': 'Adagrad', 'n_classes': 3, 'activation_fn': 
, 'num_ps_replicas': 0, 'gradient_clip_norm': None, 'enable_centered_bias': True})

Customizing the Evaluation Metrics and Stopping Early

If we run the code below, it stops early! Warning: You’re going to see a lot of WARNING print outputs from tf. I guess this tutorial is a bit out of date. But that’s not what we care about here, we just want that early stopping! The important output to notice is

INFO:tensorflow:Validation (step 22556): accuracy = 0.966667, global_step = 22535, loss = 0.2767 INFO:tensorflow:Stopping. Best step: 22356 with loss = 0.2758353650569916.
# Candidate evaluation metrics (currently not passed to the monitor, see below).
validation_metrics = {
    "accuracy": tf.contrib.metrics.streaming_accuracy,
    "precision": tf.contrib.metrics.streaming_precision,
    "recall": tf.contrib.metrics.streaming_recall,
}

# Monitor validation every 50 steps with early stopping on 'loss'
# (minimized, early_stopping_rounds=200).
validation_monitor = tf.contrib.learn.monitors.ValidationMonitor(
    test_set.data,
    test_set.target,
    every_n_steps=50,
    # metrics=validation_metrics,
    early_stopping_metric='loss',
    early_stopping_metric_minimize=True,
    early_stopping_rounds=200,
)

tf.logging.set_verbosity(tf.logging.ERROR)

classifier.fit(
    x=training_set.data,
    y=training_set.target,
    steps=2000,
    monitors=[validation_monitor],
)
Estimator(params={'dropout': None, 'hidden_units': [10, 20, 10], 'weight_column_name': None, 'feature_columns': [_RealValuedColumn(column_name='', dimension=4, default_value=None, dtype=tf.float32, normalizer=None)], 'optimizer': 'Adagrad', 'n_classes': 3, 'activation_fn': 
, 'num_ps_replicas': 0, 'gradient_clip_norm': None, 'enable_centered_bias': True})

转载地址:http://ulyll.baihongyu.com/

你可能感兴趣的文章
【记录一个问题】linux + opencv + gpu视频解码,好不容易编译通过,运行又coredump了...
查看>>
layui 表格组件不能访问连续的属性的解决办法
查看>>
windows server 2003 原版 安装 php+mysql+apache 教程
查看>>
【BZOJ1930】【SHOI2003】吃豆豆
查看>>
PostgreSQL 10.0 压缩版的 pgAdmin 不能用的问题
查看>>
动态最小生成树讲解
查看>>
find命令
查看>>
Windows和Mac下安装Beautiful Soup
查看>>
Mac 配置android环境变量
查看>>
SkyLine二次开发——解决在web页面启动时自动运行TerraExplorer的问题
查看>>
约瑟夫环(Josehpuse)的模拟
查看>>
CSS小技巧
查看>>
正则匹配&nbsp;
查看>>
shell 读取文件
查看>>
给视图添加阴影
查看>>
数组2
查看>>
在django中,执行原始sql语句
查看>>
配置eclipse使能打开当前文件所在目录
查看>>
Repeater内RadioButton.GroupName失效
查看>>
【算法学习笔记】17.暴力求解法05 隐式图搜索1 迭代加深搜索 埃及分数
查看>>