Saving and restoring h5 models in Keras, with a complete DenseNetX example

https://blog.csdn.net/mcyJacky/article/details/88706164

Saving a model

model.save('model.h5')  # saves to an HDF5 file; requires pip install h5py
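For context, here is a minimal sketch of producing such a file; the single-layer MNIST classifier is purely illustrative (it is not the DenseNetX model used later):

from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD

# Load MNIST, flatten to 784-dim vectors, scale pixels to [0,1]
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], -1) / 255.0
y_train = np_utils.to_categorical(y_train, num_classes=10)

# A single softmax layer is enough to demonstrate save/restore
model = Sequential([Dense(10, input_dim=784, activation='softmax')])
model.compile(optimizer=SGD(lr=0.2),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=64, epochs=1)

model.save('model.h5')  # stores architecture + weights + optimizer state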

Restoring an h5 model and resuming training

The following import must be added exactly as written:
from tensorflow.keras.models import load_model
The saved model can then be restored with load_model(), or training can be continued from it. For example:

import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from tensorflow.keras.models import load_model  # note: imported from tensorflow.keras, as stressed above

# Load the data
(x_train,y_train),(x_test,y_test) = mnist.load_data()
# (60000,28,28)
print('x_shape:',x_train.shape)
# (60000)
print('y_shape:',y_train.shape)
# (60000,28,28)->(60000,784)
x_train = x_train.reshape(x_train.shape[0],-1)/255.0
x_test = x_test.reshape(x_test.shape[0],-1)/255.0
# Convert labels to one-hot format
y_train = np_utils.to_categorical(y_train,num_classes=10)
y_test = np_utils.to_categorical(y_test,num_classes=10)

# Load the saved model
model = load_model('model.h5')

# Evaluate the model
loss, accuracy = model.evaluate(x_test, y_test)
print('\ntest loss', loss)
print('accuracy', accuracy)
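Because model.save() also stored the optimizer state, the restored model can continue training rather than only being evaluated; a minimal sketch (the epoch count is illustrative):

# Resume training the restored model; optimization continues from the
# saved optimizer state rather than starting from scratch
model.fit(x_train, y_train, batch_size=64, epochs=2)
model.save('model.h5')  # overwrite with the further-trained model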

Saving and loading only the weights with h5
The model can also save just its weights, and load them back for use, as follows:

Save weights, load weights

model.save_weights('my_model_weights.h5')
model.load_weights('my_model_weights.h5')
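Note that load_weights() restores parameters only, so the network must first be rebuilt with an identical architecture, and compile() is still needed before training or evaluation. A minimal sketch, reusing the illustrative single-layer model from above:

# Rebuild the identical architecture; load_weights() matches by layer order
model = Sequential([Dense(10, input_dim=784, activation='softmax')])
model.load_weights('my_model_weights.h5')
# save_weights() stores no loss/optimizer configuration, so compile again
model.compile(optimizer=SGD(lr=0.2),
              loss='categorical_crossentropy',
              metrics=['accuracy'])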

Saving and loading the network architecture

The network architecture alone can also be exported, e.g. in JSON format. For example:
from keras.models import model_from_json

# Export the architecture (no weights) as a JSON string
json_string = model.to_json()
print(json_string)

# Rebuild a fresh, uninitialized model from the JSON string
model = model_from_json(json_string)
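Since to_json() captures the architecture only, it is usually paired with save_weights()/load_weights() to get a fully usable model back; a sketch under that assumption:

# Export architecture (JSON) and parameters (HDF5) separately
json_string = model.to_json()
model.save_weights('my_model_weights.h5')

# ... later, rebuild the uncompiled model and fill in its weights
restored = model_from_json(json_string)
restored.load_weights('my_model_weights.h5')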

Workflow summary:

HDF5 export

Export the whole model:
"""In Keras/TF 1.x the default save format is HDF5, but in TF 2.x the default is SavedModel, so the .h5 suffix must be specified explicitly"""
model.save('my_model.h5')

Export the model weights:
"""keras 1.0"""
model.save_weights('my_model_weights.h5')

HDF5 load

Load the whole model (no custom parts):
"""keras 1.0"""
from keras.models import load_model
model = load_model(model_path)

"""keras 2.0"""
new_model = tf.keras.models.load_model('my_model.h5')

Presumably, it's a custom function parsing the filename; something like int(checkpoint_path.split('.')[1]) would work if the filepath was replaced with weights.{epoch:02d}.{val_loss:.2f}.hdf5
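Keras itself provides no get_init_epoch() (the commented-out import in the listing below does not exist), so such a helper has to be hand-written. A hypothetical sketch for the weights.{epoch:02d}.{val_loss:.2f}.hdf5 pattern quoted above:

import os

def get_init_epoch(checkpoint_path):
    # For a name like 'weights.07.0.45.hdf5', splitting on '.' gives
    # ['weights', '07', '0', '45', 'hdf5']; index 1 is the epoch number
    filename = os.path.basename(checkpoint_path)
    return int(filename.split('.')[1])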

The program was written accordingly; grepping the project directory for initial_epoch shows the relevant lines:

john@john-wang:~/Vitis-AI_1.1/savertest$ find ./ -name "*.py" | xargs grep "initial_epoch"
./cifar/trainrestore.py:       initial_epoch=int(listfile[0].split(".")[1])
./cifar/trainrestore.py:       verbose=1, initial_epoch=initial_epoch)

A complete DenseNetX resume-training example:

Pay attention to three parts of the program: load_model, initial_epoch, and the checkpoint filename pattern epoch.{epoch:03d}.val_acc.{val_acc:.2f}.h5.

import warnings
warnings.filterwarnings("ignore")

import numpy as np
import os
import sys
import argparse

from datadownload import datadownload

# Silence TensorFlow messages
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# workaround for TF1.15 bug "Could not create cudnn handle: CUDNN_STATUS_INTERNAL_ERROR"
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop, SGD
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, LearningRateScheduler, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from DenseNetX import densenetx

from tensorflow.keras.models import load_model
# from tensorflow.keras.models import get_init_epoch  # no such API exists;
# the epoch is parsed from the checkpoint filename instead (see above)

DIVIDER = '-----------------------------------------'

def train(input_height,input_width,input_chan,batchsize,learnrate,epochs,keras_hdf5,tboard):

    def step_decay(epoch):
        """
        Learning rate scheduler used by callback
        Reduces learning rate depending on number of epochs
        """
        lr = learnrate
        if epoch > 150:
            lr /= 1000
        elif epoch > 120:
            lr /= 100
        elif epoch > 80:
            lr /= 10
        elif epoch > 2:
            lr /= 2
        # Test-only LR scaling -- must be commented out for real training
        lr = lr / 1000
        return lr
    

    # CIFAR10 dataset has 60k images. Training set is 50k, test set is 10k.
    # Each image is 32x32x8bits
    (x_train, y_train), (x_test, y_test) = datadownload()
    print ('Dataset downloaded and pre-processed')

    '''
    -----------------------------------------------
    CALLBACKS
    -----------------------------------------------
    '''

    # Original callback saved to a single fixed file:
    # chkpt_call = ModelCheckpoint(filepath=keras_hdf5,
    #                              monitor='val_acc',
    #                              verbose=1,
    #                              save_best_only=True)

    # Modified callback: encode the epoch number and validation accuracy
    # into the filename so the resume logic can parse them back out
    chkpt_call = ModelCheckpoint(filepath=keras_hdf5+"epoch.{epoch:03d}.val_acc.{val_acc:.2f}.h5",
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True)
 
    tb_call = TensorBoard(log_dir=tboard,
                          batch_size=batchsize,
                          update_freq='epoch')

    lr_scheduler_call = LearningRateScheduler(schedule=step_decay,
                                              verbose=1)

    lr_plateau_call = ReduceLROnPlateau(factor=np.sqrt(0.1),
                                        cooldown=0,
                                        patience=5,
                                        min_lr=0.5e-6)

    callbacks_list = [tb_call, lr_scheduler_call, lr_plateau_call, chkpt_call]

    # keras_hdf5 is used here as a checkpoint *directory*: list any
    # previously saved .h5 checkpoints inside it
    model_path = keras_hdf5
    listfile = [i for i in os.listdir(model_path) if i.endswith("h5")]
    print("listfile = {}".format(listfile))
    # Sort descending so the newest checkpoint (highest epoch) comes first;
    # the zero-padded {epoch:03d} field makes lexicographic order correct
    listfile.sort(reverse=True)
    print("listfile = {}".format(listfile))
    # Caution: an empty list is not None, so 'if listfile is not None:'
    # would always be True -- test the length (or truthiness) instead
    if len(listfile) != 0:
        model_path = model_path + listfile[0]
        model = load_model(model_path)
        # Recover the epoch to resume from by parsing the filename
        # pattern epoch.{epoch:03d}.val_acc.{val_acc:.2f}.h5
        initial_epoch = int(listfile[0].split(".")[1])
        print("initial_epoch = %d" % initial_epoch)
    else:
        # No checkpoint found: build the network from scratch
        model = densenetx(input_shape=(input_height,input_width,input_chan),classes=10,theta=0.5,drop_rate=0.2,k=12,convlayers=[16,16,16])
        initial_epoch = 0
        
    # prints a layer-by-layer summary of the network
    print('\n'+DIVIDER)
    print(' Model Summary')
    print(DIVIDER)
#    print(model.summary())
    print("Model Inputs: {ips}".format(ips=(model.inputs)))
    print("Model Outputs: {ops}".format(ops=(model.outputs)))

    model.summary()

    '''
    -----------------------------------------------
    TRAINING
    -----------------------------------------------
    '''

    '''
    Input image pipeline for training, validation
    
     data augmentation for training
       - random rotation
       - random horiz flip
       - random linear shift up and down
    '''
    data_augment = ImageDataGenerator(rotation_range=10,
                                      horizontal_flip=True,
                                      height_shift_range=0.1,
                                      width_shift_range=0.1,
                                      shear_range=0.1,
                                      zoom_range=0.1)

    train_generator = data_augment.flow(x=x_train,
                                        y=y_train,
                                        batch_size=batchsize,
                                        shuffle=True)
                                  
    '''
    Optimizer
    RMSprop used in this example.
    SGD  with Nesterov momentum was used in original paper
    '''
    #opt = SGD(lr=learnrate, momentum=0.9, nesterov=True)
    opt = RMSprop(lr=learnrate)
    
    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Calculate the number of steps in one epoch. Full training would use:
    # train_steps = train_generator.n // train_generator.batch_size
    # Here it is cut to 1/100 for a quick functional test -- remove the
    # division by 100 for real training
    train_steps = train_generator.n // train_generator.batch_size // 100
    print(f"train_generator.n = {train_generator.n}")
    print("train_steps = %d" % train_steps)
    # run training
    model.fit_generator(generator=train_generator,
                        epochs=epochs,
                        steps_per_epoch=train_steps,
                        validation_data=(x_test, y_test),
                        callbacks=callbacks_list,
                        verbose=1, initial_epoch=initial_epoch)

    print("\nTensorBoard can be opened with the command: tensorboard --logdir={dir} --host localhost --port 6006".format(dir=tboard))

    print('\n'+DIVIDER)
    # print(' Evaluate model accuracy with validation set..')
    # print(DIVIDER)

    # '''
    # -----------------------------------------------
    # EVALUATION
    # -----------------------------------------------
    # '''

    # scores = model.evaluate(x=x_test,y=y_test,batch_size=50, verbose=0)
    # print ('Evaluation Loss    : ', scores[0])
    # print ('Evaluation Accuracy: ', scores[1])


    # '''
    # -----------------------------------------------
    # PREDICTIONS
    # -----------------------------------------------
    # '''

    # # make predictions
    # predictions = model.predict(x_test,
                                # batch_size=batchsize,
                                # verbose=1)

    # # check accuracy
    # correct = 0
    # wrong = 0
    # for i in range(len(predictions)):
        # pred = np.argmax(predictions[i])
        # if (pred== np.argmax(y_test[i])):
            # correct+=1
        # else:
            # wrong+=1

    # print ('Correct predictions:',correct,' Wrong predictions:',wrong,' Accuracy:',(correct/len(predictions)))

    return


def run_main():
    
    print('\n'+DIVIDER)
    print('Keras version      : ',tf.keras.__version__)
    print('TensorFlow version : ',tf.__version__)
    print(sys.version)
    print(DIVIDER)

    # construct the argument parser and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument('-ih', '--input_height',
                    type=int,
                    default=32,
                    help='Input image height in pixels.')
    ap.add_argument('-iw', '--input_width',
                    type=int,
                    default=32,
                    help='Input image width in pixels.')
    ap.add_argument('-ic', '--input_chan',
                    type=int,
                    default=3,
                    help='Number of input image channels.')
    ap.add_argument('-b', '--batchsize',
                    type=int,
                    default=100,
                    help='Training batchsize. Must be an integer. Default is 100.')
    ap.add_argument('-e', '--epochs',
                    type=int,
                    default=300,
                    help='Number of training epochs. Must be an integer. Default is 300.')
    ap.add_argument('-lr', '--learnrate',
                    type=float,
                    default=0.001,
                    help='Optimizer initial learning rate. Must be a floating-point value. Default is 0.001.')
    ap.add_argument('-kh', '--keras_hdf5',
                    type=str,
                    default='./model.hdf5',
                    help='Path of Keras HDF5 file - must include file name. Default is ./model.hdf5.')
    ap.add_argument('-tb', '--tboard',
                    type=str,
                    default='./tb_logs',
                    help='Path to folder for saving TensorBoard data. Default is ./tb_logs.')
    args = ap.parse_args()
 
    # Override the command-line values for this resume-training run
    args.learnrate = 0.002
    # 'epochs' is the final epoch index; fit resumes from initial_epoch
    args.epochs = 5

    print(' Command line options:')
    print ('--input_height : ',args.input_height)
    print ('--input_width  : ',args.input_width)
    print ('--input_chan   : ',args.input_chan)
    print ('--batchsize    : ',args.batchsize)
    print ('--learnrate    : ',args.learnrate)
    print ('--epochs       : ',args.epochs)
    print ('--keras_hdf5   : ',args.keras_hdf5)
    print ('--tboard       : ',args.tboard)
    print(DIVIDER)

    train(args.input_height,args.input_width,args.input_chan,args.batchsize,args.learnrate,args.epochs,args.keras_hdf5,args.tboard)


if __name__ == '__main__':
    run_main()

Note: whether a list is empty or has elements, it is never None, so 'list is not None' is always True. That is why the code above tests len(listfile) != 0 rather than listfile is not None.
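A quick demonstration of that pitfall:

listfile = []
print(listfile is not None)  # True  -- an empty list is still not None
print(len(listfile) != 0)    # False -- the correct emptiness test
print(bool(listfile))        # False -- idiomatic alternative: if listfile: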
The results of the two runs were correctly recorded and stitched together.
