TensorFlow fine-tuning AlexNet

Covers building the AlexNet network model in TensorFlow, testing it, fine-tuning it on your own data, and batch-testing the trained model.

The main reference is the article "Finetuning AlexNet with TensorFlow"; the article "TensorFlow微调AlexNet" is a Chinese translation of it.

In my experience, TensorFlow runs much faster than Caffe here...


Building the AlexNet network

A calculation of the output size is added after each convolution and pooling layer, which makes it easy to handle input sizes other than the default 227×227 when fine-tuning.

Unlike Caffe, which requires an explicit pad value, TensorFlow distinguishes the two padding modes with 'VALID' and 'SAME' (a small sketch of the formulas follows the list):

  • VALID
    No zero-padding; output size out_h = ceil((in_h - filter_h + 1) / stride_h)
  • SAME
    Zero-pads the border as needed; output size out_h = ceil(in_h / stride_h)
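A minimal sketch of these two formulas in plain Python (the helper name conv_out_size is only for illustration):

import math

def conv_out_size(in_size, filter_size, stride, padding):
    # Output size of a conv / pool layer along one spatial dimension
    if padding == 'VALID':
        return int(math.ceil(float(in_size - filter_size + 1) / stride))
    else:  # 'SAME'
        return int(math.ceil(float(in_size) / stride))

print(conv_out_size(227, 11, 4, 'VALID'))  # conv1 of AlexNet: 227 -> 55

The create() method below applies the VALID formula after each convolution and pooling layer to keep track of the feature-map size: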
def create(self):
    # 1st Layer: Conv (w ReLu) -> Pool -> Lrn
    conv1 = conv(self.X, 11, 11, 96, 4, 4, padding = 'VALID', name = 'conv1')
    pool1 = max_pool(conv1, 3, 3, 2, 2, padding = 'VALID', name = 'pool1')
    norm1 = lrn(pool1, 2, 2e-05, 0.75, name = 'norm1')
    #    calc output size
    out_h = int(math.ceil(float(self.IN_SIZE[0]-11+1)/4))
    out_w = int(math.ceil(float(self.IN_SIZE[1]-11+1)/4))
    out_h = int(math.ceil(float(out_h-3+1)/2))
    out_w = int(math.ceil(float(out_w-3+1)/2))

    # 2nd Layer: Conv (w ReLu) -> Pool -> Lrn with 2 groups
    conv2 = conv(norm1, 5, 5, 256, 1, 1, groups = 2, name = 'conv2')
    pool2 = max_pool(conv2, 3, 3, 2, 2, padding = 'VALID', name ='pool2')
    norm2 = lrn(pool2, 2, 2e-05, 0.75, name = 'norm2')
    #    calc output size
    out_h = int(math.ceil(float(out_h-3+1)/2))
    out_w = int(math.ceil(float(out_w-3+1)/2))

    # 3rd Layer: Conv (w ReLu)
    conv3 = conv(norm2, 3, 3, 384, 1, 1, name = 'conv3')

    # 4th Layer: Conv (w ReLu) split into two groups
    conv4 = conv(conv3, 3, 3, 384, 1, 1, groups = 2, name = 'conv4')

    # 5th Layer: Conv (w ReLu) -> Pool split into two groups
    conv5 = conv(conv4, 3, 3, 256, 1, 1, groups = 2, name = 'conv5')
    pool5 = max_pool(conv5, 3, 3, 2, 2, padding = 'VALID', name = 'pool5')
    #    calc output size
    out_h = int(math.ceil(float(out_h-3+1)/2))
    out_w = int(math.ceil(float(out_w-3+1)/2))

    # 6th Layer: Flatten -> FC (w ReLu) -> Dropout
    flattened = tf.reshape(pool5, [-1, out_h*out_w*256])
    fc6 = fc(flattened, out_h*out_w*256, 4096, name='fc6')
    dropout6 = dropout(fc6, self.KEEP_PROB)

    # 7th Layer: FC (w ReLu) -> Dropout
    fc7 = fc(dropout6, 4096, 4096, name = 'fc7')
    dropout7 = dropout(fc7, self.KEEP_PROB)

    # 8th Layer: FC and return unscaled activations
    # (for tf.nn.softmax_cross_entropy_with_logits)
    self.fc8 = fc(dropout7, 4096, self.NUM_CLASSES, relu = False, name='fc8')
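For the default 227×227 input, these calculations reproduce the standard AlexNet sizes: conv1 (11×11, stride 4, VALID) gives ceil((227-11+1)/4) = 55, pool1 gives ceil((55-3+1)/2) = 27, conv2 (SAME) keeps 27 and pool2 gives 13, conv3–conv5 (SAME) keep 13 and pool5 gives 6, so fc6 receives a 6*6*256 = 9216-dimensional vector.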

Helper functions

def conv(x, filter_height, filter_width, num_filters, stride_y, stride_x, name, padding='SAME', groups=1):
    # Get number of input channels
    input_channels = int(x.get_shape()[-1])

    # Create lambda function for the convolution
    convolve = lambda i, k: tf.nn.conv2d(i, k,
                                strides = [1, stride_y, stride_x, 1],
                                padding = padding)

    with tf.variable_scope(name) as scope:
        # Create tf variables for the weights and biases of the conv layer
        weights = tf.get_variable('weights',
                                shape = [filter_height, filter_width,
                                input_channels // groups, num_filters])
        biases = tf.get_variable('biases', shape = [num_filters])

        if groups == 1:
            conv = convolve(x, weights)

        # In the case of multiple groups, split inputs & weights
        else:
            # Split input and weights and convolve them separately
            input_groups = tf.split(axis = 3, num_or_size_splits=groups, value=x)
            weight_groups = tf.split(axis = 3, num_or_size_splits=groups, value=weights)
            output_groups = [convolve(i, k) for i,k in zip(input_groups, weight_groups)]
            # Concat the convolved output together again
            conv = tf.concat(axis = 3, values = output_groups)

        # Add biases
        bias = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape().as_list())

        # Apply relu function
        relu = tf.nn.relu(bias, name = scope.name)

        return relu


def fc(x, num_in, num_out, name, relu = True):
    with tf.variable_scope(name) as scope:
        # Create tf variables for the weights and biases
        weights = tf.get_variable('weights', shape=[num_in, num_out], trainable=True)
        biases = tf.get_variable('biases', [num_out], trainable=True)

        # Matrix multiply weights and inputs and add bias
        act = tf.nn.xw_plus_b(x, weights, biases, name=scope.name)

        if relu:
            # Apply ReLu non linearity
            relu = tf.nn.relu(act)
            return relu
        else:
            return act


def max_pool(x, filter_height, filter_width, stride_y, stride_x, name, padding='SAME'):
    return tf.nn.max_pool(x, ksize=[1, filter_height, filter_width, 1],
                              strides = [1, stride_y, stride_x, 1],
                              padding = padding, name = name)


def lrn(x, radius, alpha, beta, name, bias=1.0):
    return tf.nn.local_response_normalization(x, depth_radius = radius,
                                             alpha = alpha, beta = beta,
                                             bias = bias, name = name)


def dropout(x, keep_prob):
    return tf.nn.dropout(x, keep_prob)
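A quick shape check of these helpers (a sketch, assuming TensorFlow 1.x and that the functions above are in scope; the demo_* names are only for illustration):

x_demo = tf.placeholder(tf.float32, [1, 227, 227, 3])
c1 = conv(x_demo, 11, 11, 96, 4, 4, padding='VALID', name='demo_conv1')
p1 = max_pool(c1, 3, 3, 2, 2, padding='VALID', name='demo_pool1')
print(c1.get_shape())  # (1, 55, 55, 96)
print(p1.get_shape())  # (1, 27, 27, 96)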

Loading the initial weights (.npy)

The caffe-tensorflow project provides a tool for converting Caffe models into TensorFlow models;
this article simply uses an already-converted .npy model downloaded from there.

def load_initial_weights(self, session):
    # Load the weights into memory
    weights_dict = np.load(self.WEIGHTS_PATH, encoding = 'bytes').item()

    # Loop over all layer names stored in the weights dict
    for op_name in weights_dict:
        # Check if the layer is one of the layers that should be reinitialized
        if op_name not in self.SKIP_LAYER:
            with tf.variable_scope(op_name, reuse = True):
                # Loop over list of weights/biases and assign them to their corresponding tf variable
                for data in weights_dict[op_name]:
                    # Biases
                    if len(data.shape) == 1:
                        var = tf.get_variable('biases', trainable = False)
                        session.run(var.assign(data))

                    # Weights
                    else:
                        var = tf.get_variable('weights', trainable = False)
                        session.run(var.assign(data))
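A quick way to inspect the converted model (a sketch, assuming the bvlc_alexnet.npy produced by caffe-tensorflow, where each layer maps to a [weights, biases] list; newer NumPy versions also need allow_pickle=True):

import numpy as np

weights_dict = np.load('bvlc_alexnet.npy', encoding='bytes', allow_pickle=True).item()
for op_name, data in weights_dict.items():
    print(op_name, [d.shape for d in data])
# e.g. conv1 [(11, 11, 3, 96), (96,)], fc6 [(9216, 4096), (4096,)], ...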

Testing the AlexNet network

class alexnet_test(object):
    def __init__(self):
        self.PRE_MODEL = 'bvlc_alexnet.npy'

    def test_imagenet(self, imgs_):
        num_classes = 1000
        skip_layer = []
        imgs = []

        #mean of imagenet dataset in BGR
        imagenet_mean = np.array([104., 117., 124.], dtype=np.float32)
        #plot images
        fig = plt.figure(figsize=(15,6))
        for i, img_ in enumerate(imgs_):
            img = cv2.imread(img_)
            imgs.append(img)
            fig.add_subplot(1,3,i+1)
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            plt.axis('off')

        #placeholder for input and dropout rate
        x = tf.placeholder(tf.float32, [1, 227, 227, 3])
        keep_prob = tf.placeholder(tf.float32)
        #create model with default config ( == no skip_layer and 1000 units in the last layer)
        model = alexnet(x, keep_prob, num_classes, skip_layer, weights_path=self.PRE_MODEL)
        #define activation of last layer as score
        score = model.fc8
        #create op to calculate softmax 
        softmax = tf.nn.softmax(score)

        with tf.Session() as sess:
            # Initialize all variables
            sess.run(tf.global_variables_initializer())
            # Load the pretrained weights into the model
            model.load_initial_weights(sess)
            # Create figure handle
            fig2 = plt.figure(figsize=(15,6))
            # Loop over all images
            for i, image in enumerate(imgs):
                # Convert image to float32 and resize to (227x227)
                img = cv2.resize(image.astype(np.float32), (227,227))
                # Subtract the ImageNet mean
                img -= imagenet_mean
                # Reshape as needed to feed into model
                img = img.reshape((1,227,227,3))
                # Run the session and calculate the class probability
                probs = sess.run(softmax, feed_dict={x: img, keep_prob: 1})
                # Get the class name of the class with the highest probability
                class_name = class_names[np.argmax(probs)]
                # Plot image with class name and prob in the title
                fig2.add_subplot(1,3,i+1)
                plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                plt.title("Class: " + class_name + ", probability: %.4f" %probs[0,np.argmax(probs)])
                plt.axis('off')
        plt.show()
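A hypothetical call (the image paths are placeholders, and class_names is assumed to come from an ImageNet label list such as the caffe_classes.py used in the reference article):

test = alexnet_test()
test.test_imagenet(['zebra.jpeg', 'llama.jpeg', 'sealion.jpeg'])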

Fine-tuning

def fine_tuning(self, train_list, test_list, mean, snapshot, filewriter_path):
    # Learning params
    learning_rate = 0.001
    num_epochs = 80000
    batch_size = 50

    # Network params
    in_img_size = (332, 675) #(height, width)
    dropout_rate = 0.5
    num_classes = 6
    train_layers = ['fc6', 'fc7', 'fc8']

    # How often we want to write the tf.summary data to disk
    display_step = 40

    x = tf.placeholder(tf.float32, [batch_size, in_img_size[0], in_img_size[1], 3])
    y = tf.placeholder(tf.float32, [None, num_classes])
    keep_prob = tf.placeholder(tf.float32)

    # Initialize model
    model = alexnet(x, keep_prob, num_classes, train_layers, in_size=in_img_size)
    #link variable to model output
    score = model.fc8
    # List of trainable variables of the layers we want to train
    var_list = [v for v in tf.trainable_variables() if v.name.split('/')[0] in train_layers]
    # Op for calculating the loss
    with tf.name_scope("cross_ent"):
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = score, labels = y))
    # Train op
    with tf.name_scope("train"):
        # Get gradients of all trainable variables
        gradients = tf.gradients(loss, var_list)
        gradients = list(zip(gradients, var_list))

        # Create optimizer and apply gradient descent to the trainable variables
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = optimizer.apply_gradients(grads_and_vars=gradients)

    # Add gradients to summary
    for gradient, var in gradients:
        tf.summary.histogram(var.name + '/gradient', gradient)
    # Add the variables we train to the summary
    for var in var_list:
        tf.summary.histogram(var.name, var)
    # Add the loss to summary
    tf.summary.scalar('cross_entropy', loss)

    # Evaluation op: Accuracy of the model
    with tf.name_scope("accuracy"):
        correct_pred = tf.equal(tf.argmax(score, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    # Add the accuracy to the summary
    tf.summary.scalar('accuracy', accuracy)

    # Merge all summaries together
    merged_summary = tf.summary.merge_all()
    # Initialize the FileWriter
    writer = tf.summary.FileWriter(filewriter_path)
    # Initialize a saver for storing model checkpoints
    saver = tf.train.Saver()
    # Initialize the data generators separately for the training and validation set
    train_generator = ImageDataGenerator(train_list, horizontal_flip = True, shuffle = False, mean=mean, scale_size=in_img_size, nb_classes=num_classes)
    val_generator = ImageDataGenerator(test_list, shuffle = False, mean=mean, scale_size=in_img_size, nb_classes=num_classes)
    # Get the number of training/validation steps per epoch
    train_batches_per_epoch = np.floor(train_generator.data_size / batch_size).astype(np.int16)
    val_batches_per_epoch = np.floor(val_generator.data_size / batch_size).astype(np.int16)

    # Start Tensorflow session
    with tf.Session() as sess:
        # Initialize all variables
        sess.run(tf.global_variables_initializer())
        # Add the model graph to TensorBoard
        writer.add_graph(sess.graph)
        # Load the pretrained weights into the non-trainable layer
        model.load_initial_weights(sess)
        print("{} Start training...".format(datetime.now()))
        print("{} Open Tensorboard at --logdir {}".format(datetime.now(), filewriter_path))
        # Loop over number of epochs
        for epoch in range(num_epochs):
            print("{} Epoch number: {}/{}".format(datetime.now(), epoch+1, num_epochs))
            step = 1
            while step < train_batches_per_epoch:
                # Get a batch of images and labels
                batch_xs, batch_ys = train_generator.next_batch(batch_size)
                # And run the training op
                sess.run(train_op, feed_dict={x: batch_xs, y: batch_ys, keep_prob: dropout_rate})
                # Generate summary with the current batch of data and write to file
                if step%display_step == 0:
                    s = sess.run(merged_summary, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
                    writer.add_summary(s, epoch*train_batches_per_epoch + step)
                step += 1

            # Validate the model on the entire validation set
            print("{} Start validation".format(datetime.now()))
            test_acc = 0.
            test_count = 0
            for _ in range(val_batches_per_epoch):
                batch_tx, batch_ty = val_generator.next_batch(batch_size)
                acc = sess.run(accuracy, feed_dict={x: batch_tx, y: batch_ty, keep_prob: 1.})
                test_acc += acc
                test_count += 1
            test_acc /= test_count
            print("{} Validation Accuracy = {:.4f}".format(datetime.now(), test_acc))

            # Reset the file pointer of the image data generator
            val_generator.reset_pointer()
            train_generator.reset_pointer()
            print("{} Saving checkpoint of model...".format(datetime.now()))

            #save checkpoint of the model
            if epoch % display_step == 0:
                checkpoint_name = os.path.join(snapshot, 'model_epoch'+str(epoch)+'.ckpt')
                save_path = saver.save(sess, checkpoint_name)
                print("{} Model checkpoint saved at {}".format(datetime.now(), checkpoint_name))

In another terminal, run tensorboard --logdir train_log, then open 127.0.0.1:6006 in a browser to view the training details.


Batch prediction

def predict_batch(self, val_list, mean, weight_file, result_file):
    in_img_size = (332, 675) #(height, width)
    dropout_rate = 0.5
    num_classes = 6
    train_layers = []

    x = tf.placeholder(tf.float32, [1, in_img_size[0], in_img_size[1], 3])
    model = alexnet(x, 1., num_classes, train_layers, in_size=in_img_size, weights_path=weight_file)
    score = model.fc8
    softmax = tf.nn.softmax(score)

    val_generator = ImageDataGenerator(val_list, horizontal_flip = False, shuffle = False, mean=mean, scale_size=in_img_size, nb_classes=num_classes)

    precision = np.zeros((num_classes+1, num_classes), dtype=np.float)
    total_precision = 0.

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        tf.train.Saver().restore(sess, weight_file)

        self._start_end_time[0] = time.clock()
        for index in range(val_generator.data_size):
            print('handling %d / %d ...\r' % (index+1, val_generator.data_size), end='')

            img_ = val_generator.images[index]
            label = val_generator.labels[index]
            img = cv2.imread(img_)
            img = cv2.resize(img, (val_generator.scale_size[1], val_generator.scale_size[0]))
            img = img.reshape(1, val_generator.scale_size[0], val_generator.scale_size[1], 3)
            img = img.astype(np.float32)

            probs = sess.run(softmax, feed_dict={x: img})
            guess = np.argmax(probs)
            if guess == label:
                precision[guess][guess] += 1
                total_precision += 1
            else:
                precision[guess][int(val_generator.labels[index])] += 1
        self._start_end_time[1] = time.clock()

        # Accumulate per-true-class totals in the extra row, then divide each
        # column by its total, so precision[i][j] becomes the fraction of
        # true-class-j samples that were predicted as class i
        for i in range(num_classes):
            for j in range(num_classes):
                precision[num_classes][i] += precision[j][i]
        for i in range(num_classes):
            for j in range(num_classes):
                precision[i][j] /= precision[num_classes][j]
        total_precision /= val_generator.data_size

        elapsed = (self._start_end_time[1] - self._start_end_time[0]) / val_generator.data_size

        file = open(result_file, 'w')
        file.write('model: ' + weight_file + '\n')
        print('\n#####################################################################')
        file.writelines(['################################################################\n'])
        text_ = ''
        for i in range(num_classes):
            print('        %d' % i, end='')
            text_ += '        %d' % i
        print('\n')
        file.write(text_ + '\n')
        for i in range(num_classes):
            print('  %d' % i, end='')
            file.write('  ' + str(i))
            for j in range(num_classes):
                str_preci = '    %.2f' % precision[i][j]
                print('  %.2f  ' % precision[i][j], end='')
                file.write(str_preci)
            print('\n')
            file.write('\n')
        print('\ntotal precision: %.2f' % total_precision)
        print('average speed: %.4f s / image' % elapsed)
        str_preci = 'total precision: %.2f' % total_precision
        file.writelines(['\n' + str_preci + '\n'])
        str_elapsed = 'average speed: %.4f s / image' % elapsed
        file.write(str_elapsed + '\n')
        file.close()
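A hypothetical call, again with placeholder file names, assuming the same wrapper instance with _start_end_time initialized and a checkpoint produced by the fine-tuning step:

tuner.predict_batch('test.txt', mean, 'snapshot/model_epoch40.ckpt', 'result.txt')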

Code: https://github.com/yayo13/tensorflow_finetunning_alexnet

