Implementing AlexNet in TensorFlow

References:
The AlexNet paper
CSDN blog: analysis of the AlexNet network structure
The Caffe AlexNet implementation
ImageNet dataset download: ILSVRC2012
ILSVRC2012 dataset label files download
ImageNet dataset preprocessing with Caffe
Details of the Caffe files for ImageNet
kratzert's open-source AlexNet TensorFlow implementation
A walkthrough of kratzert's AlexNet TensorFlow implementation
Drawing on the links above, the complete process, from image input to building and testing the AlexNet network, is as follows:

Copyright notice: this program is adapted from https://github.com/kratzert/finetune_alexnet_with_tensorflow/tree/5d751d62eb4d7149f4e3fd465febf8f07d4cea9d . If you want to reuse the code in this post, please ask for my permission in the comments section first!

ImageNet image preprocessing

Prepare the ImageNet 2012 dataset and preprocess it.
The image mean file can be downloaded from: https://github.com/BVLC/caffe/blob/master/python/caffe/imagenet/ilsvrc_2012_mean.npy
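
The mean file is stored in Caffe's layout: a (3, 256, 256) channel-first array in BGR order. A minimal sketch (assuming the .npy file sits in the working directory) to check its shape and convert it to the HWC, RGB layout that datagenerator.py uses:

import numpy as np

# Caffe stores the ILSVRC2012 mean as a channel-first (3, 256, 256) BGR array.
mean = np.load('ilsvrc_2012_mean.npy')
print(mean.shape)  # expected: (3, 256, 256)

# Move the channel axis to the end and flip BGR -> RGB to match tf.image.decode_png
# (the spatial transpose done by swapaxes is harmless for a square mean image).
mean_rgb = np.swapaxes(mean, 0, 2)[:, :, ::-1]
print(mean_rgb.shape)              # expected: (256, 256, 3)
print(mean_rgb.mean(axis=(0, 1)))  # per-channel means, roughly [123, 117, 104] for R, G, B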

datagenerator.py:

"""Containes a helper class for image input pipelines in tensorflow."""

import tensorflow as tf
import numpy as np
import os

from tensorflow.python.framework import dtypes
from tensorflow.python.framework.ops import convert_to_tensor


image_mean = np.load('ilsvrc_2012_mean.npy')
image_mean = np.swapaxes(image_mean, 0, 2)
image_mean = image_mean[:,:,::-1]


class ImageDataGenerator(object):
    """Wrapper class around the new Tensorflows dataset pipeline.
    """

    def __init__(self, txt_file, mode, imgfile_folder, batch_size, num_classes, shuffle=True,
                 buffer_size=1000):

        self.txt_file = txt_file
        self.imgfile_folder = imgfile_folder
        self.num_classes = num_classes

        # retrieve the data from the text file
        self._read_txt_file()

        # number of samples in the dataset
        self.data_size = len(self.labels)

        # initial shuffling of the file and label lists (together!)
        if shuffle:
            self._shuffle_lists()

        # convert lists to TF tensor
        self.img_paths = convert_to_tensor(self.img_paths, dtype=dtypes.string)
        self.labels = convert_to_tensor(self.labels, dtype=dtypes.int32)

        # create dataset
        data = tf.data.Dataset.from_tensor_slices((self.img_paths, self.labels))

        # distinguish between training and inference when calling the parsing function
        if mode == 'training':
            # param: output_buffer_size = 100 * batch_size
            data = data.map(self._parse_function_train, num_parallel_calls=7)

        elif mode == 'inference':
            # param: output_buffer_size = 100 * batch_size
            data = data.map(self._parse_function_inference, num_parallel_calls=7)

        else:
            raise ValueError("Invalid mode '%s'." % (mode))

        # shuffle the first `buffer_size` elements of the dataset
        if shuffle:
            data = data.shuffle(buffer_size=buffer_size)

        # create a new dataset with batches of images
        data = data.batch(batch_size)

        self.data = data

    def _read_txt_file(self):
        """Read the content of the text file and store it into lists."""
        cur_path = os.path.join(os.path.split(os.getcwd())[0], self.imgfile_folder)
        self.img_paths = []
        self.labels = []
        with open(self.txt_file, 'r') as f:
            lines = f.readlines()
            for line in lines:
                items = line.split(' ')
                self.img_paths.append(os.path.join(cur_path, items[0]))
                self.labels.append(int(items[1]))

    def _shuffle_lists(self):
        """Conjoined shuffling of the list of paths and labels."""
        path = self.img_paths
        labels = self.labels
        permutation = np.random.permutation(self.data_size)
        self.img_paths = []
        self.labels = []
        for i in permutation:
            self.img_paths.append(path[i])
            self.labels.append(labels[i])

    def _parse_function_train(self, filename, label):
        """Input parser for samples of the training set."""
        # convert label number into one-hot-encoding
        # one_hot = tf.one_hot(label, self.num_classes)

        img_string = tf.read_file(filename)
        img_decoded = tf.image.decode_png(img_string, channels=3)
        img_resize = tf.image.resize_images(img_decoded, [256, 256], method=3)  # resize to [256, 256]
        img_resize = tf.subtract(img_resize, image_mean)  # subtract the per-pixel mean
        img_resized = tf.random_crop(img_resize, [227, 227, 3])  # random crop to [227, 227, 3]
        # RGB -> BGR
        img_bgr = img_resized[:, :, ::-1]

        return img_bgr, label

    def _parse_function_inference(self, filename, label):

        """Input parser for samples of the validation/test set."""
        # convert label number into one-hot-encoding
        # one_hot = tf.one_hot(label, self.num_classes)

        img_string = tf.read_file(filename)
        img_decoded = tf.image.decode_png(img_string, channels=3)
        img_resize = tf.image.resize_images(img_decoded, [256,256], method=3)
        img_resize = tf.subtract(img_resize, image_mean)
        img_resized = tf.image.resize_image_with_crop_or_pad(img_resize, 227, 227)  # central crop to [227, 227]
        img_resized = tf.cast(img_resized, tf.float32)
        # RGB -> BGR
        img_bgr = img_resized[:, :, ::-1]
        
        return img_bgr, label
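
A minimal usage sketch for the pipeline above (assuming TensorFlow 1.x, a val.txt in the format shown later, and a hypothetical imgfile_folder name) that pulls one batch to verify the output shapes:

import tensorflow as tf

from datagenerator import ImageDataGenerator

# 'val.txt' and 'path_to_image_folder' are placeholders; adjust them to your setup.
val_data = ImageDataGenerator('val.txt', mode='inference',
                              imgfile_folder='path_to_image_folder',
                              batch_size=1, num_classes=1000, shuffle=False)

iterator = val_data.data.make_one_shot_iterator()
images, labels = iterator.get_next()

with tf.Session() as sess:
    img_batch, label_batch = sess.run([images, labels])
    print(img_batch.shape, label_batch)  # expected: (1, 227, 227, 3) [label]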

# Defining the AlexNet network structure as a class

alexnet.py:

import tensorflow as tf
import numpy as np


class AlexNet(object):
    def __init__(self, x, keep_prob6, keep_prob7, num_classes, skip_layer, weights_path):

        # Parse input arguments into class variables
        self.X = x
        self.NUM_CLASSES = num_classes
        self.KEEP_PROB6 = keep_prob6
        self.KEEP_PROB7 = keep_prob7
        self.SKIP_LAYER = skip_layer
        self.WEIGHTS_PATH = weights_path

        # Call the create function to build the computational graph of AlexNet
        self.create()

    def create(self):
        # 1st Layer: Conv (w ReLu) -> Lrn -> Pool
        conv1 = conv(self.X, 11, 11, 96, 4, 4, padding='VALID', name='conv1')
        norm1 = lrn(conv1, 2, 2e-05, 0.75, name='norm1')
        pool1 = max_pool(norm1, 3, 3, 2, 2, padding='VALID', name='pool1')
        
        # 2nd Layer: Conv (w ReLu)  -> Lrn -> Pool with 2 groups
        conv2 = conv(pool1, 5, 5, 256, 1, 1, groups=2, name='conv2')
        norm2 = lrn(conv2, 2, 2e-05, 0.75, name='norm2')
        pool2 = max_pool(norm2, 3, 3, 2, 2, padding='VALID', name='pool2')
        
        # 3rd Layer: Conv (w ReLu)
        conv3 = conv(pool2, 3, 3, 384, 1, 1, name='conv3')

        # 4th Layer: Conv (w ReLu) split into two groups
        conv4 = conv(conv3, 3, 3, 384, 1, 1, groups=2, name='conv4')

        # 5th Layer: Conv (w ReLu) -> Pool split into two groups
        conv5 = conv(conv4, 3, 3, 256, 1, 1, groups=2, name='conv5')
        pool5 = max_pool(conv5, 3, 3, 2, 2, padding='VALID', name='pool5')

        # 6th Layer: Flatten -> FC (w ReLu) -> Dropout
        flattened = tf.reshape(pool5, [-1, 6*6*256])
        fc6 = fc(flattened, 6*6*256, 4096, name='fc6')
        dropout6 = dropout(fc6, self.KEEP_PROB6)

        # 7th Layer: FC (w ReLu) -> Dropout
        fc7 = fc(dropout6, 4096, 4096, name='fc7')
        dropout7 = dropout(fc7, self.KEEP_PROB7)

        # 8th Layer: FC and return unscaled activations
        self.fc8 = fc(dropout7, 4096, self.NUM_CLASSES, relu=False, name='fc8')

    def load_initial_weights(self, session):
        # The weights file is expected to store arrays keyed as '<layer>_<suffix>'
        # (e.g. 'conv1_W', 'conv1_b'); entries are grouped by layer name below,
        # and 1-D arrays are treated as biases, everything else as weights.
        weights = np.load(self.WEIGHTS_PATH)
        weights_dict = {}
        for k in weights.keys():
            layer_name = k.split('_')[0]
            if layer_name not in weights_dict.keys():
                weights_dict[layer_name] = []
            weights_dict[layer_name].append(weights[k].copy())

        # Loop over all layer names stored in the weights dict
        for op_name in weights_dict:
            with tf.variable_scope(op_name, reuse=True):

                # Assign weights/biases to their corresponding tf variable
                for data in weights_dict[op_name]:

                    # Biases
                    if len(data.shape) == 1:
                        var = tf.get_variable('biases')
                        update = var.assign(data)
                        session.run(update)

                    # Weights
                    else:
                        var = tf.get_variable('weights')
                        update = var.assign(data)
                        session.run(update)

    # load params into computational graph
    def load_params(self, session, param_dict):
        for i,v in enumerate(tf.trainable_variables()):
            v.load(param_dict[i], session)

    # get params from the computational graph
    def get_params(self, session):
        layer = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5', 'fc6', 'fc7', 'fc8']
        param = []
        for op_id in range(len(layer)):
            with tf.variable_scope(layer[op_id], reuse=True):
                varw = tf.get_variable('weights')
                param.append(session.run(varw))
                varb = tf.get_variable('biases')
                param.append(session.run(varb))

        return param


def conv(x, filter_height, filter_width, num_filters, stride_y, stride_x, name,
         padding='SAME', groups=1):
    # Get number of input channels
    input_channels = int(x.get_shape()[-1])


    # Create lambda function for the convolution
    convolve = lambda i, k: tf.nn.conv2d(i, k,
                                         strides=[1, stride_y, stride_x, 1],
                                         padding=padding)

    with tf.variable_scope(name) as scope:
        # Create tf variables for the weights and biases of the conv layer
        weights = tf.get_variable('weights', shape=[filter_height,
                                                    filter_width,
                                                    input_channels // groups,
                                                    num_filters])
        biases = tf.get_variable('biases', shape=[num_filters])
        l2_loss_conv = tf.multiply(tf.nn.l2_loss(weights), 0.001)
        tf.add_to_collection('losses', l2_loss_conv)

    if groups == 1:
        conv = convolve(x, weights)

    else:
        # In the case of multiple groups, split the input and the weights,
        # then convolve each group separately.
        # Taking conv2 as an example: the weights (5, 5, 48, 256) are split into
        # two groups of (5, 5, 48, 128), and the input (1, 27, 27, 96) is split
        # into two groups of (1, 27, 27, 48).
        input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x)
        weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=weights)
        output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)]

        # Concat the convolved output together again
        conv = tf.concat(axis=3, values=output_groups)

    # Add biases
    bias = tf.reshape(tf.nn.bias_add(conv, biases), tf.shape(conv))

    # Apply relu function
    relu = tf.nn.relu(bias, name=scope.name)

    return relu


def fc(x, num_in, num_out, name, relu=True):
    """Create a fully connected layer."""
    with tf.variable_scope(name) as scope:

        # Create tf variables for the weights and biases
        weights = tf.get_variable('weights', shape=[num_in, num_out],
                                  trainable=True)
        biases = tf.get_variable('biases', [num_out], trainable=True)
        l2_loss_fc = tf.multiply(tf.nn.l2_loss(weights), 0.001)
        tf.add_to_collection('losses', l2_loss_fc)
        # Matrix multiply weights and inputs and add bias
        act = tf.nn.xw_plus_b(x, weights, biases, name=scope.name)

    if relu:
        relu = tf.nn.relu(act)
        return relu
    else:
        return act


def max_pool(x, filter_height, filter_width, stride_y, stride_x, name, padding='SAME'):
    return tf.nn.max_pool(x, ksize=[1, filter_height, filter_width, 1],
                          strides=[1, stride_y, stride_x, 1],
                          padding=padding, name=name)


def lrn(x, radius, alpha, beta, name, bias=1.0):
    return tf.nn.local_response_normalization(x, depth_radius=radius,
                                              alpha=alpha, beta=beta,
                                              bias=bias, name=name)


def dropout(x, keep_prob):
    return tf.nn.dropout(x, keep_prob)
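
As a quick sanity check (a minimal sketch, assuming TensorFlow 1.x and that alexnet.py is importable), the graph can be built on a dummy placeholder to confirm that pool5 flattens to 6*6*256 and that fc8 has num_classes outputs:

import tensorflow as tf

from alexnet import AlexNet

# The weights path is only a placeholder here; it is read later by load_initial_weights.
x = tf.placeholder(tf.float32, [1, 227, 227, 3])
model = AlexNet(x, keep_prob6=1.0, keep_prob7=1.0, num_classes=1000,
                skip_layer=[], weights_path='/path/to/pretrained_weights.npy')

# Expected spatial sizes: 227 -> conv1 55 -> pool1 27 -> pool2 13 -> pool5 6
print(model.fc8.get_shape())  # expected: (1, 1000)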

# Running and testing AlexNet
Under the images folder there are 3 images.
The image paths and labels are listed together in train.txt and val.txt, in the form:

images/llama.jpeg 355
images/sealion.jpeg 150
images/zebra.jpeg 340

The number after each path is the image's class index in ImageNet; the complete label files can be downloaded from the links at the beginning of this post.
The pretrained AlexNet parameters can be downloaded from http://www.cs.toronto.edu/~guerzhoy/tf_alexnet/; in this program, simply calling the load_initial_weights method imports the parameters, so no training is required.
In the code below, because the parameters are loaded directly, the training ops are commented out and only the validation/test pass is run.
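
Before running finetune.py, it can help to inspect the downloaded parameter file. A minimal sketch, assuming the bvlc_alexnet.npy file from the link above (which, as far as I know, stores a pickled dict mapping layer names to [weights, biases] lists); depending on your NumPy version you may need allow_pickle=True:

import numpy as np

# Filename and load flags may need adjusting to your NumPy version and weight file.
weights = np.load('bvlc_alexnet.npy', encoding='bytes', allow_pickle=True).item()

for layer, params in weights.items():
    print(layer, [p.shape for p in params])
# expected output along the lines of: conv1 [(11, 11, 3, 96), (96,)] ... fc8 [(4096, 1000), (1000,)]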

finetune.py

"""Script to finetune AlexNet using Tensorflow.

With this script you can finetune AlexNet as provided in the alexnet.py
class on any given dataset. Specify the configuration settings at the
beginning according to your problem.
This script was written for TensorFlow >= version 1.2rc0 and comes with a blog
post, which you can find here:

https://kratzert.github.io/2017/02/24/finetuning-alexnet-with-tensorflow.html

Author: Frederik Kratzert
contact: f.kratzert(at)gmail.com
"""

import os

import numpy as np
import tensorflow as tf

from alexnet import AlexNet
from datagenerator import ImageDataGenerator
from datetime import datetime


"""
Configuration Part.
"""

# Path to the text files for the training and validation set
train_file = '/path/to/train.txt'
val_file = 'val.txt'

# Path to the pretrained weight file (see the download link above)
weights_path = '/path/to/pretrained_weights.npy'

# Learning params
learning_rate = 0.01
num_epochs = 10
batch_size = 1

# Network params
dropout_rate = 0.5
num_classes = 1000
train_layers = []

# How often we want to write the tf.summary data to disk
display_step = 20


"""
Main Part of the finetuning Script.
"""

# Place data loading and preprocessing on the cpu
with tf.device('/cpu:0'):
    '''
    tr_data = ImageDataGenerator(train_file,
                                 mode='training',
                                 imgfile_folder='path_to_image_folder',
                                 batch_size=batch_size,
                                 num_classes=num_classes,
                                 shuffle=True)
    '''
    # imgfile_folder is resolved relative to the parent of the working directory
    val_data = ImageDataGenerator(val_file,
                                  mode='inference',
                                  imgfile_folder='path_to_image_folder',
                                  batch_size=batch_size,
                                  num_classes=num_classes,
                                  shuffle=False)

    # create a reinitializable iterator given the dataset structure
    iterator = tf.data.Iterator.from_structure(val_data.data.output_types,
                                               val_data.data.output_shapes)
    next_batch = iterator.get_next()

# Ops for initializing the two different iterators
# training_init_op = iterator.make_initializer(tr_data.data)
validation_init_op = iterator.make_initializer(val_data.data)

# TF placeholder for graph input and output
x = tf.placeholder(tf.float32, [batch_size, 227, 227, 3])
y = tf.placeholder(tf.int32, [batch_size])
keep_prob = tf.placeholder(tf.float32)

# Initialize model
model = AlexNet(x, keep_prob, keep_prob, num_classes, train_layers, weights_path)

# Link variable to model output
score = model.fc8
'''
# List of trainable variables of the layers we want to train
var_list = [v for v in tf.trainable_variables() if v.name.split('/')[0] in train_layers]

# Op for calculating the loss
with tf.name_scope("cross_ent"):
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits
                         (logits=score, labels=y))

# Train op
with tf.name_scope("train"):
    # Get gradients of all trainable variables
    gradients = tf.gradients(loss, var_list)
    gradients = list(zip(gradients, var_list))

    # Create optimizer and apply gradient descent to the trainable variables
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_op = optimizer.apply_gradients(grads_and_vars=gradients)
'''

# Evaluation op: Accuracy of the model
with tf.name_scope("accuracy"):
    accuracy_1 = tf.nn.in_top_k(score, y, 1)  # Top1 Accuracy
    accuracy_5 = tf.nn.in_top_k(score, y, 5)  # Top5 Accuracy


# Get the number of training/validation steps per epoch
# train_batches_per_epoch = int(np.floor(tr_data.data_size/batch_size))
val_batches_per_epoch = int(np.floor(val_data.data_size / batch_size))

# Start Tensorflow session
with tf.Session() as sess:

    # Initialize all variables
    sess.run(tf.global_variables_initializer())

    # Load the pretrained weights into the non-trainable layer
    model.load_initial_weights(sess)

    # print("{} Start training...".format(datetime.now()))

    # Loop over number of epochs
    for epoch in range(num_epochs):

        print("{} Epoch number: {}".format(datetime.now(), epoch+1))
        '''
        # Initialize iterator with the training dataset
        sess.run(training_init_op)

        for step in range(train_batches_per_epoch):

            # get next batch of data
            img_batch, label_batch = sess.run(next_batch)

            # And run the training op
            sess.run(train_op, feed_dict={x: img_batch,
                                          y: label_batch,
                                          keep_prob: dropout_rate})
        '''
        # Validate the model on the entire validation set
        print("{} Start validation".format(datetime.now()))
        sess.run(validation_init_op)
        test_acc1 = 0
        test_acc5 = 0
        test_count = 0
        for _ in range(val_batches_per_epoch):
            img_batch, label_batch = sess.run(next_batch)
            acc1, acc5 = sess.run([accuracy_1, accuracy_5],
                                  feed_dict={x: img_batch,
                                             y: label_batch,
                                             keep_prob: 1.})
            test_acc1 += np.sum(acc1)
            test_acc5 += np.sum(acc5)
            test_count += batch_size
        test_acc1 /= test_count
        test_acc5 /= test_count
        print("Validation Top1/Top5 Accuracy = {:.4f}/{:.4f}".
              format(test_acc1, test_acc5))

The script prints the Top1 and Top5 accuracy on the test images:

2018-06-09 20:40:49.905062 Epoch number: 1
2018-06-09 20:40:49.905062 Start validation
Validation Top1/Top5 Accuracy = 1.0000/1.0000
2018-06-09 20:40:50.619143 Epoch number: 2
2018-06-09 20:40:50.619143 Start validation
Validation Top1/Top5 Accuracy = 1.0000/1.0000
......
