参考链接:
AlexNet论文
csdn博客:AlexNet网络结构解析
Caffe AlexNet实现
ImageNet数据集下载:ILSVRC2012
ILSVRC2012数据集label信息下载
ImageNet数据集处理by Caffe
caffe针对imagenet相关文件详解
kratzert AlexNet TensorFlow开源实现
kratzert AlexNet TensorFlow开源实现解析
参考以上链接,从图像导入、AlexNet网络结构搭建到测试的完整过程如下:
版权声明:本程序修改自 https://github.com/kratzert/finetune_alexnet_with_tensorflow/tree/5d751d62eb4d7149f4e3fd465febf8f07d4cea9d 。如果要使用本程序代码,请务必在评论区留言征得本人同意!
制作ImageNet2012数据集,并进行预处理
图像的均值文件下载地址:https://github.com/BVLC/caffe/blob/master/python/caffe/imagenet/ilsvrc_2012_mean.npy
datagenerator.py:
"""Contains a helper class for image input pipelines in tensorflow."""
import tensorflow as tf
import numpy as np
import os
from tensorflow.python.framework import dtypes
from tensorflow.python.framework.ops import convert_to_tensor
# Mean image subtracted from every input picture. This assumes the file holds
# a (channels, height, width) array with channels in BGR order, which is how
# Caffe's ilsvrc_2012_mean.npy is laid out -- confirm if another file is used.
image_mean = np.load('ilsvrc_2012_mean.npy')
# Move the channel axis last while keeping height before width. Swapping
# axes 0 and 2 instead would produce (width, height, channels), i.e. a
# spatially transposed mean image.
image_mean = np.transpose(image_mean, (1, 2, 0))
# Reverse the channel axis so the mean lines up with the decoded images,
# from which it is subtracted before they are flipped to BGR.
image_mean = image_mean[:, :, ::-1]
class ImageDataGenerator(object):
    """Wrapper class around the TensorFlow ``tf.data`` dataset pipeline.

    Reads a list file with one ``<image path> <integer label>`` pair per line
    and exposes a batched dataset of preprocessed images and their labels.

    After construction the instance provides:
        data_size: number of samples read from the list file.
        img_paths, labels: string / int32 tensors holding all samples.
        data: the batched ``tf.data.Dataset``.
    """

    def __init__(self, txt_file, mode, imgfile_folder, batch_size, num_classes, shuffle=True,
                 buffer_size=1000):
        """Read the sample list and build the dataset.

        Args:
            txt_file: Path of the list file.
            mode: ``'training'`` (random 227x227 crops) or ``'inference'``
                (central 227x227 crop).
            imgfile_folder: Folder joined onto the parent of the current
                working directory to resolve the image paths of the list file.
            batch_size: Number of samples per batch.
            num_classes: Stored on the instance only; labels are emitted as
                integer class ids, not one-hot vectors.
            shuffle: Shuffle the sample lists once up front and additionally
                shuffle the dataset with a buffer of ``buffer_size`` elements.
            buffer_size: Size of the dataset shuffle buffer.

        Raises:
            ValueError: If ``mode`` is neither ``'training'`` nor
                ``'inference'``, or a list-file line is malformed.
        """
        # Reject an unknown mode before touching the file system or the graph
        if mode == 'training':
            parse_fn = self._parse_function_train
        elif mode == 'inference':
            parse_fn = self._parse_function_inference
        else:
            raise ValueError("Invalid mode '%s'." % (mode))

        self.txt_file = txt_file
        self.imgfile_folder = imgfile_folder
        self.num_classes = num_classes

        # retrieve the data from the text file
        self._read_txt_file()

        # number of samples in the dataset
        self.data_size = len(self.labels)

        # initial shuffling of the file and label lists (together!)
        if shuffle:
            self._shuffle_lists()

        # convert lists to TF tensor
        self.img_paths = convert_to_tensor(self.img_paths, dtype=dtypes.string)
        self.labels = convert_to_tensor(self.labels, dtype=dtypes.int32)

        # create dataset and attach the mode specific preprocessing
        data = tf.data.Dataset.from_tensor_slices((self.img_paths, self.labels))
        data = data.map(parse_fn, num_parallel_calls=7)

        # shuffle the first `buffer_size` elements of the dataset
        if shuffle:
            data = data.shuffle(buffer_size=buffer_size)

        # create a new dataset with batches of images
        self.data = data.batch(batch_size)

    def _read_txt_file(self):
        """Read the content of the text file and store it into lists.

        Fills ``self.img_paths`` and ``self.labels``. Fields may be separated
        by any run of whitespace; blank lines are skipped.

        Raises:
            ValueError: If a non-blank line has no label field or the label
                is not an integer.
        """
        base_dir = os.path.join(os.path.split(os.getcwd())[0], self.imgfile_folder)
        self.img_paths = []
        self.labels = []
        with open(self.txt_file, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                items = line.split()
                if not items:
                    # tolerate empty lines, e.g. a trailing newline at EOF
                    continue
                if len(items) < 2:
                    raise ValueError("Line %d of '%s' has no label: %r"
                                     % (line_no, self.txt_file, line))
                self.img_paths.append(os.path.join(base_dir, items[0]))
                self.labels.append(int(items[1]))

    def _shuffle_lists(self):
        """Conjoined shuffling of the list of paths and labels."""
        # One permutation applied to both lists keeps each path with its label
        order = np.random.permutation(self.data_size)
        self.img_paths = [self.img_paths[i] for i in order]
        self.labels = [self.labels[i] for i in order]

    def _load_mean_subtracted(self, filename):
        """Load an image, resize it to 256x256 and subtract the mean image."""
        img_string = tf.read_file(filename)
        # NOTE(review): decode_png is used although the sample list names
        # .jpeg files -- presumably the op also accepts JPEG data; confirm.
        img_decoded = tf.image.decode_png(img_string, channels=3)
        # Resize to [256, 256], the size the mean is subtracted at
        img_resized = tf.image.resize_images(img_decoded, [256, 256],
                                             method=tf.image.ResizeMethod.AREA)
        return tf.subtract(img_resized, image_mean)

    def _parse_function_train(self, filename, label):
        """Input parser for samples of the training set.

        Returns:
            A randomly cropped 227x227x3 image in BGR channel order and the
            unchanged integer label.
        """
        img_centered = self._load_mean_subtracted(filename)
        # Random crop to [227, 227, 3]
        img_cropped = tf.random_crop(img_centered, [227, 227, 3])
        # RGB -> BGR
        img_bgr = img_cropped[:, :, ::-1]
        return img_bgr, label

    def _parse_function_inference(self, filename, label):
        """Input parser for samples of the validation/test set.

        Returns:
            A centrally cropped 227x227x3 float32 image in BGR channel order
            and the unchanged integer label.
        """
        img_centered = self._load_mean_subtracted(filename)
        # Central crop to [227, 227]
        img_cropped = tf.image.resize_image_with_crop_or_pad(img_centered, 227, 227)
        img_cropped = tf.cast(img_cropped, tf.float32)
        # RGB -> BGR
        img_bgr = img_cropped[:, :, ::-1]
        return img_bgr, label
#AlexNet网络结构定义为类
alexnet.py:
import tensorflow as tf
import numpy as np
class AlexNet(object):
    """AlexNet computational graph built on top of an input tensor.

    Constructing an instance immediately builds the network; the unscaled
    scores of the last fully connected layer are exposed as ``self.fc8``.
    """

    def __init__(self, x, keep_prob6, keep_prob7, num_classes, skip_layer, weights_path):
        """Store the configuration and build the graph.

        Args:
            x: Input tensor fed to the first convolution.
            keep_prob6: Dropout keep probability applied after ``fc6``.
            keep_prob7: Dropout keep probability applied after ``fc7``.
            num_classes: Number of outputs of the last layer ``fc8``.
            skip_layer: Stored as ``SKIP_LAYER``; not consulted by any method
                of this class.
            weights_path: File read by ``load_initial_weights``.
        """
        # Parse input arguments into class variables
        self.X = x
        self.NUM_CLASSES = num_classes
        self.KEEP_PROB6 = keep_prob6
        self.KEEP_PROB7 = keep_prob7
        self.SKIP_LAYER = skip_layer
        self.WEIGHTS_PATH = weights_path

        # Call the create function to build the computational graph of AlexNet
        self.create()

    def create(self):
        """Build the eight layers and store the final scores in ``self.fc8``."""
        # 1st Layer: Conv (w ReLu) -> Lrn -> Pool
        conv1 = conv(self.X, 11, 11, 96, 4, 4, padding='VALID', name='conv1')
        norm1 = lrn(conv1, 2, 2e-05, 0.75, name='norm1')
        pool1 = max_pool(norm1, 3, 3, 2, 2, padding='VALID', name='pool1')

        # 2nd Layer: Conv (w ReLu) -> Lrn -> Pool with 2 groups
        conv2 = conv(pool1, 5, 5, 256, 1, 1, groups=2, name='conv2')
        norm2 = lrn(conv2, 2, 2e-05, 0.75, name='norm2')
        pool2 = max_pool(norm2, 3, 3, 2, 2, padding='VALID', name='pool2')

        # 3rd Layer: Conv (w ReLu)
        conv3 = conv(pool2, 3, 3, 384, 1, 1, name='conv3')

        # 4th Layer: Conv (w ReLu) split into two groups
        conv4 = conv(conv3, 3, 3, 384, 1, 1, groups=2, name='conv4')

        # 5th Layer: Conv (w ReLu) -> Pool split into two groups
        conv5 = conv(conv4, 3, 3, 256, 1, 1, groups=2, name='conv5')
        pool5 = max_pool(conv5, 3, 3, 2, 2, padding='VALID', name='pool5')

        # 6th Layer: Flatten -> FC (w ReLu) -> Dropout
        flattened = tf.reshape(pool5, [-1, 6*6*256])
        fc6 = fc(flattened, 6*6*256, 4096, name='fc6')
        dropout6 = dropout(fc6, self.KEEP_PROB6)

        # 7th Layer: FC (w ReLu) -> Dropout
        fc7 = fc(dropout6, 4096, 4096, name='fc7')
        dropout7 = dropout(fc7, self.KEEP_PROB7)

        # 8th Layer: FC and return unscaled activations
        self.fc8 = fc(dropout7, 4096, self.NUM_CLASSES, relu=False, name='fc8')

    def load_initial_weights(self, session):
        """Assign the arrays stored at ``WEIGHTS_PATH`` to the graph variables.

        Every key of the loaded mapping is grouped by the text before its
        first underscore, which is used as the variable scope name. Within a
        scope, one-dimensional arrays are assigned to ``biases`` and all
        other arrays to ``weights``.

        Args:
            session: Session used to run the assign operations.
        """
        weights = np.load(self.WEIGHTS_PATH)
        weights_dict = {}
        try:
            for k in weights.keys():
                layer_name = k.split('_')[0]
                weights_dict.setdefault(layer_name, []).append(weights[k].copy())
        finally:
            # An npz archive keeps its file open until closed; other return
            # types of np.load (e.g. a plain dict) have nothing to close.
            close = getattr(weights, 'close', None)
            if close is not None:
                close()

        # Loop over all layer names stored in the weights dict
        for op_name, arrays in weights_dict.items():
            with tf.variable_scope(op_name, reuse=True):
                # Assign weights/biases to their corresponding tf variable
                for data in arrays:
                    var_name = 'biases' if data.ndim == 1 else 'weights'
                    var = tf.get_variable(var_name)
                    session.run(var.assign(data))

    def load_params(self, session, param_dict):
        """Load ``param_dict[i]`` into the i-th trainable variable.

        Args:
            session: Session the values are loaded in.
            param_dict: Object indexed by position in
                ``tf.trainable_variables()``.
        """
        for i, v in enumerate(tf.trainable_variables()):
            v.load(param_dict[i], session)

    def get_params(self, session):
        """Return the current parameter values of all eight layers.

        Args:
            session: Session used to evaluate the variables.

        Returns:
            A list holding, for each of conv1..conv5 and fc6..fc8 in that
            order, the evaluated ``weights`` followed by the ``biases``.
        """
        layer_names = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5', 'fc6', 'fc7', 'fc8']
        param = []
        for layer_name in layer_names:
            with tf.variable_scope(layer_name, reuse=True):
                param.append(session.run(tf.get_variable('weights')))
                param.append(session.run(tf.get_variable('biases')))
        return param
def conv(x, filter_height, filter_width, num_filters, stride_y, stride_x, name,
         padding='SAME', groups=1):
    """Create a convolution layer (optionally grouped) followed by ReLU.

    Creates the variables ``weights`` and ``biases`` in the variable scope
    ``name`` and adds ``0.001 * l2_loss(weights)`` to the ``'losses'``
    collection.

    Args:
        x: Input tensor; its last dimension is taken as the channel count.
        filter_height: Kernel height.
        filter_width: Kernel width.
        num_filters: Number of output channels.
        stride_y: Stride along the second axis of ``x``.
        stride_x: Stride along the third axis of ``x``.
        name: Variable scope name of the layer.
        padding: Padding scheme passed to ``tf.nn.conv2d``.
        groups: Number of groups the input channels and the filters are
            split into; each group is convolved separately.

    Returns:
        The ReLU activation of the biased convolution output.

    Raises:
        ValueError: If the input channels or ``num_filters`` cannot be split
            evenly into ``groups``.
    """
    # Get number of input channels
    input_channels = int(x.get_shape()[-1])
    if input_channels % groups or num_filters % groups:
        raise ValueError(
            "Cannot split %d input channels and %d filters into %d groups."
            % (input_channels, num_filters, groups))

    def convolve(inputs, kernel):
        """Convolve one input (group) with one kernel (group)."""
        return tf.nn.conv2d(inputs, kernel,
                            strides=[1, stride_y, stride_x, 1],
                            padding=padding)

    with tf.variable_scope(name) as scope:
        # Create tf variables for the weights and biases of the conv layer.
        # Floor division keeps the channel dimension an integer.
        weights = tf.get_variable('weights', shape=[filter_height,
                                                    filter_width,
                                                    input_channels // groups,
                                                    num_filters])
        biases = tf.get_variable('biases', shape=[num_filters])

        # Weight decay term collected under 'losses'
        l2_loss_conv = tf.multiply(tf.nn.l2_loss(weights), 0.001)
        tf.add_to_collection('losses', l2_loss_conv)

    if groups == 1:
        conv_out = convolve(x, weights)
    else:
        # Taking conv2 as an example:
        # the weights (5,5,48,256) are split into (5,5,48,128) and (5,5,48,128),
        # the data (1,27,27,96) is split into (1,27,27,48) and (1,27,27,48).
        # Split input and weights and convolve them separately
        input_groups = tf.split(axis=3, num_or_size_splits=groups, value=x)
        weight_groups = tf.split(axis=3, num_or_size_splits=groups, value=weights)
        output_groups = [convolve(i, k) for i, k in zip(input_groups, weight_groups)]

        # Concat the convolved output together again
        conv_out = tf.concat(axis=3, values=output_groups)

    # Add biases
    biased = tf.reshape(tf.nn.bias_add(conv_out, biases), tf.shape(conv_out))

    # Apply relu function
    return tf.nn.relu(biased, name=scope.name)
def fc(x, num_in, num_out, name, relu=True):
    """Build a fully connected layer, optionally followed by ReLU.

    Creates the variables ``weights`` (``[num_in, num_out]``) and ``biases``
    (``[num_out]``) in the variable scope ``name`` and adds
    ``0.001 * l2_loss(weights)`` to the ``'losses'`` collection.

    Args:
        x: Input tensor multiplied with the weight matrix.
        num_in: Number of input units.
        num_out: Number of output units.
        name: Variable scope name of the layer.
        relu: When true the ReLU of the affine output is returned, otherwise
            the affine output itself.

    Returns:
        The layer output tensor.
    """
    with tf.variable_scope(name) as scope:
        # Layer parameters
        w = tf.get_variable('weights', shape=[num_in, num_out],
                            trainable=True)
        b = tf.get_variable('biases', [num_out], trainable=True)

        # Weight decay term collected under 'losses'
        weight_decay = tf.multiply(tf.nn.l2_loss(w), 0.001)
        tf.add_to_collection('losses', weight_decay)

        # Affine transform: x * w + b
        pre_activation = tf.nn.xw_plus_b(x, w, b, name=scope.name)

    if not relu:
        return pre_activation
    return tf.nn.relu(pre_activation)
def max_pool(x, filter_height, filter_width, stride_y, stride_x, name, padding='SAME'):
    """Apply max pooling to ``x``.

    The window is ``[1, filter_height, filter_width, 1]`` and the strides are
    ``[1, stride_y, stride_x, 1]``; ``padding`` and ``name`` are forwarded to
    ``tf.nn.max_pool``.
    """
    window = [1, filter_height, filter_width, 1]
    step = [1, stride_y, stride_x, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding=padding, name=name)
def lrn(x, radius, alpha, beta, name, bias=1.0):
    """Apply local response normalization to ``x``.

    ``radius`` is passed as ``depth_radius``; ``alpha``, ``beta``, ``bias``
    and ``name`` are forwarded unchanged.
    """
    lrn_args = dict(depth_radius=radius, alpha=alpha, beta=beta, bias=bias, name=name)
    return tf.nn.local_response_normalization(x, **lrn_args)
def dropout(x, keep_prob):
    """Apply ``tf.nn.dropout`` to ``x`` with the given ``keep_prob``."""
    dropped = tf.nn.dropout(x, keep_prob)
    return dropped
#运行和测试AlexNet
在images文件夹下,有3张图片:
图像数据的路径和label统一放在train.txt和val.txt中,形式为:
images/llama.jpeg 355
images/sealion.jpeg 150
images/zebra.jpeg 340
其中序号为该图片在ImageNet中的编号,完整的标注文件可从本文开头链接下载。
AlexNet网络结构参数可以从 http://www.cs.toronto.edu/~guerzhoy/tf_alexnet/ 下载。在本程序中默认调用load_initial_weights方法就可以将参数导入,不需要训练。
在如下代码中,因为直接load进入参数,所以将训练操作注释掉,直接进行测试操作。
finetune.py
"""Script to finetune AlexNet using Tensorflow.
With this script you can finetune AlexNet as provided in the alexnet.py
class on any given dataset. Specify the configuration settings at the
beginning according to your problem.
This script was written for TensorFlow >= version 1.2rc0 and comes with a blog
post, which you can find here:
https://kratzert.github.io/2017/02/24/finetuning-alexnet-with-tensorflow.html
Author: Frederik Kratzert
contact: f.kratzert(at)gmail.com
"""
import os
import numpy as np
import tensorflow as tf
from alexnet import AlexNet
from datagenerator import ImageDataGenerator
from datetime import datetime
# ---------------------------------------------------------------------------
# Configuration Part.
# ---------------------------------------------------------------------------

# Path to the textfiles for the trainings and validation set
train_file = '/path/to/train.txt'
val_file = 'val.txt'

# Folder the image paths of the list files are resolved against. The data
# generator joins it onto the parent of the current working directory; an
# absolute path is used as-is. Placeholder -- adjust to your setup.
image_folder = '/path/to/image/folder'

# File with the pretrained parameters read by AlexNet.load_initial_weights.
# Placeholder -- presumably an .npz archive, since the loader iterates over
# the keys of the loaded object; confirm against your weights file.
weights_path = '/path/to/alexnet_weights.npz'

# Learning params
learning_rate = 0.01
num_epochs = 10
batch_size = 1

# Network params
dropout_rate = 0.5
num_classes = 1000
train_layers = []

# How often we want to write the tf.summary data to disk
display_step = 20

# ---------------------------------------------------------------------------
# Main Part of the finetuning Script.
#
# Training is disabled (kept below as comments): the pretrained parameters
# are loaded directly and only the validation pass is executed.
# ---------------------------------------------------------------------------

# Place data loading and preprocessing on the cpu
with tf.device('/cpu:0'):
    # tr_data = ImageDataGenerator(train_file,
    #                              mode='training',
    #                              imgfile_folder=image_folder,
    #                              batch_size=batch_size,
    #                              num_classes=num_classes,
    #                              shuffle=True)
    val_data = ImageDataGenerator(val_file,
                                  mode='inference',
                                  imgfile_folder=image_folder,
                                  batch_size=batch_size,
                                  num_classes=num_classes,
                                  shuffle=False)

    # create an reinitializable iterator given the dataset structure
    iterator = tf.data.Iterator.from_structure(val_data.data.output_types,
                                               val_data.data.output_shapes)
    next_batch = iterator.get_next()

# Ops for initializing the two different iterators
# training_init_op = iterator.make_initializer(tr_data.data)
validation_init_op = iterator.make_initializer(val_data.data)

# TF placeholder for graph input and output
x = tf.placeholder(tf.float32, [batch_size, 227, 227, 3])
y = tf.placeholder(tf.int32, [batch_size])
keep_prob = tf.placeholder(tf.float32)

# Initialize model. The same keep probability is used for both dropout
# layers, and the weights file is passed so load_initial_weights can read it.
model = AlexNet(x, keep_prob, keep_prob, num_classes, train_layers, weights_path)

# Link variable to model output
score = model.fc8

# # List of trainable variables of the layers we want to train
# var_list = [v for v in tf.trainable_variables() if v.name.split('/')[0] in train_layers]
#
# # Op for calculating the loss
# with tf.name_scope("cross_ent"):
#     loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits
#                           (logits=score, labels=y))
#
# # Train op
# with tf.name_scope("train"):
#     # Get gradients of all trainable variables
#     gradients = tf.gradients(loss, var_list)
#     gradients = list(zip(gradients, var_list))
#
#     # Create optimizer and apply gradient descent to the trainable variables
#     optimizer = tf.train.GradientDescentOptimizer(learning_rate)
#     train_op = optimizer.apply_gradients(grads_and_vars=gradients)

# Evaluation op: Accuracy of the model
with tf.name_scope("accuracy"):
    accuracy_1 = tf.nn.in_top_k(score, y, 1)  # Top1 Accuracy
    accuracy_5 = tf.nn.in_top_k(score, y, 5)  # Top5 Accuracy

# Get the number of training/validation steps per epoch
# train_batches_per_epoch = tr_data.data_size // batch_size
val_batches_per_epoch = val_data.data_size // batch_size
if val_batches_per_epoch == 0:
    # Without a single full batch the accuracy average would divide by zero
    raise ValueError("Validation set holds %d samples, fewer than one batch of %d."
                     % (val_data.data_size, batch_size))

# Start Tensorflow session
with tf.Session() as sess:

    # Initialize all variables
    sess.run(tf.global_variables_initializer())

    # Load the pretrained weights into the model
    model.load_initial_weights(sess)

    # print("{} Start training...".format(datetime.now()))

    # Loop over number of epochs
    for epoch in range(num_epochs):

        print("{} Epoch number: {}".format(datetime.now(), epoch+1))

        # # Initialize iterator with the training dataset
        # sess.run(training_init_op)
        # for step in range(train_batches_per_epoch):
        #     # get next batch of data
        #     img_batch, label_batch = sess.run(next_batch)
        #     # And run the training op
        #     sess.run(train_op, feed_dict={x: img_batch,
        #                                   y: label_batch,
        #                                   keep_prob: dropout_rate})

        # Validate the model on the entire validation set
        print("{} Start validation".format(datetime.now()))
        sess.run(validation_init_op)
        test_acc1 = 0
        test_acc5 = 0
        test_count = 0
        for _ in range(val_batches_per_epoch):
            img_batch, label_batch = sess.run(next_batch)
            # Dropout is switched off for evaluation (keep probability 1)
            acc1, acc5 = sess.run([accuracy_1, accuracy_5],
                                  feed_dict={x: img_batch,
                                             y: label_batch,
                                             keep_prob: 1.})
            test_acc1 += np.sum(acc1)
            test_acc5 += np.sum(acc5)
            test_count += batch_size
        test_acc1 /= test_count
        test_acc5 /= test_count
        print("Validation Top1/Top5 Accuracy = {:.4f}/{:.4f}".
              format(test_acc1, test_acc5))
输出测试结果的Top1和Top5准确率:
2018-06-09 20:40:49.905062 Epoch number: 1
2018-06-09 20:40:49.905062 Start validation
Validation Top1/Top5 Accuracy = 1.0000/1.0000
2018-06-09 20:40:50.619143 Epoch number: 2
2018-06-09 20:40:50.619143 Start validation
Validation Top1/Top5 Accuracy = 1.0000/1.0000
......