This post covers building the AlexNet model in TensorFlow, testing it, fine-tuning it on your own data, and batch-testing the fine-tuned model.
It is based mainly on the article 《Finetuning AlexNet with TensorFlow》; the article 《TensorFlow微调AlexNet》 is a Chinese translation of it.
In practice, TensorFlow turns out to be much faster than Caffe here.
A calculation of the output size is added after each convolution and pooling layer, which makes it easy to handle input sizes other than the default 227×227 when fine-tuning.
Unlike Caffe, where the pad value must be given explicitly, TensorFlow distinguishes the two padding schemes with 'VALID' and 'SAME'.
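For reference, the output sizes computed below follow TensorFlow's documented padding rules; here is a minimal sketch in plain Python (the helper name conv_out_size is illustrative only):
import math

def conv_out_size(in_size, filter_size, stride, padding):
    # Output spatial size of a conv/pool layer under TensorFlow's padding rules
    if padding == 'SAME':
        return int(math.ceil(float(in_size) / stride))
    # 'VALID': no padding, the filter window must fit entirely inside the input
    return int(math.ceil(float(in_size - filter_size + 1) / stride))

# conv1 on a 227x227 input: 11x11 kernel, stride 4, 'VALID' -> 55
# pool1 on that output: 3x3 window, stride 2, 'VALID' -> 27
print(conv_out_size(227, 11, 4, 'VALID'))   # 55
print(conv_out_size(55, 3, 2, 'VALID'))     # 27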
def create(self):
# 1st Layer: Conv (w ReLu) -> Pool -> Lrn
conv1 = conv(self.X, 11, 11, 96, 4, 4, padding = 'VALID', name = 'conv1')
pool1 = max_pool(conv1, 3, 3, 2, 2, padding = 'VALID', name = 'pool1')
    norm1 = lrn(pool1, 2, 2e-05, 0.75, name = 'norm1')
# calc output size
out_h = int(math.ceil(float(self.IN_SIZE[0]-11+1)/4))
out_w = int(math.ceil(float(self.IN_SIZE[1]-11+1)/4))
out_h = int(math.ceil(float(out_h-3+1)/2))
out_w = int(math.ceil(float(out_w-3+1)/2))
# 2nd Layer: Conv (w ReLu) -> Pool -> Lrn with 2 groups
conv2 = conv(norm1, 5, 5, 256, 1, 1, groups = 2, name = 'conv2')
pool2 = max_pool(conv2, 3, 3, 2, 2, padding = 'VALID', name ='pool2')
norm2 = lrn(pool2, 2, 2e-05, 0.75, name = 'norm2')
# calc output size
out_h = int(math.ceil(float(out_h-3+1)/2))
out_w = int(math.ceil(float(out_w-3+1)/2))
# 3rd Layer: Conv (w ReLu)
conv3 = conv(norm2, 3, 3, 384, 1, 1, name = 'conv3')
    # 4th Layer: Conv (w ReLu) split into two groups
conv4 = conv(conv3, 3, 3, 384, 1, 1, groups = 2, name = 'conv4')
    # 5th Layer: Conv (w ReLu) -> Pool split into two groups
conv5 = conv(conv4, 3, 3, 256, 1, 1, groups = 2, name = 'conv5')
pool5 = max_pool(conv5, 3, 3, 2, 2, padding = 'VALID', name = 'pool5')
# calc output size
out_h = int(math.ceil(float(out_h-3+1)/2))
out_w = int(math.ceil(float(out_w-3+1)/2))
# 6th Layer: Flatten -> FC (w ReLu) -> Dropout
flattened = tf.reshape(pool5, [-1, out_h*out_w*256])
fc6 = fc(flattened, out_h*out_w*256, 4096, name='fc6')
dropout6 = dropout(fc6, self.KEEP_PROB)
# 7th Layer: FC (w ReLu) -> Dropout
fc7 = fc(dropout6, 4096, 4096, name = 'fc7')
dropout7 = dropout(fc7, self.KEEP_PROB)
# 8th Layer: FC and return unscaled activations
# (for tf.nn.softmax_cross_entropy_with_logits)
self.fc8 = fc(dropout7, 4096, self.NUM_CLASSES, relu = False, name='fc8')
def conv(x, filter_height, filter_width, num_filters, stride_y, stride_x, name, padding='SAME', groups=1):
# Get number of input channels
input_channels = int(x.get_shape()[-1])
# Create lambda function for the convolution
convolve = lambda i, k: tf.nn.conv2d(i, k,
strides = [1, stride_y, stride_x, 1],
padding = padding)
with tf.variable_scope(name) as scope:
# Create tf variables for the weights and biases of the conv layer
        weights = tf.get_variable('weights',
                                  shape = [filter_height, filter_width,
                                           input_channels // groups, num_filters])
biases = tf.get_variable('biases', shape = [num_filters])
if groups == 1:
conv = convolve(x, weights)
        else:
            # In the case of multiple groups, split the input and weights
            # and convolve each group separately
input_groups = tf.split(axis = 3, num_or_size_splits=groups, value=x)
weight_groups = tf.split(axis = 3, num_or_size_splits=groups, value=weights)
output_groups = [convolve(i, k) for i,k in zip(input_groups, weight_groups)]
# Concat the convolved output together again
conv = tf.concat(axis = 3, values = output_groups)
# Add biases
bias = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape().as_list())
# Apply relu function
relu = tf.nn.relu(bias, name = scope.name)
return relu
def fc(x, num_in, num_out, name, relu = True):
with tf.variable_scope(name) as scope:
# Create tf variables for the weights and biases
weights = tf.get_variable('weights', shape=[num_in, num_out], trainable=True)
biases = tf.get_variable('biases', [num_out], trainable=True)
# Matrix multiply weights and inputs and add bias
act = tf.nn.xw_plus_b(x, weights, biases, name=scope.name)
if relu == True:
# Apply ReLu non linearity
relu = tf.nn.relu(act)
return relu
else:
return act
def max_pool(x, filter_height, filter_width, stride_y, stride_x, name, padding='SAME'):
return tf.nn.max_pool(x, ksize=[1, filter_height, filter_width, 1],
strides = [1, stride_y, stride_x, 1],
padding = padding, name = name)
def lrn(x, radius, alpha, beta, name, bias=1.0):
return tf.nn.local_response_normalization(x, depth_radius = radius,
alpha = alpha, beta = beta,
bias = bias, name = name)
def dropout(x, keep_prob):
return tf.nn.dropout(x, keep_prob)
The caffe-tensorflow project provides a tool for converting Caffe models into TensorFlow models.
In this post, the converted .npy model is downloaded directly from there.
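The bvlc_alexnet.npy file is simply a pickled Python dict: each key is a layer name (conv1 ... fc8) and each value is a list holding that layer's weight and bias arrays, which is exactly what load_initial_weights below iterates over. A minimal sketch for inspecting it (the encoding/allow_pickle arguments are only needed on Python 3 / newer NumPy):
import numpy as np

# Load the converted Caffe weights (a dict pickled inside a .npy file)
weights_dict = np.load('bvlc_alexnet.npy', encoding = 'bytes', allow_pickle = True).item()

for layer_name, params in weights_dict.items():
    for p in params:
        # 1-D arrays are biases, everything else is a weight tensor
        kind = 'biases' if len(p.shape) == 1 else 'weights'
        print('%s  %s  %s' % (layer_name, kind, p.shape))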
def load_initial_weights(self, session):
# Load the weights into memory
    weights_dict = np.load(self.WEIGHTS_PATH, encoding = 'bytes', allow_pickle = True).item()
# Loop over all layer names stored in the weights dict
for op_name in weights_dict:
# Check if the layer is one of the layers that should be reinitialized
if op_name not in self.SKIP_LAYER:
with tf.variable_scope(op_name, reuse = True):
# Loop over list of weights/biases and assign them to their corresponding tf variable
for data in weights_dict[op_name]:
# Biases
if len(data.shape) == 1:
var = tf.get_variable('biases', trainable = False)
session.run(var.assign(data))
# Weights
else:
var = tf.get_variable('weights', trainable = False)
session.run(var.assign(data))
class alexnet_test(object):
    def __init__(self):
        self.PRE_MODEL = 'bvlc_alexnet.npy'
        # buffer for the start/end timestamps used by predict_batch
        self._start_end_time = [0., 0.]
def test_imagenet(self, imgs_):
num_classes = 1000
skip_layer = []
imgs = []
#mean of imagenet dataset in BGR
imagenet_mean = np.array([104., 117., 124.], dtype=np.float32)
#plot images
fig = plt.figure(figsize=(15,6))
for i, img_ in enumerate(imgs_):
img = cv2.imread(img_)
imgs.append(img)
fig.add_subplot(1,3,i+1)
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.axis('off')
#placeholder for input and dropout rate
x = tf.placeholder(tf.float32, [1, 227, 227, 3])
keep_prob = tf.placeholder(tf.float32)
#create model with default config ( == no skip_layer and 1000 units in the last layer)
model = alexnet(x, keep_prob, num_classes, skip_layer, weights_path=self.PRE_MODEL)
#define activation of last layer as score
score = model.fc8
#create op to calculate softmax
softmax = tf.nn.softmax(score)
with tf.Session() as sess:
# Initialize all variables
sess.run(tf.global_variables_initializer())
# Load the pretrained weights into the model
model.load_initial_weights(sess)
# Create figure handle
fig2 = plt.figure(figsize=(15,6))
# Loop over all images
for i, image in enumerate(imgs):
# Convert image to float32 and resize to (227x227)
img = cv2.resize(image.astype(np.float32), (227,227))
# Subtract the ImageNet mean
img -= imagenet_mean
# Reshape as needed to feed into model
img = img.reshape((1,227,227,3))
# Run the session and calculate the class probability
probs = sess.run(softmax, feed_dict={x: img, keep_prob: 1})
                # Get the class name of the class with the highest probability
                # (class_names is the list of 1000 ImageNet labels, e.g. loaded from caffe_classes.py)
                class_name = class_names[np.argmax(probs)]
# Plot image with class name and prob in the title
fig2.add_subplot(1,3,i+1)
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.title("Class: " + class_name + ", probability: %.4f" %probs[0,np.argmax(probs)])
plt.axis('off')
plt.show()
def fine_tuning(self, train_list, test_list, mean, snapshot, filewriter_path):
# Learning params
learning_rate = 0.001
num_epochs = 80000
batch_size = 50
# Network params
in_img_size = (332, 675) #(height, width)
dropout_rate = 0.5
num_classes = 6
train_layers = ['fc6', 'fc7', 'fc8']
# How often we want to write the tf.summary data to disk
display_step = 40
x = tf.placeholder(tf.float32, [batch_size, in_img_size[0], in_img_size[1], 3])
y = tf.placeholder(tf.float32, [None, num_classes])
keep_prob = tf.placeholder(tf.float32)
# Initialize model
        model = alexnet(x, keep_prob, num_classes, train_layers, in_size=in_img_size)
#link variable to model output
score = model.fc8
# List of trainable variables of the layers we want to train
var_list = [v for v in tf.trainable_variables() if v.name.split('/')[0] in train_layers]
# Op for calculating the loss
with tf.name_scope("cross_ent"):
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = score, labels = y))
# Train op
with tf.name_scope("train"):
# Get gradients of all trainable variables
gradients = tf.gradients(loss, var_list)
gradients = list(zip(gradients, var_list))
# Create optimizer and apply gradient descent to the trainable variables
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.apply_gradients(grads_and_vars=gradients)
# Add gradients to summary
for gradient, var in gradients:
tf.summary.histogram(var.name + '/gradient', gradient)
# Add the variables we train to the summary
for var in var_list:
tf.summary.histogram(var.name, var)
# Add the loss to summary
tf.summary.scalar('cross_entropy', loss)
# Evaluation op: Accuracy of the model
with tf.name_scope("accuracy"):
correct_pred = tf.equal(tf.argmax(score, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Add the accuracy to the summary
tf.summary.scalar('accuracy', accuracy)
# Merge all summaries together
merged_summary = tf.summary.merge_all()
# Initialize the FileWriter
writer = tf.summary.FileWriter(filewriter_path)
        # Initialize a saver to store model checkpoints
saver = tf.train.Saver()
        # Initialize the data generators separately for the training and validation set
train_generator = ImageDataGenerator(train_list, horizontal_flip = True, shuffle = False, mean=mean, scale_size=in_img_size, nb_classes=num_classes)
val_generator = ImageDataGenerator(test_list, shuffle = False, mean=mean, scale_size=in_img_size, nb_classes=num_classes)
# Get the number of training/validation steps per epoch
train_batches_per_epoch = np.floor(train_generator.data_size / batch_size).astype(np.int16)
val_batches_per_epoch = np.floor(val_generator.data_size / batch_size).astype(np.int16)
# Start Tensorflow session
with tf.Session() as sess:
# Initialize all variables
sess.run(tf.global_variables_initializer())
# Add the model graph to TensorBoard
writer.add_graph(sess.graph)
# Load the pretrained weights into the non-trainable layer
model.load_initial_weights(sess)
print("{} Start training...".format(datetime.now()))
print("{} Open Tensorboard at --logdir {}".format(datetime.now(), filewriter_path))
# Loop over number of epochs
for epoch in range(num_epochs):
print("{} Epoch number: {}/{}".format(datetime.now(), epoch+1, num_epochs))
step = 1
while step < train_batches_per_epoch:
# Get a batch of images and labels
batch_xs, batch_ys = train_generator.next_batch(batch_size)
# And run the training op
sess.run(train_op, feed_dict={x: batch_xs, y: batch_ys, keep_prob: dropout_rate})
# Generate summary with the current batch of data and write to file
if step%display_step == 0:
s = sess.run(merged_summary, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
writer.add_summary(s, epoch*train_batches_per_epoch + step)
step += 1
# Validate the model on the entire validation set
print("{} Start validation".format(datetime.now()))
test_acc = 0.
test_count = 0
for _ in range(val_batches_per_epoch):
batch_tx, batch_ty = val_generator.next_batch(batch_size)
acc = sess.run(accuracy, feed_dict={x: batch_tx, y: batch_ty, keep_prob: 1.})
test_acc += acc
test_count += 1
test_acc /= test_count
print("{} Validation Accuracy = {:.4f}".format(datetime.now(), test_acc))
# Reset the file pointer of the image data generator
val_generator.reset_pointer()
train_generator.reset_pointer()
print("{} Saving checkpoint of model...".format(datetime.now()))
#save checkpoint of the model
if epoch % display_step == 0:
checkpoint_name = os.path.join(snapshot, 'model_epoch'+str(epoch)+'.ckpt')
save_path = saver.save(sess, checkpoint_name)
print("{} Model checkpoint saved at {}".format(datetime.now(), checkpoint_name))
In another terminal, run tensorboard --logdir train_log, then open 127.0.0.1:6006 in a browser to view the training details.
def predict_batch(self, val_list, mean, weight_file, result_file):
in_img_size = (332, 675) #(height, width)
dropout_rate = 0.5
num_classes = 6
train_layers = []
x = tf.placeholder(tf.float32, [1, in_img_size[0], in_img_size[1], 3])
model = alexnet(x, 1., num_classes, train_layers, in_size=in_img_size, weights_path=weight_file)
score = model.fc8
softmax = tf.nn.softmax(score)
val_generator = ImageDataGenerator(val_list, horizontal_flip = False, shuffle = False, mean=mean, scale_size=in_img_size, nb_classes=num_classes)
        precision = np.zeros((num_classes+1, num_classes), dtype=np.float64)
total_presion = 0.
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
tf.train.Saver().restore(sess, weight_file)
self._start_end_time[0] = time.clock()
for index in range(val_generator.data_size):
            print 'handling %d / %d ...\r'%(index+1, val_generator.data_size),
img_ = val_generator.images[index]
label = val_generator.labels[index]
img = cv2.imread(img_)
img = cv2.resize(img, (val_generator.scale_size[1], val_generator.scale_size[0]))
img = img.reshape(1, val_generator.scale_size[0], val_generator.scale_size[1], 3)
img = img.astype(np.float32)
probs = sess.run(softmax, feed_dict={x: img})
guess = np.argmax(probs)
if guess == label:
precision[guess][guess] += 1
total_presion += 1
else:
precision[guess][int(val_generator.labels[index])] += 1
self._start_end_time[1] = time.clock()
for i in range(num_classes):
for j in range(num_classes):
precision[num_classes][i] += precision[j][i]
for i in range(num_classes):
for j in range(num_classes):
precision[i][j] /= precision[num_classes][j]
total_presion /= val_generator.data_size
slaped = (self._start_end_time[1] - self._start_end_time[0]) / val_generator.data_size
file = open(result_file, 'w')
file.write('model: ' + weight_file + '\n')
print '\n#####################################################################'
file.writelines(['################################################################\n'])
text_ = ''
for i in range(num_classes):
print ' %d'%i,
text_ += ' %d'%i
print '\n'
file.write(text_ + '\n')
for i in range(num_classes):
print ' %d'%i,
file.write(' ' + str(i))
for j in range(num_classes):
str_preci = ' %.2f'%precision[i][j]
print ' %.2f '%precision[i][j],
file.write(str_preci)
print '\n'
file.write('\n')
print '\ntotal precision: %.2f'%total_presion
        print 'average speed: %.4f s / image'%slaped
str_preci = 'total precision: %.2f'%total_presion
file.writelines(['\n' + str_preci + '\n'])
str_slaped = 'average speed: %.4f s / image'%slaped
file.write(str_slaped + '\n')
file.close()
Code: https://github.com/yayo13/tensorflow_finetunning_alexnet