本文复现AI challenger的官方baseline模型,数据通过tfrecord和队列来供给。
“This simple model consists of three convolutional layers, three max pool layers and two fully connected layers. Local response normalization and dropout are also used. Details of network structure is in network.py.”只是近似复现,因为数据的预处理,batch_size等部分不是完全一样。
注意:手头没有合适的GPU,这个代码的结果没有汇报。相比于上篇AI challenger 场景分类 train test softmax只做了少量更改。
task1_train_val.py
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 20 16:05:02 2017
@author: wayne
CHANGES
- 复现场景分类官方baseline model, 注意对图片的resize处理不完全一样!!!
TODO
- NEXT (train_flag = True): 增加每训练一段时间显示一次验证准确率,即train_flag = True时需要load train和val.
https://stackoverflow.com/questions/44270198/when-using-tfrecord-how-can-i-run-intermediate-validation-check-a-better-way
https://github.com/tensorflow/tensorflow/issues/7902
训练结束显示整个训练集上的准确率?
- NEXT: finetune基于imagenet的inception-resnet v2, senet等
- NEXT: 调参和数据增强,模型复杂度, use log file, use input args 模块化等
REFERENCES
官方baseline
https://github.com/AIChallenger/AI_Challenger/tree/master/Baselines/scene_baseline_simple
tensorflow 变量简单存储与恢复
http://blog.csdn.net/zywvvd/article/details/77941680
"""
import tensorflow as tf
import time
import json
import network
def read_and_decode(tfrecords_file, batch_size, num_epochs):
    """Build the queue-based training input pipeline for one TFRecord file.

    Every record is parsed into a raw uint8 image plus an integer label. The
    image is resized to ``image_size`` x ``image_size``, standardized per
    image and grouped into shuffled mini-batches.

    Relies on the module-level globals ``image_size`` and ``image_channel``.

    Args:
        tfrecords_file: Path of the TFRecord file to read.
        batch_size: Number of examples per mini-batch.
        num_epochs: Number of passes over the file handed to
            ``tf.train.string_input_producer``.

    Returns:
        A tuple ``(image_batch, label_batch)`` of tensors; images are reshaped
        to ``[image_size, image_size, image_channel]`` before batching and
        labels are cast to int32.
    """
    # Fix: read the file named by the argument. The original body referenced
    # `tfrecord_file`, which only resolved through a module-level global.
    filename_queue = tf.train.string_input_producer(
        [tfrecords_file], num_epochs=num_epochs)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    img_features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'h': tf.FixedLenFeature([], tf.int64),
            'w': tf.FixedLenFeature([], tf.int64),
            'c': tf.FixedLenFeature([], tf.int64),
            'image': tf.FixedLenFeature([], tf.string),
        })

    # The stored height/width/channels are needed to undo the flat raw encoding.
    h = tf.cast(img_features['h'], tf.int32)
    w = tf.cast(img_features['w'], tf.int32)
    c = tf.cast(img_features['c'], tf.int32)
    image = tf.decode_raw(img_features['image'], tf.uint8)
    image = tf.reshape(image, [h, w, c])
    label = tf.cast(img_features['label'], tf.int32)

    # Data augmentation hook (currently disabled):
    # distorted_image = tf.random_crop(images, [530, 530, img_channel])
    # distorted_image = tf.image.random_flip_left_right(distorted_image)
    # distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    # distorted_image = tf.image.random_contrast(distorted_image, lower=0.2, upper=1.8)
    image = tf.image.resize_images(image, (image_size, image_size))
    # Per-image standardization (see the TensorFlow docs for the exact scaling).
    image = tf.image.per_image_standardization(image)
    # Pin the static shape so the batching queue knows the element shape.
    image = tf.reshape(image, [image_size, image_size, image_channel])

    # Shuffle while batching.
    image_batch, label_batch = tf.train.shuffle_batch(
        [image, label],
        batch_size=batch_size,
        num_threads=64,  # note: multiple threads may change the image order
        capacity=2048,
        min_after_dequeue=64,  # alternative tried by the author: 256
    )
    return image_batch, label_batch
def read_and_decode_test(tfrecords_file, batch_size, num_epochs):
    """Build the queue-based validation/test input pipeline for one TFRecord file.

    Same decoding and preprocessing as the training pipeline, but without
    shuffling or augmentation, and each example also carries its ``image_id``
    so predictions can be written to a submission file.

    Relies on the module-level globals ``image_size`` and ``image_channel``.

    Args:
        tfrecords_file: Path of the TFRecord file to read.
        batch_size: Number of examples per mini-batch. ``tf.train.batch`` is
            called with its defaults, so a final partial batch is presumably
            dropped (the author's note at the call site says so) -- confirm
            against the TensorFlow docs.
        num_epochs: Number of passes over the file handed to
            ``tf.train.string_input_producer``.

    Returns:
        A tuple ``(image_batch, label_batch, image_id_batch)`` of tensors.
    """
    # Fix: read the file named by the argument. The original body referenced
    # `tfrecord_file`, which only resolved through a module-level global.
    filename_queue = tf.train.string_input_producer(
        [tfrecords_file], num_epochs=num_epochs)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    # FixedLenFeature vs VarLenFeature:
    # https://stackoverflow.com/questions/41921746/tensorflow-varlenfeature-vs-fixedlenfeature
    img_features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'h': tf.FixedLenFeature([], tf.int64),
            'w': tf.FixedLenFeature([], tf.int64),
            'c': tf.FixedLenFeature([], tf.int64),
            'image': tf.FixedLenFeature([], tf.string),
            'image_id': tf.FixedLenFeature([], tf.string),
        })

    h = tf.cast(img_features['h'], tf.int32)
    w = tf.cast(img_features['w'], tf.int32)
    c = tf.cast(img_features['c'], tf.int32)
    image_id = img_features['image_id']
    image = tf.decode_raw(img_features['image'], tf.uint8)
    image = tf.reshape(image, [h, w, c])
    label = tf.cast(img_features['label'], tf.int32)

    # No data augmentation for validation/test data.
    image = tf.image.resize_images(image, (image_size, image_size))
    image = tf.image.per_image_standardization(image)
    # Pin the static shape so the batching queue knows the element shape.
    image = tf.reshape(image, [image_size, image_size, image_channel])

    image_batch, label_batch, image_id_batch = tf.train.batch(
        [image, label, image_id],
        batch_size=batch_size,
        num_threads=64,  # note: multiple threads may change the image order
        capacity=2048,
    )
    return image_batch, label_batch, image_id_batch
def batch_to_list_of_dicts(indices2, image_id_batch2):
    """Convert one batch of predictions into submission-format records.

    Args:
        indices2: 2-D array whose row ``i`` holds the predicted label ids for
            example ``i`` (each row must support ``.tolist()``).
        image_id_batch2: Sequence of image ids as ``bytes``, aligned with the
            rows of ``indices2``.

    Returns:
        A list with one dict per example, e.g.
        ``[{"image_id": "a0563e....jpg", "label_id": [5, 18, 32]}]``.
    """
    # Iterate over the data actually received instead of the module-level
    # `batch_size`, so the helper works for any batch length (including empty).
    return [
        {'image_id': image_id.decode(), 'label_id': row.tolist()}
        for row, image_id in zip(indices2, image_id_batch2)
    ]
def read_tfrecord2(tfrecord_file, batch_size, train_flag):
    """Train the baseline network, or evaluate a saved checkpoint.

    Training mode (``train_flag`` true): builds the shuffled training input
    pipeline and the network, runs the optimizer until the input queue is
    exhausted, prints mini-batch loss / top-3 accuracy every 100 steps, writes
    a checkpoint to ``checkfile`` every 2000 steps and a final one to
    ``save_path/final_model.ckpt``.

    Evaluation mode: restores ``save_path/final_model.ckpt``, streams the file
    once through the network, accumulates top-3 accuracy over all batches and
    writes the top-3 predictions per image to ``submit.json``.

    Two input functions exist because test records carry an ``image_id`` and
    because the training reader is where data augmentation would be added;
    validation and test data therefore both use ``read_and_decode_test``.

    Relies on the module-level globals ``num_epochs`` and ``checkfile`` and on
    ``network.inference``.

    Args:
        tfrecord_file: Path of the TFRecord file to read.
        batch_size: Mini-batch size.
        train_flag: True to train, False to evaluate.
    """
    if train_flag:
        train_batch, train_label_batch = read_and_decode(
            tfrecord_file, batch_size, num_epochs)
        optimizer, loss, logits, keep_prob = network.inference(
            train_batch, train_label_batch)
        # Top-3 accuracy of the current mini-batch (non-streaming).
        accuracy = tf.reduce_mean(tf.cast(
            tf.nn.in_top_k(predictions=logits, targets=train_label_batch, k=3),
            tf.float32))
    else:
        val_test_batch, val_test_label_batch, image_id_batch = read_and_decode_test(
            tfrecord_file, batch_size, num_epochs)
        # The optimizer and loss returned by the network are not needed here.
        # Fix: bind the dropout placeholder to `keep_prob`; the original bound
        # it to `val_test_keep_prob` but fed `keep_prob`, raising NameError.
        _, _, val_test_logits, keep_prob = network.inference(
            val_test_batch, val_test_label_batch)
        in_top3 = tf.cast(
            tf.nn.in_top_k(predictions=val_test_logits,
                           targets=val_test_label_batch, k=3),
            tf.float32)
        # Per-mini-batch accuracy (only informative, not the final metric).
        val_test_accuracy_batch = tf.reduce_mean(in_top3)
        # Streaming mean accumulated over every batch seen so far.
        # https://github.com/tensorflow/tensorflow/issues/9498
        val_test_accuracy, val_test_accuracy_update = tf.metrics.mean(in_top3)
        _, indices = tf.nn.top_k(val_test_logits, 3)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Local variables must be initialized as well (epoch counter of the
        # input producer, streaming-metric accumulators).
        # https://github.com/tensorflow/tensorflow/issues/1045
        sess.run(tf.group(tf.global_variables_initializer(),
                          tf.local_variables_initializer()))
        print("Initialized")
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        if train_flag:
            step = 0
            start_time = time.time()
            try:
                while not coord.should_stop():
                    sess.run(optimizer, feed_dict={keep_prob: 0.5})
                    step += 1
                    if step % 100 == 0:
                        # Fix: evaluate loss and accuracy in ONE run so both
                        # describe the same batch. Two separate runs dequeued
                        # two different batches. This report still consumes
                        # one batch that is not trained on.
                        l, acc = sess.run([loss, accuracy],
                                          feed_dict={keep_prob: 1})
                        duration = time.time() - start_time
                        print("Minibatch loss at step %d: %.6f (%.3f sec)" % (step, l, duration))
                        print("Minibatch accuracy: %.6f" % acc)
                    if step % 2000 == 0:
                        saver.save(sess, checkfile, global_step=step)
                        print('writing checkpoint at step %s' % step)
            except tf.errors.OutOfRangeError:
                # Raised once the input queue has delivered `num_epochs` epochs.
                print('Done training for %d epochs, %d steps.' % (num_epochs, step))
                saver.save(sess, "save_path/final_model.ckpt")
            finally:
                coord.request_stop()
        else:
            # Restore the trained variable values into the freshly built graph.
            saver.restore(sess, "save_path/final_model.ckpt")
            results = []
            step = 0
            try:
                while not coord.should_stop():
                    batch_acc, _, image_id_batch2, indices2 = sess.run(
                        [val_test_accuracy_batch, val_test_accuracy_update,
                         image_id_batch, indices],
                        feed_dict={keep_prob: 1})
                    step += 1
                    results += batch_to_list_of_dicts(indices2, image_id_batch2)
                    if step % 10 == 0:
                        print('Useless minibatch testing accuracy at step %d: %.6f' % (step, batch_acc))
            except tf.errors.OutOfRangeError:
                print('Done testing in, %d steps.' % (step))
                # Read the accumulated streaming mean. It depends only on the
                # metric's accumulator variables, so it is safe to evaluate
                # after the queue is closed and even if no batch was processed.
                final_accuracy = sess.run(val_test_accuracy)
                print('FInal Testing accuracy: %.6f' % (final_accuracy))
                # Write predictions in the submission format, e.g.
                # [{"image_id": "a0563e....jpg", "label_id": [5, 18, 32]}]
                print(len(results))
                print(results[0:20])
                with open('submit.json', 'w') as f:
                    json.dump(results, f)
            finally:
                coord.request_stop()
        coord.join(threads)
# Run configuration. These names are read as module-level globals by the
# functions defined above.
train_flag = True
image_size = 128
num_labels = 80
image_channel = 3
checkfile = 'save_path/model.ckpt'
#max_step = 65000
if not train_flag:
    tfrecord_file = '../ai_challenger_scene_train_20170904/val.tfrecord' #test
    # The streaming metric must accumulate over whole batches: if the dataset
    # size is not divisible by batch_size, the final partial batch is not used.
    batch_size = 16
    num_epochs = 1
else:
    tfrecord_file = '../ai_challenger_scene_train_20170904/train.tfrecord'
    # tfrecord_file_val = '../ai_challenger_scene_train_20170904/val.tfrecord' # validate while training
    batch_size = 32  # the author's machine can handle 128
    num_epochs = 38
    print('max step num is %.1f' % (num_epochs*53879.0/batch_size))
# Single entry call shared by both modes.
read_tfrecord2(tfrecord_file, batch_size, train_flag)
# with open('submit.json', 'r') as file1:
# submit_data = json.load(file1)
# with open('scene_validation_annotations_20170908.json', 'r') as file2:
# ref_data1 = json.load(file2)
# with open('ref.json', 'r') as file2:
# ref_data2 = json.load(file2)
# with open('submit0.json', 'r') as file3:
# submit0_data = json.load(file3)
# 53879 7120  (presumably the train / validation image counts -- TODO confirm)
network.py
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Copyright 2017 challenger.ai
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
Description:
Network structure of a simple CNN network like Alexnet
CHANGES:
Comments left by Yan Wang according to https://www.tensorflow.org/api_docs/ (python API r1.3)
one_hot_labels 改为 original_labels
思考
batch normalization 加哪
'''
import tensorflow as tf
# Learning rate handed to tf.train.AdamOptimizer in inference().
LEARNINGRATE = 1e-3
# Truncated normal: values more than 2 standard deviations from the mean are
# dropped and re-picked.
def weight_variable(shape, stddev=0.1):
    """Return a tf.Variable of `shape` initialised from a truncated normal.

    Args:
        shape: Shape of the variable.
        stddev: Standard deviation passed to ``tf.truncated_normal``.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=stddev))
def bias_variable(shape, bais=0.1):
    """Return a tf.Variable of `shape` filled with the constant `bais`.

    Args:
        shape: Shape of the variable.
        bais: Constant initial value for every element.
    """
    return tf.Variable(tf.constant(bais, shape=shape))
# x: input tensor of shape [batch, in_height, in_width, in_channels]
# w: filter / kernel tensor of shape
#    [filter_height, filter_width, in_channels, out_channels]
# strides: 1-D list of 4 ints; with the default NHWC format,
#    strides[0] = strides[3] = 1 is required.
# padding: "SAME" or "VALID" -- the padding algorithm to use.
def conv2d(x, w):
    """Convolve `x` with kernel `w` using stride 1 and SAME padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, w, strides=unit_strides, padding='SAME')
# ksize: list of ints (length >= 4), the window size per input dimension.
# strides: list of ints (length >= 4), the window stride per input dimension.
def max_pool_2x2(x):
    """Max-pool `x` with a 2x2 window, stride 2 and SAME padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
def max_pool_3x3(x):
    """Max-pool `x` with a 3x3 window, stride 2 and SAME padding."""
    window = [1, 3, 3, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
def avg_pool_3x3(x):
    """Average-pool `x` with a 3x3 window, stride 2 and SAME padding."""
    window = [1, 3, 3, 1]
    step = [1, 2, 2, 1]
    return tf.nn.avg_pool(x, ksize=window, strides=step, padding='SAME')
def inference(features, original_labels):
    """Build the baseline CNN together with its loss and training op.

    Layout: conv1 -> pool -> LRN, conv2 -> LRN -> pool, conv3 -> pool,
    fc1 (128 units) -> dropout -> fc2 (80 logits).

    Args:
        features: Image batch fed to the first convolution, which expects
            3 input channels.
        original_labels: Integer class ids (not one-hot), consumed by the
            sparse softmax cross-entropy.

    Returns:
        A tuple ``(train_step, cross_entropy, y_conv, keep_prob)``: the Adam
        training op, the mean cross-entropy loss, the logits, and the dropout
        keep-probability placeholder that must be fed on every run that
        evaluates the logits.
    """
    # NOTE: variables are created in the same order as before so that their
    # auto-generated names (and therefore existing checkpoints) stay valid.

    # conv1 -> pool1 -> norm1
    # kernel shape: [filter_height, filter_width, in_channels, out_channels]
    conv1_kernel = weight_variable([5, 5, 3, 64], stddev=1e-4)
    conv1_bias = bias_variable([64])
    conv1_act = tf.nn.relu(conv2d(features, conv1_kernel) + conv1_bias)
    pool1 = max_pool_3x3(conv1_act)
    # tf.nn.lrn is tf.nn.local_response_normalization
    norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001 / 9.0,
                      beta=0.75, name='norm1')

    # conv2 -> norm2 -> pool2
    conv2_kernel = weight_variable([5, 5, 64, 64], stddev=1e-2)
    conv2_bias = bias_variable([64])
    conv2_act = tf.nn.relu(conv2d(norm1, conv2_kernel) + conv2_bias)
    norm2 = tf.nn.lrn(conv2_act, depth_radius=4, bias=1.0, alpha=0.001 / 9.0,
                      beta=0.75, name='norm2')
    pool2 = max_pool_3x3(norm2)

    # conv3 -> pool3
    conv3_kernel = weight_variable([5, 5, 64, 64], stddev=1e-2)
    conv3_bias = bias_variable([64])
    conv3_act = tf.nn.relu(conv2d(pool2, conv3_kernel) + conv3_bias)
    pool3 = max_pool_3x3(conv3_act)

    # fc1: the 16x16 spatial size assumes 128x128 inputs halved by each of the
    # three stride-2 SAME poolings (128 -> 64 -> 32 -> 16).
    flat_dim = 16 * 16 * 64
    fc1_weights = weight_variable([flat_dim, 128])
    fc1_bias = bias_variable([128])
    pool3_flat = tf.reshape(pool3, [-1, flat_dim])
    fc1_act = tf.nn.relu(tf.matmul(pool3_flat, fc1_weights) + fc1_bias)

    # dropout, controlled by a placeholder supplied by the caller
    keep_prob = tf.placeholder("float")
    fc1_dropped = tf.nn.dropout(fc1_act, keep_prob)

    # fc2: 128 hidden units -> 80 class logits
    fc2_weights = weight_variable([128, 80])
    fc2_bias = bias_variable([80])
    y_conv = tf.matmul(fc1_dropped, fc2_weights) + fc2_bias

    # loss and optimizer
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=original_labels, logits=y_conv)
    cross_entropy = tf.reduce_mean(per_example_loss)
    train_step = tf.train.AdamOptimizer(LEARNINGRATE).minimize(cross_entropy)
    return train_step, cross_entropy, y_conv, keep_prob