This post is just a personal record so I don't forget. It is not meant as an introduction; for the full version, see this excellent repo: https://github.com/CraneHzm/OxFlowers_BCNN (its author uses a custom TensorFlow Estimator).
The first step is generating the TFRecord file:
import tensorflow as tf
from PIL import Image
import os


def create_train_original_tfrecord(train_image_path, save_path='model/OriginalModel', mode='train'):
    cwd = os.getcwd()
    # Each subdirectory name is an integer class label.
    classes = os.listdir(os.path.join(cwd, train_image_path))
    if mode == 'train':
        writer = tf.python_io.TFRecordWriter(os.path.join(cwd, save_path, 'trainOriginal.tfrecords'))
    elif mode == 'test':
        writer = tf.python_io.TFRecordWriter(os.path.join(cwd, save_path, 'testOriginal.tfrecords'))
    for index, name in enumerate(classes):
        classes_path = os.path.join(cwd, train_image_path, name)
        for image_name in os.listdir(classes_path):
            image_path = os.path.join(classes_path, image_name)
            print(image_name)
            img = Image.open(image_path)
            img = img.convert('RGB')  # guard against grayscale/RGBA inputs
            img = img.resize((224, 224))
            img_raw = img.tobytes()
            example = tf.train.Example(features=tf.train.Features(feature={
                'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[int(name)])),
                'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw]))
            }))
            writer.write(example.SerializeToString())
    writer.close()


if __name__ == "__main__":
    create_train_original_tfrecord(train_image_path='TrainImage')
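Before moving on, it helps to sanity-check the file that was just written. A minimal sketch (a hypothetical check script; tf.python_io.tf_record_iterator is the TF 1.x record reader, and the path matches the script above):

import tensorflow as tf

# Count the records and peek at the first example's label.
path = 'model/OriginalModel/trainOriginal.tfrecords'
count = 0
for record in tf.python_io.tf_record_iterator(path):
    if count == 0:
        example = tf.train.Example()
        example.ParseFromString(record)
        print('first label:', example.features.feature['label'].int64_list.value[0])
    count += 1
print('total records:', count)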
Next comes training with the bilinear network.
In this network I apply the bilinear transform twice, to check whether it improves accuracy. Compared with a plain single-path convolutional network, it gained nearly 6 percentage points on my dataset. I have not frozen/exported the network; this is only a proof-of-concept model.
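The heart of bilinear pooling is a single einsum: for a feature map of shape [batch, H, W, C], tf.einsum('ijkm,ijkn->imn', x, x) forms the outer product of the C-dimensional channel vector with itself at every spatial position and sums over H and W, giving a [batch, C, C] matrix. A toy sketch with dummy inputs (shapes chosen to match pool6 in the code below), just to show the shape arithmetic:

import tensorflow as tf

x = tf.random_normal([2, 3, 3, 128])        # [batch, H, W, C], same shape as pool6 below
phi = tf.einsum('ijkm,ijkn->imn', x, x)     # outer products summed over H*W -> (2, 128, 128)
phi2 = tf.einsum('ijm,ijn->imn', phi, phi)  # second bilinear transform -> (2, 128, 128)
print(phi.shape)   # (2, 128, 128)
print(phi2.shape)  # (2, 128, 128)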
import tensorflow as tf
import os
ImageHeight = 224
ImageWidth = 224
ImageChannels = 3
CategoryNum = 3
Batch_Size = 20
Steps = 100
def parse_function(example_proto):
    features = tf.parse_single_example(example_proto, features={
        'label': tf.FixedLenFeature([], tf.int64),
        'img_raw': tf.FixedLenFeature([], tf.string),
    })
    imgs = tf.decode_raw(features['img_raw'], tf.uint8)
    imgs = tf.reshape(imgs, [ImageWidth, ImageHeight, ImageChannels])
    # Scale pixels to [-0.5, 0.5].
    imgs = tf.cast(imgs, tf.float32) * (1. / 255) - 0.5
    labels = tf.cast(features['label'], tf.int64)
    return imgs, labels
def read_tfrecord(tfrecords_path):
    dataset = tf.data.TFRecordDataset(tfrecords_path)
    dataset = dataset.map(parse_function)
    dataset = dataset.shuffle(1000).repeat().batch(Batch_Size)
    train_iterator = dataset.make_one_shot_iterator()
    features, labels = train_iterator.get_next()
    return features, labels
def main():
    features, labels = read_tfrecord(os.path.join(os.getcwd(), 'model/OriginalModel/trainOriginal.tfrecords'))
    print(labels.shape)
    print(features.shape)
    input_layer = tf.reshape(features, shape=[-1, ImageHeight, ImageWidth, ImageChannels], name='input_layer')
    # Convolutional Layer #1
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=4,
        kernel_size=3,
        padding="same")
    # Batch Normalization Layer #1
    # Batch normalization (1) greatly speeds up training, so convergence is much
    # faster; (2) it also improves classification accuracy -- one explanation is
    # that, like Dropout, it acts as a regularizer against overfitting, so
    # comparable results can be reached without Dropout; (3) it also makes tuning
    # much easier: initialization matters less and larger learning rates can be used.
    # training=True so batch statistics are used and updated during training.
    bn1 = tf.layers.batch_normalization(inputs=conv1, training=True)
    layer1 = tf.nn.relu(bn1)
    # Pooling Layer #1
    pool1 = tf.layers.max_pooling2d(inputs=layer1, pool_size=[2, 2], strides=2)
    # Convolutional Layer #2
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=8,
        kernel_size=3,
        padding="same")
    # Batch Normalization Layer #2
    bn2 = tf.layers.batch_normalization(inputs=conv2, training=True)
    layer2 = tf.nn.relu(bn2)
    # Pooling Layer #2
    pool2 = tf.layers.max_pooling2d(inputs=layer2, pool_size=[2, 2], strides=2)
    # Convolutional Layer #3
    conv3 = tf.layers.conv2d(
        inputs=pool2,
        filters=16,
        kernel_size=3,
        padding="same")
    # Batch Normalization Layer #3
    bn3 = tf.layers.batch_normalization(inputs=conv3, training=True)
    layer3 = tf.nn.relu(bn3)
    # Pooling Layer #3
    pool3 = tf.layers.max_pooling2d(inputs=layer3, pool_size=[2, 2], strides=2)
    # Convolutional Layer #4
    conv4 = tf.layers.conv2d(
        inputs=pool3,
        filters=32,
        kernel_size=3,
        padding="same")
    # Batch Normalization Layer #4
    bn4 = tf.layers.batch_normalization(inputs=conv4, training=True)
    layer4 = tf.nn.relu(bn4)
    # Pooling Layer #4
    pool4 = tf.layers.max_pooling2d(inputs=layer4, pool_size=[2, 2], strides=2)
    # Convolutional Layer #5
    conv5 = tf.layers.conv2d(
        inputs=pool4,
        filters=64,
        kernel_size=3,
        padding="same")
    # Batch Normalization Layer #5
    bn5 = tf.layers.batch_normalization(inputs=conv5, training=True)
    layer5 = tf.nn.relu(bn5)
    # Pooling Layer #5
    pool5 = tf.layers.max_pooling2d(inputs=layer5, pool_size=[2, 2], strides=2)
    # Convolutional Layer #6
    conv6 = tf.layers.conv2d(
        inputs=pool5,
        filters=128,
        kernel_size=3,
        padding="same")
    # Batch Normalization Layer #6
    bn6 = tf.layers.batch_normalization(inputs=conv6, training=True)
    layer6 = tf.nn.relu(bn6)
    # Pooling Layer #6: 224 halved six times leaves a 3x3x128 map.
    pool6 = tf.layers.max_pooling2d(inputs=layer6, pool_size=[2, 2], strides=2)
    print(layer6)
    print(pool6)
    # Bilinear pooling: sum the outer product of the 128-d channel vector with
    # itself over all spatial positions -> [batch, 128, 128].
    phi_I = tf.einsum('ijkm,ijkn->imn', pool6, pool6)
    # Second bilinear transform, applied to the resulting matrices.
    phi_I = tf.einsum('ijm,ijn->imn', phi_I, phi_I)
    print("First:{}".format(phi_I))
    phi_I = tf.reshape(phi_I, [-1, 128 * 128])
    print("Second:{}".format(phi_I))
    # Scale down the bilinear features before batch normalization.
    phi_I = tf.divide(phi_I, 49)
    print("Third:{}".format(phi_I))
    phi_I = tf.layers.batch_normalization(inputs=phi_I, training=True)
    print(phi_I)
    # input("input: ")
    # pool6_flat is computed but unused: the classifier is fed the bilinear features.
    pool6_flat = tf.reshape(pool6, [-1, pool6.shape[1] * pool6.shape[2] * pool6.shape[3]])
    # rate is the fraction of units dropped; training=True enables dropout.
    dropout = tf.layers.dropout(inputs=phi_I, rate=0.8, training=True)
    logits = tf.layers.dense(inputs=dropout, units=CategoryNum, name='logits')
    predictions = tf.argmax(input=logits, axis=1, name='classes')
    probabilities = tf.nn.softmax(logits=logits, name='probabilities')
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    correct_prediction = tf.equal(tf.argmax(probabilities, axis=1), labels)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
    # Batch-norm moving statistics live in UPDATE_OPS; run them with the train op.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.GradientDescentOptimizer(0.001).minimize(loss=loss)
    with tf.Session() as sess:
        accuracy_mean = 0
        sess.run(tf.global_variables_initializer())
        for i in range(Steps):
            # Run the train op and the accuracy in one call so both see the same
            # batch (each sess.run pulls a fresh batch from the one-shot iterator).
            _, the_accuracy = sess.run([optimizer, accuracy])
            accuracy_mean += the_accuracy
            print('step:{0} accuracy:{1}'.format(i, the_accuracy))
        print("final accuracy:{0}".format(accuracy_mean / Steps))


if __name__ == "__main__":
    main()
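As noted above, the model is never persisted. If you did want to keep the trained weights, one option (a minimal sketch; the checkpoint path is hypothetical) is a tf.train.Saver created after the graph is built and called once training ends:

saver = tf.train.Saver()  # build after the graph is defined
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # ... training loop from main() above ...
    saver.save(sess, os.path.join('model', 'OriginalModel', 'bcnn.ckpt'))  # hypothetical path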