《面向机器智能的TensorFlow实践》第五章 Stanford Dogs

由于设备和时间的原因,将Stanford Dogs数据集减小至三个品种。

主要使用实现了TFRecord存储和CNN网络,《面向机器智能的TensorFlow实践》的部分代码。

还存在着loss震荡问题,求大佬修改指导,萌新欢迎大家学习讨论。

需要先将图片信息保存成TFRecord文件,再执行图片识别(引号中的Session先执行)。需要先建立对应文件存储的文件夹。

​from itertools import groupby
from tensorflow.python.ops import random_ops
from collections import defaultdict
import tensorflow as tf
import glob
from PIL import Image
import numpy as np

BATCH_SIZE = 1
IMAGE_WIDTH = 250
IMAGE_HEIGHT = 151
IMAGE_CHANNEL = 1
BREEDS = 3  # 狗的种类
# 对不同文件夹中的图片进行处理
image_filenames = glob.glob('imagenet-dogs/Images/n02*/*.jpg')
# print(image_filenames[0:2])
train_dataset = defaultdict(list)  # defaultdict可使用未定义的Key
test_dataset = defaultdict(list)

image_filename_with_breed = map(lambda filename: (filename.split("\\")[1], filename), image_filenames)
# python map(fun,[arg]+) return iterators,turn to list method:list(iterators)
# print(list(image_filename_with_breed)[0:2])
for dog_breed, breed_images in groupby(image_filename_with_breed, lambda x: x[0]):
    # dog_breed表示狗的类型
    for i, breed_image in enumerate(breed_images):
        # enumerate同时列出迭代对象的下标和值
        # print(i,breed_image)
        # 将20%的数据划入测试集
        if i % 5 == 0:
            test_dataset[dog_breed].append(breed_image[1])
        else:
            train_dataset[dog_breed].append(breed_image[1])
    breed_train_count = len(train_dataset[dog_breed])
    breed_test_count = len(test_dataset[dog_breed])
    assert round(breed_test_count / (breed_train_count + breed_test_count), 2) > 0.18, 'Not enough testing images'


def write_records_file(sess, dataset, record_location):
    current_index = 0
    writer = 0
    for breed, image_filenames in dataset.items():
        for image_filename in image_filenames:
            # 将每100个图片划入一TFRecord文件
            if current_index % 100 == 0:
                if writer:
                    writer.close()
                record_filename = "{record_location}-{current_index}.tfrecords".format(record_location=record_location,
                                                                                       current_index=current_index)
                print('record_filename:', record_filename)
                writer = tf.python_io.TFRecordWriter(record_filename)
            current_index += 1
            image_file = tf.read_file(image_filename)
            try:
                image = tf.image.decode_jpeg(image_file)
            except:
                print(image_filename)
                continue
            grayscale_image = tf.image.rgb_to_grayscale(image)  # 转化为灰度图
            resized_image = tf.image.resize_images(grayscale_image, [IMAGE_WIDTH, IMAGE_HEIGHT])  # 更改图片尺寸
            image_bytes = sess.run(tf.cast(resized_image, tf.uint8)).tobytes()
            image_label = breed.encode('utf-8')
            example = tf.train.Example(
                features=tf.train.Features(feature={
                    'label': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_label])),
                    'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_bytes]))
                }))  # 以比特流的形式进行存储,固定格式
            writer.write(example.SerializeToString())
    writer.close()


'''
#存储TFRecord session
with tf.Session() as sess:
	coord=tf.train.Coordinator()
	threads= tf.train.start_queue_runners(coord=coord)
	write_records_file(sess,test_dataset,'./imagenet-dogs/testing-images/testing-image')
	write_records_file(sess,train_dataset,'./imagenet-dogs/training-images/training-image')
	coord.request_stop()
	coord.join(threads)
'''


def read_records_file(record_path):
    # filename_queue = tf.train.string_input_producer(['./imagenet-dogs/training-images/training-image-100.tfrecords','./imagenet-dogs/training-images/training-image-100.tfrecords'])
    filename_queue = tf.train.string_input_producer(tf.train.match_filenames_once(record_path),
                                                    shuffle=True)  # shuffle参数True表示不按顺序执行,False表示按顺序执行
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized, features={
            'label': tf.FixedLenFeature([], tf.string),
            'image': tf.FixedLenFeature([], tf.string)
        })
    recordimage = tf.decode_raw(features['image'], tf.uint8)
    recordimage = tf.reshape(recordimage, [IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNEL])
    recordlabel = tf.cast(features['label'], tf.string)
    return recordlabel, recordimage


def Batch_dataset(image, label):
    min_after_dequeue = 10
    capacity = min_after_dequeue + 3 * BATCH_SIZE
    return tf.train.shuffle_batch([image, label], batch_size=BATCH_SIZE, capacity=capacity,
                                  min_after_dequeue=min_after_dequeue)


def Float_image_batch(image_batch):
    return tf.image.convert_image_dtype(image_batch, tf.float32)


def LabelsToNum(label_batch):
    # 将狗品种标签变为整数 label_batch
    # Find every directory name in the imagenet-dogs directory (n02085620-Chihuahua, ...)
    breeds = list(map(lambda c: c.split("\\")[-1], glob.glob("./imagenet-dogs/Images/*")))
    # Match every label from label_batch and return the index where they exist in the list of classes
    # Numlabels = tf.map_fn(lambda l: tf.where(tf.equal(breeds, l))[0, 0:1][0], label_batch, dtype=tf.int64)
    Numlabels = np.zeros([BATCH_SIZE, len(breeds)], int)
    for i in range(BATCH_SIZE):
        for j in range(len(breeds)):
            # print(label_batch[i],breeds[j].encode(encoding='utf-8'))
            if label_batch[i] == breeds[j].encode(encoding='utf-8'):
                Numlabels[i][j] = 1
    return Numlabels


# 需要提前定义graph
train_recordlabel, train_recordimage = read_records_file('./imagenet-dogs/training-images/*.tfrecords')
train_image_batch, train_label_batch = Batch_dataset(train_recordimage, train_recordlabel)
test_recordlabel, test_recordimage = read_records_file('./imagenet-dogs/testing-images/*.tfrecords')
test_image_batch, test_label_batch = Batch_dataset(test_recordimage, test_recordlabel)
# tf.train.shuffle_batch([example, label], batch_size=batch_size, capacity=capacity)  产生乱序的batch
# capacity是队列中的容量
float_trainimage_batch = Float_image_batch(train_image_batch)
float_testimage_batch = Float_image_batch(test_image_batch)

image_holder = tf.placeholder(tf.float32, [BATCH_SIZE, IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNEL])
label_holder = tf.placeholder(tf.int64, [BATCH_SIZE, BREEDS])

conv2d_layer_one = tf.contrib.layers.convolution2d(image_holder, num_outputs=32, kernel_size=(5, 5),
                                                   activation_fn=tf.nn.relu,
                                                   stride=(2, 2), trainable=True)
pool_layer_one = tf.nn.max_pool(conv2d_layer_one, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

conv2d_layer_two = tf.contrib.layers.convolution2d(pool_layer_one, num_outputs=64, kernel_size=(5, 5),
                                                   activation_fn=tf.nn.relu, stride=(1, 1), trainable=True)
pool_layer_two = tf.nn.max_pool(conv2d_layer_two, strides=[1, 2, 2, 1], ksize=[1, 2, 2, 1], padding='SAME')

flattened_layer_two = tf.reshape(
    pool_layer_two,
    [
        BATCH_SIZE,  # Each image in the image_batch
        -1  # Every other dimension of the input
    ])  # 用于连接全连接层
hidden_layer_three = tf.contrib.layers.fully_connected(

    flattened_layer_two,
    512,
    # weights_initializer=tf.Variable(tf.truncated_normal([38912, 200], stddev=0.1)),
    activation_fn=tf.nn.relu
)
# hidden_layer_three = tf.nn.dropout(hidden_layer_three, 0.1)
final_fully_connected = tf.contrib.layers.fully_connected(
    hidden_layer_three,
    BREEDS,  # Number of dog breeds in the ImageNet Dogs dataset
)
train_prediction = tf.nn.softmax(final_fully_connected)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=label_holder, logits=train_prediction))
optimizer = tf.train.GradientDescentOptimizer(1e-4).minimize(loss)

# correctlist = tf.equal(tf.argmax(label_holder, 1), tf.argmax(train_prediction, 1))
# accuracy = tf.reduce_mean(tf.cast(correctlist, tf.float32))

with tf.Session() as sess:
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    print(final_fully_connected)
    # print(sess.run(train_labels))
    # print(sess.run(train_labels))
    for h in range(10):
        print('第' + str(h) + '轮训练')
        for _ in range(3000):
            train_i = sess.run(float_trainimage_batch)
            train_l = sess.run(train_label_batch)
            train_labels = LabelsToNum(train_l)
            # iii = train_i.reshape(IMAGE_WIDTH, IMAGE_HEIGHT)
            # img = Image.fromarray(iii, 'L')  # 测试TFRecord转化生成的图片,灰度图“L”,彩色图“RGB”
            # img.save('./output/' + str(train_labels) + str(_) + '.jpg')  # 存下图片
            sess.run(optimizer, feed_dict={image_holder: train_i, label_holder: train_labels})
        print('loss函数:', sess.run(loss, feed_dict={image_holder: train_i, label_holder: train_labels}))
    for t in range(20):
        test_i = sess.run(float_testimage_batch)
        test_l = sess.run(test_label_batch)
        test_labels = LabelsToNum(test_l)
        result = sess.run(train_prediction, feed_dict={image_holder: test_i})
        print("测试:", test_labels, result)
    coord.request_stop()
    coord.join(threads)
​

 

你可能感兴趣的:(tensorflow)