Tensorflow 实现AlexNet 猫狗分类

原文地址:AlexNet

关于文章的理解,网上有很多博客可以参考,这里只推荐一篇形象解释 LRN(local response normalization)的回答(原文配图与链接在转载时丢失,可搜索 "LRN local response normalization 解释" 查找)。

实现步骤:

1.构建网络

import tensorflow as tf
from tensorflow.contrib import slim
import numpy as np
def AlexNet(x_input):
    """Build an AlexNet-style network with TF-Slim and return class logits.

    Args:
        x_input: input image batch tensor; presumably shaped
            (batch, 227, 227, 3) to match the placeholder defined
            below in this file — TODO confirm at call site.

    Returns:
        A (batch, 2) logits tensor. The final layer has no activation;
        softmax is applied by the loss function.
    """
    # Shared defaults for every conv and FC layer: ReLU activation,
    # Gaussian init (stddev 0.01), and L2 weight decay of 5e-4.
    with slim.arg_scope([slim.conv2d, slim.fully_connected], 
                        activation_fn=tf.nn.relu, 
                        weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.01), 
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        # layer 1: 96 11x11 filters, stride 4, VALID padding, then LRN and 3x3/2 max-pool
        net = slim.conv2d(x_input, 96, [11,11], padding="VALID", stride=4, scope="conv_1")
        net = tf.nn.local_response_normalization(net, depth_radius=5, bias=2, alpha=0.0001, beta=0.75)
        net = slim.max_pool2d(net, [3,3], 2, scope="pool_1")
        # layer 2: 256 5x5 filters (slim's default SAME padding), then LRN and max-pool
        net = slim.conv2d(net, 256, [5,5], scope="conv_2")
        net = tf.nn.local_response_normalization(net, depth_radius=5, bias=2, alpha=0.0001, beta=0.75)
        net = slim.max_pool2d(net, [3,3], 2, scope="pool_2")
        # layers 3 and 4: two identical 384-filter 3x3 convs via slim.repeat
        net = slim.repeat(net, 2, slim.conv2d, 384, [3,3], scope="conv_3_4")
        # layer 5: 256 3x3 filters followed by the final max-pool
        net = slim.conv2d(net, 256, [3,3], scope="conv_5")
        net = slim.max_pool2d(net, [3,3], 2, scope="pool_5")
        # flatten to (batch, features) before the fully connected layers
        net = slim.flatten(net, scope="flatten")
        # layer 6: FC 4096 with dropout (keep_prob=0.5 at train time)
        net = slim.fully_connected(net, 4096, scope="fc_6")
        net = slim.dropout(net, keep_prob=0.5)
        # layer 7: FC 4096 with dropout
        net = slim.fully_connected(net, 4096, scope="fc_7")
        net = slim.dropout(net, keep_prob=0.5)
        # output: 2-way logits (cat vs dog); original paper uses 1000 classes
        net = slim.fully_connected(net, 2, scope="output", activation_fn=None)

        return net

# Build the inputs, outputs, loss, train_op, and accuracy metric.
tf.reset_default_graph()
# Input images: 227x227 RGB; labels: one-hot over 2 classes (cat/dog).
x_inputs = tf.placeholder(shape=[None, 227, 227, 3], dtype=tf.float32)
y_labels = tf.placeholder(shape=[None, 2], dtype=tf.float32)
# model
predicts = AlexNet(x_inputs)
# loss function: softmax cross-entropy between one-hot labels and logits
loss = tf.losses.softmax_cross_entropy(y_labels, predicts)
# train op: SGD with momentum 0.9, fixed learning rate 0.01
optimizer = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9).minimize(loss)
# accuracy: fraction of samples whose argmax prediction matches the label
correct_prediction = tf.equal(tf.argmax(predicts,1), tf.argmax(y_labels,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

原文中最后一层是1000个节点,我这里改成2是因为我之后需要使用该网络来进行猫狗图片分类。

2 训练模型

datas = LoadDatas("../datas/PetImages/")

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
# Write the training results out to files.
loss_fp = open("loss.txt", "w")
accuracy_fp = open("accuracy.txt", "w")
error_log_fp = open("error_log", "w")
try:
    # 2000 steps, batch size 128; log loss every step and accuracy every 100.
    for i in range(2000):
        images, labels = datas.reads(batch_size=128)
        accuracy_out, _, loss_out = sess.run(
            [accuracy, optimizer, loss],
            feed_dict={x_inputs: images, y_labels: labels})
        loss_fp.write(str(i) + " : " + str(loss_out) + "\n")
        if i % 100 == 0:
            accuracy_fp.write(str(i) + " : " + str(accuracy_out) + "\n")
except Exception as e:
    # Record the failure; cleanup happens in the finally block below.
    error_log_fp.write(str(e))
finally:
    # Bug fix: error_log_fp used to be closed only on the exception path
    # and leaked on a clean run. Close every resource exactly once here.
    sess.close()
    loss_fp.close()
    accuracy_fp.close()
    error_log_fp.close()

数据集下载地址:kagglecatsanddogs_3367a.zip

3 LoadDatas 加载数据

自定义一个从文件中加载数据的类

from skimage import io,transform
import numpy as np
import glob

class LoadDatas(object):
    """Loader for the Kaggle cats/dogs image dataset.

    Keeps a list of all image paths with their class label (0 = cat,
    1 = dog) and reads a random batch of resized images on demand.
    """

    def __init__(self, filepath):
        """Index every .jpg under <filepath>/Cat and <filepath>/Dog.

        self.datas is a list of which each item is
        [image_file_path, class_label].
        """
        dogs = glob.glob(filepath + "/Dog/*.jpg")
        cats = glob.glob(filepath + "/Cat/*.jpg")
        classes = {0: cats, 1: dogs}
        self.datas = []
        # Renamed loop variables: the original shadowed the `filepath`
        # parameter inside the inner loop.
        for label, image_paths in classes.items():
            for image_path in image_paths:
                self.datas.append([image_path, label])

    def next_batch(self, batch_size=16):
        """Read one random batch of images.

        Returns:
            images: np.ndarray with shape (batch_size, 227, 227, 3)
            labels: np.ndarray with shape (batch_size, 2), one-hot
        """
        dataset = np.asarray(a=self.datas)
        np.random.shuffle(dataset)
        images = []
        labels = []
        for item in dataset[:batch_size]:
            img_raw = io.imread(item[0])
            img_raw = transform.resize(img_raw, (227, 227))
            # Robustness fix: the dataset contains grayscale and RGBA
            # images, which resize to (227, 227) and (227, 227, 4)
            # respectively and would break np.asarray's batch stacking.
            # Normalize both to 3 channels.
            if img_raw.ndim == 2:
                img_raw = np.stack([img_raw] * 3, axis=-1)
            elif img_raw.shape[-1] == 4:
                img_raw = img_raw[..., :3]
            label = self.label_to_list(int(item[1]))  # one-hot, shape=(2,)
            images.append(img_raw)
            labels.append(label)

        return np.asarray(images), np.asarray(labels)

    def label_to_list(self, label):
        """Return a length-2 one-hot list for the integer label (0 or 1)."""
        label_list = [0, 0]
        label_list[label] = 1
        return label_list

    def reads(self, batch_size=16, max_error_count=10):
        """Like next_batch, but retries on OSError (the Kaggle dataset
        contains corrupt files). Returns None after max_error_count
        consecutive failures."""
        error_count = 0
        while True:
            try:
                return self.next_batch(batch_size)
            except OSError:
                error_count += 1
                if error_count >= max_error_count:
                    return None

最后

  1. 在jupyter notebook中运行以上代码,方便调试
  2. AlexNet网络参数有点多,请使用GPU进行训练
  3. 首先理解AlexNet,最好是阅读全文
  4. 如有不足,望不吝赐教

你可能感兴趣的:(ML,Tensorflow,AlexNet)