原文地址:AlexNet
关于文章的理解,网上有很多博客可以参考,这里只给出 LRN(local response normalization)的一篇回答,其中形象地解释了 LRN(原回答中附有配图及地址链接)。
import tensorflow as tf
from tensorflow.contrib import slim
import numpy as np
def AlexNet(x_input, num_classes=2):
    """Build the AlexNet graph with TF-Slim and return the logits.

    Args:
        x_input: input image tensor; the placeholder below feeds it as
            (batch, 227, 227, 3), the classic AlexNet input size.
        num_classes: number of output logits.  The original paper used
            1000; the default of 2 matches the cat/dog task this post
            targets.

    Returns:
        Logits tensor of shape (batch, num_classes).  No softmax is
        applied here -- tf.losses.softmax_cross_entropy does that.
    """
    # Shared hyper-parameters for every conv / fc layer, as in the paper:
    # ReLU, N(0, 0.01) weight init, L2 weight decay 0.0005.
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        # layer 1: 96 filters of 11x11, stride 4, VALID padding, then LRN + 3x3/2 max-pool
        net = slim.conv2d(x_input, 96, [11, 11], padding="VALID", stride=4, scope="conv_1")
        net = tf.nn.local_response_normalization(net, depth_radius=5, bias=2, alpha=0.0001, beta=0.75)
        net = slim.max_pool2d(net, [3, 3], 2, scope="pool_1")
        # layer 2: 256 filters of 5x5, then LRN + max-pool
        net = slim.conv2d(net, 256, [5, 5], scope="conv_2")
        net = tf.nn.local_response_normalization(net, depth_radius=5, bias=2, alpha=0.0001, beta=0.75)
        net = slim.max_pool2d(net, [3, 3], 2, scope="pool_2")
        # layers 3 and 4: two identical 384-filter 3x3 convs via slim.repeat
        net = slim.repeat(net, 2, slim.conv2d, 384, [3, 3], scope="conv_3_4")
        # layer 5: 256 filters of 3x3, then max-pool
        net = slim.conv2d(net, 256, [3, 3], scope="conv_5")
        net = slim.max_pool2d(net, [3, 3], 2, scope="pool_5")
        # flatten to (batch, features) for the fully connected layers
        net = slim.flatten(net, scope="flatten")
        # layer 6: fc 4096 + dropout
        net = slim.fully_connected(net, 4096, scope="fc_6")
        net = slim.dropout(net, keep_prob=0.5)
        # layer 7: fc 4096 + dropout
        net = slim.fully_connected(net, 4096, scope="fc_7")
        net = slim.dropout(net, keep_prob=0.5)
        # output layer: raw logits, no activation
        net = slim.fully_connected(net, num_classes, scope="output", activation_fn=None)
        return net
# Build the inputs, model, loss, train op and the accuracy metric.
tf.reset_default_graph()

# Placeholders: 227x227 RGB images and one-hot labels over 2 classes.
x_inputs = tf.placeholder(tf.float32, shape=[None, 227, 227, 3])
y_labels = tf.placeholder(tf.float32, shape=[None, 2])

# Forward pass: logits from the AlexNet defined above.
predicts = AlexNet(x_inputs)

# Softmax cross-entropy between the one-hot labels and the logits.
loss = tf.losses.softmax_cross_entropy(y_labels, predicts)

# Momentum SGD; note `optimizer` actually holds the train op returned by minimize().
optimizer = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9).minimize(loss)

# Accuracy: fraction of rows where the predicted class equals the labeled class.
correct_prediction = tf.equal(tf.argmax(predicts, axis=1), tf.argmax(y_labels, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
原文中最后一层是1000个节点,我这里改成2是因为我之后需要使用该网络来进行猫狗图片分类。
# Load the cat/dog dataset (the LoadDatas class is defined later in this post).
datas = LoadDatas("../datas/PetImages/")

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# Write training results to text files so the curves can be plotted later.
# `with` guarantees all three handles are closed on every exit path -- the
# original only closed the error log inside the except branch, leaking it
# (and the loss/accuracy files) on a normal run.
with open("loss.txt", "w") as loss_fp, \
     open("accuracy.txt", "w") as accuracy_fp, \
     open("error_log", "w") as error_log_fp:
    try:
        for i in range(2000):
            images, labels = datas.reads(batch_size=128)
            accuracy_out, _, loss_out = sess.run(
                [accuracy, optimizer, loss],
                feed_dict={x_inputs: images, y_labels: labels})
            # loss every step, accuracy every 100 steps
            loss_fp.write(str(i) + " : " + str(loss_out) + "\n")
            if i % 100 == 0:
                accuracy_fp.write(str(i) + " : " + str(accuracy_out) + "\n")
    except Exception as e:
        # Best-effort logging, as in the original: record the failure and
        # stop training instead of crashing the script.
        error_log_fp.write(str(e))
    finally:
        sess.close()
数据集下载地址:kagglecatsanddogs_3367a.zip
自定义一个从文件中加载数据的类
from skimage import io,transform
import numpy as np
import glob
class LoadDatas(object):
    """Lazy batch loader for the Kaggle cats-vs-dogs image set.

    Keeps only a list of [image_file_path, class_label] pairs in memory;
    actual pixels are read from disk when a batch is requested, so the
    whole dataset never has to fit in RAM.  Label convention: 0 = cat,
    1 = dog.
    """

    def __init__(self, filepath):
        """Scan <filepath>/Cat/*.jpg and <filepath>/Dog/*.jpg.

        self.datas is a list whose items are [image_file_path, class_label].
        """
        dogs = glob.glob(filepath + "/Dog/*.jpg")
        cats = glob.glob(filepath + "/Cat/*.jpg")
        classes = {0: cats, 1: dogs}
        self.datas = []
        # Loop variables renamed: the original reused `filepath` here,
        # shadowing the constructor argument.
        for label, files in classes.items():
            for image_path in files:
                self.datas.append([image_path, label])

    def next_batch(self, batch_size=16):
        """Sample a random batch of images and one-hot labels.

        Returns:
            images: np.ndarray with shape (batch_size, 227, 227, 3),
                float values in [0, 1] (transform.resize rescales).
            labels: np.ndarray with shape (batch_size, 2), one-hot.

        Raises:
            OSError (and whatever else skimage raises) for unreadable
            files; `reads` below retries around that.
        """
        dataset = np.asarray(a=self.datas)
        np.random.shuffle(dataset)
        images = []
        labels = []
        for item in dataset[:batch_size]:
            img_raw = io.imread(item[0])
            # The PetImages dump contains grayscale and RGBA files; without
            # this normalization the per-image shapes differ and
            # np.asarray(images) cannot build a dense (N, 227, 227, 3) array.
            if img_raw.ndim == 2:
                img_raw = np.stack([img_raw] * 3, axis=-1)
            elif img_raw.shape[-1] == 4:
                img_raw = img_raw[..., :3]
            img_raw = transform.resize(img_raw, (227, 227))  # -> (227, 227, 3)
            images.append(img_raw)
            labels.append(self.label_to_list(int(item[1])))
        return np.asarray(images), np.asarray(labels)

    def label_to_list(self, label):
        """One-hot encode `label` (0 or 1) as a plain 2-element list."""
        label_list = [0, 0]
        label_list[label] = 1
        return label_list

    def reads(self, batch_size=16, max_error_count=10):
        """next_batch with retries.

        The dataset is known to contain a few corrupt JPEGs whose read
        raises OSError; each retry draws a fresh random batch.

        Returns (images, labels), or None after max_error_count
        consecutive failures -- note the caller unpacks the result, so a
        persistent failure still surfaces there as a TypeError.
        """
        count = 0
        while True:
            try:
                return self.next_batch(batch_size)
            except OSError:
                count += 1
                if count >= max_error_count:
                    return None