I recently spent some time building AlexNet with TensorFlow, training and testing it, and watching the training process in TensorBoard. Instead of using TensorFlow's built-in MNIST reader, I load MNIST through Keras, which returns NumPy arrays, preprocess those arrays myself and then feed them to the network; tqdm is used to show a progress bar during training. The complete code is below for reference.
```python
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 3 15:10:40 2018
@author: leex
"""
import tensorflow as tf
#from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
#from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
from keras.datasets import mnist
from keras.utils import to_categorical
import os
import cv2
import time
from tqdm import tqdm
import random
os.environ['CUDA_VISIBLE_DEVICES']='0'
log_dir = '/opt/Data/lixiang/alexnet/log'
model_path = '/opt/Data/lixiang/alexnet/model'
model_id = np.int64(time.strftime('%Y%m%d%H%M', time.localtime(time.time())))
#n_input = 784
n_output = 10
lr = 0.00001
dropout_rate = 0.75
epochs = 20
test_step = 10
batch_size = 32
image_size = 224
def load_mnist(image_size):
    # load MNIST as NumPy arrays, resize to image_size x image_size and convert to 3 channels
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    train_image = [cv2.cvtColor(cv2.resize(img, (image_size, image_size)), cv2.COLOR_GRAY2BGR) for img in x_train]
    test_image = [cv2.cvtColor(cv2.resize(img, (image_size, image_size)), cv2.COLOR_GRAY2BGR) for img in x_test]
    train_image = np.asarray(train_image)
    test_image = np.asarray(test_image)
    train_label = to_categorical(y_train)
    test_label = to_categorical(y_test)
    print('finish loading data!')
    return train_image, train_label, test_image, test_label
def get_batch(image, label, batch_size, now_batch, total_batch):
    if now_batch < total_batch - 1:
        image_batch = image[now_batch*batch_size:(now_batch+1)*batch_size]
        label_batch = label[now_batch*batch_size:(now_batch+1)*batch_size]
    else:
        image_batch = image[now_batch*batch_size:]
        label_batch = label[now_batch*batch_size:]
#    image_batch = tf.convert_to_tensor(image_batch)
#    label_batch = tf.convert_to_tensor(label_batch)
    return image_batch, label_batch
def shuffle_set(train_image, train_label, test_image, test_label):
    # shuffle images and labels with the same random index list
    train_row = list(range(len(train_label)))
    random.shuffle(train_row)
    train_image = train_image[train_row]
    train_label = train_label[train_row]
    test_row = list(range(len(test_label)))
    random.shuffle(test_row)
    test_image = test_image[test_row]
    test_label = test_label[test_row]
    return train_image, train_label, test_image, test_label
def print_layer(layer):
    print(layer.op.name + ':' + str(layer.get_shape().as_list()))
# define layers
def conv(x, kernel, strides, b):
    return tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, kernel, strides, padding='SAME'), b))
def max_pooling(x, kernel, strides):
    return tf.nn.max_pool(x, kernel, strides, padding='VALID')
def fc(x, w, b):
    return tf.nn.relu(tf.add(tf.matmul(x, w), b))
# define variables
weights = {
    # AlexNet convolutional and fully-connected weights
    'wc1': tf.Variable(tf.random_normal([11, 11, 3, 96], dtype=tf.float32, stddev=0.1), name='weights1'),
    'wc2': tf.Variable(tf.random_normal([5, 5, 96, 256], dtype=tf.float32, stddev=0.1), name='weights2'),
    'wc3': tf.Variable(tf.random_normal([3, 3, 256, 384], dtype=tf.float32, stddev=0.1), name='weights3'),
    'wc4': tf.Variable(tf.random_normal([3, 3, 384, 384], dtype=tf.float32, stddev=0.1), name='weights4'),
    'wc5': tf.Variable(tf.random_normal([3, 3, 384, 256], dtype=tf.float32, stddev=0.1), name='weights5'),
    'wd1': tf.Variable(tf.random_normal([6*6*256, 4096], dtype=tf.float32, stddev=0.1), name='weights_fc1'),
    'wd2': tf.Variable(tf.random_normal([4096, 1000], dtype=tf.float32, stddev=0.1), name='weights_fc2'),
    'wd3': tf.Variable(tf.random_normal([1000, n_output], dtype=tf.float32, stddev=0.1), name='weights_fc3'),
    # weights for the small three-layer FC_model defined below
    'd1': tf.Variable(tf.random_normal([28*28*3, 1000], dtype=tf.float32, stddev=0.1), name='weights_fc1'),
    'd2': tf.Variable(tf.random_normal([1000, 1000], dtype=tf.float32, stddev=0.1), name='weights_fc2'),
    'd3': tf.Variable(tf.random_normal([1000, n_output], dtype=tf.float32, stddev=0.1), name='weights_fc3'),
}
bias = {
    'bc1': tf.Variable(tf.random_normal([96]), name='bias1'),
    'bc2': tf.Variable(tf.random_normal([256]), name='bias2'),
    'bc3': tf.Variable(tf.random_normal([384]), name='bias3'),
    'bc4': tf.Variable(tf.random_normal([384]), name='bias4'),
    'bc5': tf.Variable(tf.random_normal([256]), name='bias5'),
    'bd1': tf.Variable(tf.random_normal([4096]), name='bias_fc1'),
    'bd2': tf.Variable(tf.random_normal([1000]), name='bias_fc2'),
    'bd3': tf.Variable(tf.random_normal([n_output]), name='bias_fc3'),
    'd1': tf.Variable(tf.random_normal([1000]), name='bias_fc1'),
    'd2': tf.Variable(tf.random_normal([1000]), name='bias_fc2'),
    'd3': tf.Variable(tf.random_normal([n_output]), name='bias_fc3'),
}
strides = {
    'sc1': [1, 4, 4, 1],
    'sc2': [1, 1, 1, 1],
    'sc3': [1, 1, 1, 1],
    'sc4': [1, 1, 1, 1],
    'sc5': [1, 1, 1, 1],
    'sp1': [1, 2, 2, 1],
    'sp2': [1, 2, 2, 1],
    'sp3': [1, 2, 2, 1]
}
pooling_size = {
    'kp1': [1, 3, 3, 1],
    'kp2': [1, 3, 3, 1],
    'kp3': [1, 3, 3, 1]
}
#build model
def alexnet(inputs, weights, bias, strides, pooling_size, keep_prob):
    with tf.name_scope('conv1'):
        conv1 = conv(inputs, weights['wc1'], strides['sc1'], bias['bc1'])
        print_layer(conv1)
    with tf.name_scope('pool1'):
        pool1 = max_pooling(conv1, pooling_size['kp1'], strides['sp1'])
        print_layer(pool1)
    with tf.name_scope('conv2'):
        conv2 = conv(pool1, weights['wc2'], strides['sc2'], bias['bc2'])
        print_layer(conv2)
    with tf.name_scope('pool2'):
        pool2 = max_pooling(conv2, pooling_size['kp2'], strides['sp2'])
        print_layer(pool2)
    with tf.name_scope('conv3'):
        conv3 = conv(pool2, weights['wc3'], strides['sc3'], bias['bc3'])
        print_layer(conv3)
    with tf.name_scope('conv4'):
        conv4 = conv(conv3, weights['wc4'], strides['sc4'], bias['bc4'])
        print_layer(conv4)
    with tf.name_scope('conv5'):
        conv5 = conv(conv4, weights['wc5'], strides['sc5'], bias['bc5'])
        print_layer(conv5)
    with tf.name_scope('pool3'):
        pool3 = max_pooling(conv5, pooling_size['kp3'], strides['sp3'])
        print_layer(pool3)
    flatten = tf.reshape(pool3, [-1, 6*6*256])
    with tf.name_scope('fc1'):
        fc1 = fc(flatten, weights['wd1'], bias['bd1'])
        fc1_drop = tf.nn.dropout(fc1, keep_prob)
        print_layer(fc1_drop)
    with tf.name_scope('fc2'):
        fc2 = fc(fc1_drop, weights['wd2'], bias['bd2'])
        fc2_drop = tf.nn.dropout(fc2, keep_prob)
        print_layer(fc2_drop)
    with tf.name_scope('fc3'):
        # the output layer is linear: no ReLU on the logits (see the note below the code)
        outputs = tf.matmul(fc2_drop, weights['wd3']) + bias['bd3']
        print_layer(outputs)
    return outputs
def FC_model(inputs, weights, bias):
    flatten = tf.reshape(inputs, (-1, 28*28*3))
    with tf.name_scope('fc1'):
        fc1 = fc(flatten, weights['d1'], bias['d1'])
    with tf.name_scope('fc2'):
        fc2 = fc(fc1, weights['d2'], bias['d2'])
    with tf.name_scope('fc3'):
        # linear output layer here as well: no ReLU on the logits
        out = tf.matmul(fc2, weights['d3']) + bias['d3']
    return out
x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
y = tf.placeholder(tf.float32, [None, n_output])
keep_prob = tf.placeholder(tf.float32)
pred = alexnet(x, weights, bias, strides, pooling_size, keep_prob)
#pred_logits = tf.nn.softmax(pred)
#loss = -tf.reduce_mean(tf.reduce_sum(y*tf.log(pred_logits+0.001), reduction_indices=1))
#pred = FC_model(x, weights, bias)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = y, logits = pred))
tf.summary.scalar('loss', loss)
train_step = tf.train.AdamOptimizer(learning_rate = lr).minimize(loss)
correct = tf.equal(tf.argmax(y,1), tf.argmax(pred,1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
tf.summary.scalar('accuracy', accuracy)
init = tf.global_variables_initializer()
merge = tf.summary.merge_all()
saver = tf.train.Saver()
train_image, train_label, test_image, test_label = load_mnist(image_size)
with tf.Session() as sess:
    sess.run(init)
    train_writer = tf.summary.FileWriter(log_dir + '/train', sess.graph)
    test_writer = tf.summary.FileWriter(log_dir + '/test')
    # ceil so the last, smaller batch is included without ever producing an empty batch
    total_batch = int(np.ceil(len(train_label) / float(batch_size)))
    test_total_batch = int(np.ceil(len(test_label) / float(batch_size)))
    for epoch in range(epochs):
        print('epoch: ' + str(epoch))
        train_image, train_label, test_image, test_label = shuffle_set(train_image, train_label, test_image, test_label)
        # train process
        train_accuracy_list = []
        train_loss_list = []
        for i in tqdm(range(total_batch)):
            xs, ys = get_batch(train_image, train_label, batch_size, i, total_batch)
            summary, train_accuracy, train_loss, _ = sess.run([merge, accuracy, loss, train_step],
                                                              feed_dict={x: xs, y: ys, keep_prob: dropout_rate})
            train_accuracy_list.append(train_accuracy)
            train_loss_list.append(train_loss)
        train_writer.add_summary(summary, epoch)
        print('train_acc:' + str(np.mean(train_accuracy_list)))
        print('train_loss:' + str(np.mean(train_loss_list)))
        # test process
        if (epoch+1) % test_step == 0:
            test_accuracy_list = []
            test_loss_list = []
            for j in range(test_total_batch):
                x_test_batch, y_test_batch = get_batch(test_image, test_label, batch_size, j, test_total_batch)
                # no dropout at evaluation time, so keep_prob is 1.0 here
                summary, test_accuracy, test_loss = sess.run([merge, accuracy, loss],
                                                             feed_dict={x: x_test_batch, y: y_test_batch, keep_prob: 1.0})
                test_accuracy_list.append(test_accuracy)
                test_loss_list.append(test_loss)
            test_writer.add_summary(summary, epoch)
            print('test_acc:' + str(np.mean(test_accuracy_list)))
            print('test_loss:' + str(np.mean(test_loss_list)))
    train_writer.close()
    test_writer.close()
    saver.save(sess, model_path + '/alexnet' + str(model_id))
```
In this code I also included FC_model, a small three-layer fully-connected network.
When I first started training, the loss was stuck at the constant value 2.3025851 (that magic number haunted me for a whole day; it is simply ln(10), the cross-entropy of a uniform prediction over the 10 classes), while the per-batch accuracy just bounced around between 0.03 and 0.125. Changing the network structure and the training data made no difference. After more than a day of fiddling I finally realized that the output layer must not go through a ReLU activation, yet I had built it by directly calling my own fully-connected helper fc(x, w, b), whose result has already been passed through the activation function!!
Important things get said three times:
No activation function on the output layer!!!
No activation function on the output layer!!!
No activation function on the output layer!!!
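To make the fix concrete, here is a minimal sketch of the wrong and the corrected output layer. The shapes and variable names (feat, w_out, b_out) are made up for illustration and are not the exact sizes used above; the point is only that tf.nn.softmax_cross_entropy_with_logits applies the softmax internally, so the logits it receives must come from a purely linear layer.

```python
import tensorflow as tf

feat = tf.placeholder(tf.float32, [None, 4096])   # features from the last hidden layer
y = tf.placeholder(tf.float32, [None, 10])        # one-hot labels
w_out = tf.Variable(tf.random_normal([4096, 10], stddev=0.1))
b_out = tf.Variable(tf.random_normal([10]))

# WRONG: wrapping the output layer in ReLU clips the logits to be non-negative,
# which is the failure mode described above (loss stuck near ln(10) = 2.3025851)
# logits = tf.nn.relu(tf.matmul(feat, w_out) + b_out)

# RIGHT: keep the output layer linear; the softmax is applied inside the loss op
logits = tf.matmul(feat, w_out) + b_out
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
```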
Next, a quick recap of the dimensions of the kernel, bias and strides parameters for each layer (a small shape-check sketch follows this list):
kernel: [kernel_size, kernel_size, depth, number]. The first two dimensions are the filter height and width (usually square). depth is the depth of the current input, i.e. the number of output channels of the previous layer (equal to the number of kernels in that layer); number is the number of kernels in this layer (and therefore the depth of the next layer's kernels).
bias: [number], a one-dimensional vector whose length equals the number of kernels in this layer.
strides: [1, k, k, 1], the step sizes along the four dimensions. The first and last dimensions (batch and channels) are never strided over during convolution or pooling, so they stay 1; k is the spatial stride.
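As a quick sanity check of these shape rules, the sketch below builds a single convolution plus pooling step with the conv1 setting from the code above (an 11x11x3x96 kernel, stride 4, followed by 3x3 / stride-2 max pooling) and prints the resulting static shapes; the variable names here are illustrative only.

```python
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 224, 224, 3])      # NHWC input
kernel = tf.Variable(tf.random_normal([11, 11, 3, 96]))   # [size, size, input_depth, num_kernels]
b = tf.Variable(tf.random_normal([96]))                    # one bias per kernel

conv_out = tf.nn.relu(tf.nn.bias_add(
    tf.nn.conv2d(x, kernel, strides=[1, 4, 4, 1], padding='SAME'), b))
pool_out = tf.nn.max_pool(conv_out, ksize=[1, 3, 3, 1],
                          strides=[1, 2, 2, 1], padding='VALID')

print(conv_out.get_shape().as_list())   # [None, 56, 56, 96]: 224 / 4 with SAME padding
print(pool_out.get_shape().as_list())   # [None, 27, 27, 96]: floor((56 - 3) / 2) + 1 with VALID padding
```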
## Results
The output shape of each layer is printed while the graph is being built.
Training:
The training results look quite good.
Testing:
The test results are also good; since the dataset is fairly simple, only a few epochs of training are already enough for decent performance.
Next, inspect the training process with TensorBoard.
On the command line, run `tensorboard --logdir=` followed by the directory train_writer writes to (log_dir + '/train' here), then open the web page it reports.
There you can browse the training curves.
Finally, the results after training for 20 epochs.