Dataset images: https://download.csdn.net/download/qq_42363032/12737988
import tensorflow as tf
import random
import matplotlib.pyplot as plt
import numpy
import os
from scipy import misc           # misc.imresize requires SciPy < 1.3 (removed in later versions)
import tensorflow.contrib as con  # tf.contrib exists only in TensorFlow 1.x
tf.set_random_seed(777)  # fix the graph-level random seed for reproducibility
'''
Define the dataset size, the train/test split, and the image size.
'''
all_num = 1000  # total number of samples to process
train_num = int(all_num * 0.9)  # 90% training set
test_num = all_num - train_num  # 10% test set
IMGSIZE = 100  # input image width and height; too small and the images lose too much detail to recognize
'''
Load the images and split the dataset.
filePath: root directory of the images
+ collect the image paths, build the matching labels, and split into train/test
+ shuffle the dataset
+ read the images and return the train/test splits
'''
def get_dataset(filePath):
    img_loc = []  # list of image file paths
    label_list = []  # one-hot class labels
    catCount, dogCount = 0, 0  # per-class counters
    for item in os.listdir(filePath):  # listdir returns every file under the path; names look like cat.0.jpg
        path = os.path.join(filePath, item)
        img_loc.append(path)
        if item[:3] == 'cat':
            label_list.append([1, 0])
            catCount += 1
        else:
            label_list.append([0, 1])
            dogCount += 1
    print('The dataset contains %d cats and %d dogs.' % (catCount, dogCount))
    # shuffle the samples
    numpy.random.seed(1)
    shuffled_indices = numpy.random.permutation(numpy.arange(len(label_list)))
    x_shuffled = numpy.array(img_loc)[shuffled_indices]
    y_shuffled = numpy.array(label_list)[shuffled_indices]
    # split the dataset
    image_train = x_shuffled[:train_num]
    label_train = y_shuffled[:train_num]
    image_test = x_shuffled[-test_num:]  # the last test_num samples
    label_test = y_shuffled[-test_num:]
    print(image_train.shape, label_train.shape)
    # read the images and resize them
    img_train, img_test = [], []
    # load the training set
    for i in range(train_num):
        img = readimg(image_train[i])
        img_train.append(img)
    # load the test set
    for i in range(test_num):
        img = readimg(image_test[i])
        img_test.append(img)
    img_train = numpy.array(img_train)  # convert lists to ndarrays of shape (N, IMGSIZE, IMGSIZE, 3)
    img_test = numpy.array(img_test)
    return img_train, img_test, label_train, label_test
# read one image from disk
def readimg(file):
    image = plt.imread(file)
    image = misc.imresize(image, (IMGSIZE, IMGSIZE))  # resize to 100x100 (needs SciPy < 1.3)
    image = image / 255.  # normalize pixel values to [0, 1]
    return image
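# `misc.imresize` was removed from SciPy 1.3+. A minimal drop-in sketch using
# Pillow instead (assumption: Pillow is installed; `readimg_pil` is a
# hypothetical helper, not part of the original script):
from PIL import Image

def readimg_pil(file):
    img = Image.open(file).convert('RGB')  # force 3 channels
    img = img.resize((IMGSIZE, IMGSIZE))   # same 100x100 target size
    return numpy.asarray(img, dtype=numpy.float32) / 255.  # same [0, 1] scaling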
# next_batch: return the next sequential batch of data
g_b = 0
def next_batch(X, Y, size):
    global g_b
    x = X[g_b:g_b + size]
    y = Y[g_b:g_b + size]
    g_b = g_b + size
    return x, y
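# A generator-based sketch of the same sequential batching that avoids the
# global counter (hypothetical alternative, not used by the script below):
def batches(X, Y, size):
    for start in range(0, len(X) - size + 1, size):
        yield X[start:start + size], Y[start:start + size]
# usage: for x, y in batches(img_train, label_train, 100): ...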
img_train, img_test, label_train, label_test = get_dataset(r'G:\A_深度学习1\tensorflow\data\train')
print(img_train.shape, label_train.shape)
print('============================')
# placeholders: input images and one-hot labels
X, Y = tf.placeholder('float', [None, IMGSIZE, IMGSIZE, 3]), tf.placeholder('float', [None, 2])
# dropout keep probability; defaults to 1.0 (no dropout) so evaluation runs are unaffected
keep_prob = tf.placeholder_with_default(1.0, shape=())
# 第一层卷积
with tf.variable_scope('conv1'):
W1 = tf.Variable(tf.random_normal([3, 3, 3, 16])) # 卷积核
L1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME') # 卷积运算
L1 = tf.nn.relu(L1)
L1 = tf.nn.max_pool(L1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') # 池化层
# second convolutional layer
with tf.variable_scope('conv2'):
    W2 = tf.Variable(tf.random_normal([3, 3, 16, 16], stddev=0.01))
    L2 = tf.nn.conv2d(L1, W2, strides=[1, 1, 1, 1], padding='SAME')
    L2 = tf.nn.relu(L2)
    L2 = tf.nn.max_pool(L2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
# flatten to a 1-D vector per sample: two SAME-padded 2x2/2 max-pools halve 100 -> 50 -> 25,
# so L2 is (?, 25, 25, 16) and dim = 25 * 25 * 16 = 10000
dim = L2.get_shape()[1].value * L2.get_shape()[2].value * L2.get_shape()[3].value
L2_flat = tf.reshape(L2, [-1, dim])
# fully connected layer 1 (hidden layer)
with tf.variable_scope('fc1'):
    W3 = tf.get_variable(name='W3', shape=[dim, 128], initializer=con.layers.xavier_initializer())
    b3 = tf.Variable(tf.random_normal([128]))
    L3 = tf.nn.relu(tf.matmul(L2_flat, W3) + b3)
# fully connected layer 2
with tf.variable_scope('fc2'):
    W4 = tf.get_variable('W4', shape=[128, 128], initializer=tf.truncated_normal_initializer(stddev=0.005))
    b4 = tf.get_variable('b4', shape=[128], initializer=tf.constant_initializer(0.1))
    L4 = tf.nn.relu(tf.matmul(L3, W4) + b4)
# dropout regularization: keep_prob 0.9 randomly drops 10% of the units during
# training; the placeholder defaults to 1.0 (no dropout) at evaluation time
L4 = tf.nn.dropout(L4, keep_prob=keep_prob)
# softmax output layer
with tf.variable_scope('softmax'):
    W5 = tf.get_variable('W5', shape=[128, 2], initializer=tf.truncated_normal_initializer(stddev=0.005))
    b5 = tf.get_variable('b5', shape=[2], initializer=tf.constant_initializer(0.1))
    logits = tf.matmul(L4, W5) + b5
# cost: softmax cross-entropy computed from the raw logits
# (newer TF 1.x releases prefer tf.nn.softmax_cross_entropy_with_logits_v2)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cost)
# accuracy
corr = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
acc = tf.reduce_mean(tf.cast(corr, 'float'))
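# Why the raw logits are fed to the loss: tf.nn.softmax_cross_entropy_with_logits
# fuses softmax and log into one numerically stable op. For intuition only, an
# equivalent manual computation would look like this (sketch, not run by the script):
# probs = tf.nn.softmax(logits)
# manual_cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(probs + 1e-10), axis=1))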
epochs = 100  # total training epochs
batch_size = 100  # samples per batch
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        avg_cost = 0
        total_batch = int(train_num / batch_size)
        g_b = 0  # reset the batch cursor at the start of each epoch
        for i in range(total_batch):
            x, y = next_batch(img_train, label_train, batch_size)
            c, _ = sess.run([cost, optimizer], feed_dict={X: x, Y: y, keep_prob: 0.9})  # enable dropout while training
            avg_cost += c / total_batch
        if epoch % 20 == 0:
            acc_ = sess.run(acc, feed_dict={X: img_train, Y: label_train})
            print('Epoch:', (epoch + 1), 'cost=', avg_cost, 'acc=', acc_)
    # evaluate accuracy on the test set
    print('Accuracy:', sess.run(acc, feed_dict={X: img_test, Y: label_test}))
    # pick one random sample from the test set and predict it
    r = random.randint(0, test_num - 1)
    print("Label: ", sess.run(tf.argmax(label_test[r:r + 1], 1)))
    print("Prediction: ", sess.run(tf.argmax(logits, 1), feed_dict={X: img_test[r:r + 1]}))