# Two-layer net: X -> (W1,b1) -> ReLU -> (W2,b2) -> softmax; computes loss and grads.
# Assumes X:(N,D), W1:(D,H), b1:(H,), W2:(H,C), b2:(C,), y:(N,) integer labels,
# reg: L2 strength, grads: dict — all defined earlier in the file (TODO confirm).
h1 = np.maximum(0,np.dot(X, W1) + b1) # first hidden layer ReLU activations (NxH)
scores=np.dot(h1, W2) + b2 # raw class scores (NxC)
scores = scores - np.reshape(np.max(scores,axis=1),(N,-1))#NxC
# Subtract each row's max from every element of that row
# (numerical stability: keeps exp() from overflowing; softmax is shift-invariant).
p = np.exp(scores)/np.reshape(np.sum(np.exp(scores),axis=1),(N,-1))#NxC
# Softmax: exponentiate each score and divide by the row sum.
loss = -sum(np.log(p[np.arange(N),y]))/N
# Mean cross-entropy: -log of the probability assigned to each correct class,
# summed (builtin sum over a 1-D array) and averaged over the batch.
loss += 0.5*reg*np.sum(W1*W1)+0.5*reg*np.sum(W2*W2)
# L2 regularization on both weight matrices.
dscores = p
# NOTE(review): dscores aliases p, so the in-place ops below mutate p;
# p is not used again afterwards, so this is safe here.
dscores[range(N),y]-=1.0#NxC
# Softmax gradient: dL/dscores = p - one_hot(y).
dscores/=N#loss was averaged over N, so the gradient is too
dW2 = np.dot(h1.T,dscores)#HxC
dh2 = np.sum(dscores,axis=0,keepdims=False)#(C,) — gradient of b2
# Each b2[c] appears in scores[:, c] for every example,
# so its gradient is the column sum of dscores.
da2 = np.dot(dscores,W2.T)#NxH — gradient flowing back into the hidden layer
# This is the gradient w.r.t. the post-ReLU activations h1.
da2[h1<=0]=0#NxH
# ReLU backprop: gradient passes only where the forward activation was positive.
dW1 = np.dot(X.T,da2)#DxH
dh1 = np.sum(da2,axis=0,keepdims=False)#(H,) — gradient of b1
# Hidden-layer bias gradient, same column-sum argument as dh2.
dW2 += reg*W2# derivative of the 0.5*reg*||W2||^2 term
dW1 += reg*W1# derivative of the 0.5*reg*||W1||^2 term
grads['W1']=dW1
grads['b1']=dh1
grads['W2']=dW2
grads['b2']=dh2
通过对训练集和测试集精度的计算,可以检查模型的过拟合程度。输入为 32*32*3=3072 维,隐藏层有 50 个神经元,所以输入层到隐藏层之间的权重矩阵的尺寸为 3072*50;将权重矩阵的每一列还原成图片之后,就可以用 50 张图片可视化这个权重矩阵。
# --- Naive convolution forward pass ---
# (Indentation of the loop nest was lost in the source; reconstructed here.)
# x: input batch, assumed (N, C, H, W); w: filters (F, C, HH, WW); b: biases (F,);
# conv_param: dict with 'stride' and 'pad' — all defined earlier (TODO confirm).
N, C, H, W = x.shape
F, _, HH, WW = w.shape
stride = conv_param['stride']
pad = conv_param['pad']
# Zero-pad only the two spatial dimensions.
x_pad = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')
# Standard output-size formula for a strided convolution.
H_out = 1 + (H + 2 * pad - HH) // stride
W_out = 1 + (W + 2 * pad - WW) // stride
# Accumulate in (F, N, H_out, W_out) order, transpose to (N, F, ...) at the end.
out = np.zeros((F, N, H_out, W_out))
for i in range(F):              # filter index
    for j in range(N):          # image index
        for k in range(H_out):  # output row
            for l in range(W_out):  # output column
                # Dot product of the receptive field with filter i, plus bias.
                out[i, j, k, l] = np.sum(
                    x_pad[j, :, k * stride:k * stride + HH,
                          l * stride:l * stride + WW] * w[i]
                ) + b[i]
out = out.transpose(1, 0, 2, 3)  # (N, F, H_out, W_out)
# --- Naive convolution backward pass ---
# (Indentation of the loop nest was lost in the source; reconstructed here.)
# Uses x, w, b, dout, N, F, H, W, HH, WW, stride, pad from the surrounding
# code (TODO confirm); dout is the upstream gradient, (N, F, H_new, W_new).
assert (W + 2 * pad - WW) % stride == 0, 'width does not work'
assert (H + 2 * pad - HH) % stride == 0, 'height does not work'
H_new = 1 + (H + 2 * pad - HH) // stride
W_new = 1 + (W + 2 * pad - WW) // stride
dx = np.zeros_like(x)
dw = np.zeros_like(w)
db = np.zeros_like(b)
s = stride
x_padded = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), 'constant')
# Accumulate dx in padded coordinates, then crop the padding off at the end.
dx_padded = np.pad(dx, ((0, 0), (0, 0), (pad, pad), (pad, pad)), 'constant')
for i in range(N):          # ith image
    for f in range(F):      # fth filter
        for j in range(H_new):
            for k in range(W_new):
                # Receptive field that produced out[i, f, j, k].
                window = x_padded[i, :, j * s:HH + j * s, k * s:WW + k * s]
                # Bias gradient: each output element adds b[f] once.
                db[f] += dout[i, f, j, k]
                # Filter gradient: input window scaled by the upstream grad.
                dw[f] += window * dout[i, f, j, k]
                # Input gradient: filter scaled by the upstream grad,
                # accumulated because windows overlap.
                dx_padded[i, :, j * s:HH + j * s, k * s:WW + k * s] += \
                    w[f] * dout[i, f, j, k]
# Unpad
dx = dx_padded[:, :, pad:pad + H, pad:pad + W]
首先加载 npy 文件,使用 VGG16 的特征提取器将 224*224*3 的图像提取成 4096 维的向量。
数据集中包含5类花,数量如下:
类别 | 数量 |
---|---|
daisy | 633 |
dandelion | 898 |
roses | 641 |
sunflower | 699 |
tulips | 799 |
将提取出的特征送入一个全连接层,由于是五个类别,所以输出层的维度为5.
# Input placeholder, sized to the VGG16 feature dimension
# (codes holds the extracted features; defined earlier in the file).
inputs_ = tf.placeholder(tf.float32, shape=[None, codes.shape[1]])
# One-hot label placeholder (labels_vecs defined earlier in the file).
labels_ = tf.placeholder(tf.int64, shape=[None, labels_vecs.shape[1]])
# 256-unit fully connected layer on top of the extracted features.
fc = tf.contrib.layers.fully_connected(inputs_, 256)
# Output layer: one logit per class (5 flower classes), no activation.
logits = tf.contrib.layers.fully_connected(fc, labels_vecs.shape[1], activation_fn=None)
# Per-example softmax cross entropy between labels and logits.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=labels_, logits=logits)
# Scalar loss: mean cross entropy over the batch.
cost = tf.reduce_mean(cross_entropy)
# Adam optimizer minimizing the loss.
optimizer = tf.train.AdamOptimizer().minimize(cost)
# Predicted class distribution (softmax of the logits).
predicted = tf.nn.softmax(logits)
# Accuracy: fraction of examples whose argmax prediction matches the label.
correct_pred = tf.equal(tf.argmax(predicted, 1), tf.argmax(labels_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
将提取出的4096维向量放入网络进行训练:
Train shapes (x, y): (2936, 4096) (2936, 5)
Validation shapes (x, y): (367, 4096) (367, 5)
Test shapes (x, y): (367, 4096) (367, 5)
Epoch: 1/20 Iteration: 0 Training loss: 6.99853
....
Epoch: 20/20 Iteration: 199 Training loss: 0.00870
Epoch: 19/20 Iteration: 200 Validation Acc: 0.8856
按 训练集:验证集:测试集 = 8:1:1 划分数据,进行测试
Test accuracy: 0.8828
import os
from skimage import io,transform
import numpy as np
import tensorflow as tf
from tensorflow_vgg import vgg16
from tensorflow_vgg import utils
# --- Extract VGG16 fc6 features for one test image ---
# (Indentation of the `with` bodies was lost in the source; reconstructed here.)
with tf.Session() as sess:
    # Build the VGG16 model object.
    vgg = vgg16.Vgg16()
    # Placeholder for a batch of 224x224 RGB images.
    input_ = tf.placeholder(tf.float32, [None, 224, 224, 3])
    with tf.name_scope("content_vgg"):
        # Load the pretrained VGG16 graph onto the placeholder.
        vgg.build(input_)
    # Compute the feature vector for the test image.
    data_dir = "tensorflow_vgg/test_data/rose.jpg"
    img = utils.load_image(data_dir)
    # Add the batch dimension. reshape instead of the original in-place
    # ndarray.resize: same elements, but resize raises when the array is
    # referenced elsewhere, whereas reshape always works for an equal-size view.
    img = img.reshape((1, 224, 224, 3))
    feed_dict = {input_: img}
    # codes holds the computed feature values (output of the relu6 layer).
    codes = sess.run(vgg.relu6, feed_dict=feed_dict)
# Drop the VGG graph and rebuild the small classifier exactly as it was at
# training time, so the checkpoint variables match by name.
tf.reset_default_graph()
# Input placeholder sized to the extracted feature dimension.
inputs_ = tf.placeholder(tf.float32, shape=[None, codes.shape[1]])
# Same 256-unit hidden layer as at training time.
fc = tf.contrib.layers.fully_connected(inputs_, 256)
# Output layer: 5 flower classes, no activation.
logits = tf.contrib.layers.fully_connected(fc, 5, activation_fn=None)
saver = tf.train.Saver()
# Index -> class-name mapping ('dasiy' spelling kept: it is the runtime label string).
flower_dict = {0:'dasiy',1:'dandelion',2:'roses',3:'sunflowers',4:'tulips'}
# NOTE(review): model_path is never used — the restore call below hardcodes an
# absolute path instead; confirm which checkpoint location is intended.
model_path = "checkpoints/flowers.ckpt"
# --- Restore the trained classifier and predict the test image's class ---
# (Indentation of the `with` body was lost in the source; reconstructed here.)
with tf.Session() as sess:
    saver.restore(sess, "F:/GitHub/vgg/checkpoints/flowers.ckpt")
    print("Model restored.")
    # Feed the previously extracted feature vector(s).
    feed_dict = {inputs_: codes}
    classification_result = sess.run(logits, feed_dict=feed_dict)
    # Print the raw logit matrix.
    print(classification_result)
    # Row-wise argmax -> predicted class index per image. Evaluated once here
    # (the original evaluated the same op twice) and printed, then reused.
    output = tf.argmax(classification_result, 1).eval()
    print(output)
    # Map each index to its flower name via the dictionary.
    for i in range(len(output)):
        print("第", i + 1, "朵花预测:" + flower_dict[output[i]])
F:\GitHub\vgg\tensorflow_vgg\vgg16.npy
npy file loaded
build model started
build model finished: 0s
Model restored.
[[ -2.32456613 -32.58225632 41.21063614 -28.55062294 0.66678715]]
[2]
第 1 朵花预测:roses