def loadData(self, path):
    """Load the data set from a MATLAB ``.mat`` file and pick sample images.

    The ``"X"`` and ``"y"`` entries of the file are stored in ``self.x`` and
    ``self.y`` (the raw dictionary is kept in ``self.data``), and up to 100
    distinct, randomly chosen rows of ``X`` are stored in ``self.pics``.

    Args:
        path: Path of the ``.mat`` file; it must contain ``"X"`` and ``"y"``.

    Raises:
        KeyError: If the file has no ``"X"`` or ``"y"`` entry.
    """
    self.data = scio.loadmat(path)
    self.x = self.data["X"]  # (5000, 400) for the exercise data set
    self.y = self.data["y"]  # (5000, 1) for the exercise data set
    # Sample from the rows that were actually loaded instead of assuming
    # exactly 5000 of them, and never ask for more rows than exist.
    n_samples = self.x.shape[0]
    index = random.sample(range(n_samples), min(100, n_samples))
    self.pics = self.x[index, :]  # (100, 400) for the exercise data set
# 为了能显示Theta1,对函数做了一点小修改
def display100Data(pics):
    """Tile the rows of ``pics`` as small grayscale images and draw the mosaic.

    Each row of ``pics`` is one flattened image. The image size is derived
    from the row length, so the function works for 400-pixel digits as well
    as for weight rows of other lengths. Every image is scaled by its own
    largest absolute value before being placed on the canvas.

    Tiles are filled in the same order as before: image ``k`` goes to
    vertical slot ``k % display_cols`` and horizontal slot
    ``k // display_cols``.

    Args:
        pics: 2-D array with one flattened image per row.

    Raises:
        ValueError: If ``pics`` is empty or a row cannot be reshaped into a
            ``width x height`` image.
    """
    n_pics, n_pixels = pics.shape
    if n_pics == 0 or n_pixels == 0:
        raise ValueError("pics must contain at least one non-empty image")

    example_width = int(np.sqrt(n_pixels))  # width of one image
    example_hight = n_pixels // example_width  # height of one image
    if example_width * example_hight != n_pixels:
        raise ValueError(
            "cannot reshape %d pixels into a %dx%d image"
            % (n_pixels, example_width, example_hight)
        )

    display_rows = int(np.sqrt(n_pics))
    # Round up so that no image is dropped when n_pics is not a perfect
    # multiple of display_rows.
    display_cols = -(-n_pics // display_rows)

    # The canvas is sized to match the placement below (display_cols slots
    # down, display_rows slots across); 200 is the background/border value.
    display_array = np.full(
        (1 + display_cols * (example_hight + 1),
         1 + display_rows * (example_width + 1)),
        200.0,
    )

    # A single flat loop replaces the nested loops and their out-of-range
    # guards: it simply stops after the last image.
    for curr_ex in range(n_pics):
        across, down = divmod(curr_ex, display_cols)
        top = 1 + down * (example_hight + 1)
        left = 1 + across * (example_width + 1)
        max_val = np.max(np.abs(pics[curr_ex, :]))
        if max_val == 0:
            max_val = 1  # an all-zero image stays black instead of becoming NaN
        display_array[top:top + example_hight, left:left + example_width] = \
            pics[curr_ex, :].reshape((example_width, example_hight)).transpose() / max_val * 255

    plt.title("What the W1 look like from the NN Learning")
    plt.imshow(display_array, cmap='gray')
神经网络共3层,输入层,1层隐藏层,输出层:输入层401个输入(第1个为1), 隐藏层26个单元,输出层10个单元(对应着0-9),如下图
def nnCostFunction(self, theta, x, y, lamda):
    """Return the regularized cross-entropy cost of the three-layer network.

    The flat ``theta`` vector is split into the two weight matrices, the
    inputs are propagated forward, and the cost is computed so that it
    matches the gradient returned by ``nnGradient``: output error
    ``a3 - y`` and an L2 penalty that skips the bias column.

    Args:
        theta: 1-D array holding ``theta1`` followed by ``theta2``, both
            flattened row by row.
        x: Input examples, one per row, without the bias column.
        y: Labels; converted by ``self.handleYtoOne`` (presumably to a
            one-hot matrix with one column per label; confirm against that
            helper).
        lamda: Regularization strength.

    Returns:
        The scalar cost.
    """
    m = x.shape[0]
    split = self.hidden_layer_size * (self.input_layer_size + 1)
    theta1 = np.reshape(theta[:split], (self.hidden_layer_size, self.input_layer_size + 1))
    theta2 = np.reshape(theta[split:], (self.num_labels, self.hidden_layer_size + 1))
    y = self.handleYtoOne(y)

    # Forward propagation; shape notes refer to the 5000-example data set.
    a1 = np.hstack([np.ones((m, 1)), x])  # 5000 x 401
    z2 = a1.dot(theta1.T)  # 5000 x 25
    a2 = np.hstack([np.ones((m, 1)), self.sigmoid(z2)])  # 5000 x 26
    z3 = a2.dot(theta2.T)
    a3 = self.sigmoid(z3)  # 5000 x 10

    # The snippet previously stopped here and returned nothing, although the
    # numerical gradient check subtracts and divides this function's result.
    data_cost = np.sum(-y * np.log(a3) - (1 - y) * np.log(1 - a3)) / m
    # The bias weights (first column) are not regularized, as in nnGradient.
    penalty = lamda / (2 * m) * (np.sum(theta1[:, 1:] ** 2) + np.sum(theta2[:, 1:] ** 2))
    return data_cost + penalty
上述代码中有个非常重要的地方,就是y = self.handleYtoOne(y)
因为在Octave中源代码使用了y = eye(num_labels)(y,:);
如果y = [10,10,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9],那么转换后的y为如下(20, 10)的矩阵(每行只在标签对应的那一列为1,标签10对应最后一列):
0 0 0 0 0 0 0 0 0 1
0 0 0 0 0 0 0 0 0 1
1 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0
0 1 0 0 0 0 0 0 0 0
0 1 0 0 0 0 0 0 0 0
0 0 1 0 0 0 0 0 0 0
0 0 1 0 0 0 0 0 0 0
0 0 0 1 0 0 0 0 0 0
0 0 0 1 0 0 0 0 0 0
0 0 0 0 1 0 0 0 0 0
0 0 0 0 1 0 0 0 0 0
0 0 0 0 0 1 0 0 0 0
0 0 0 0 0 1 0 0 0 0
0 0 0 0 0 0 1 0 0 0
0 0 0 0 0 0 1 0 0 0
0 0 0 0 0 0 0 1 0 0
0 0 0 0 0 0 0 1 0 0
0 0 0 0 0 0 0 0 1 0
0 0 0 0 0 0 0 0 1 0
def nnGradient(self, theta, x, y, lamda):
    """Compute the regularized gradient of the cost by backpropagation.

    Args:
        theta: 1-D array holding ``theta1`` followed by ``theta2``, both
            flattened row by row.
        x: Input examples, one per row, without the bias column.
        y: Labels; passed through ``self.handleYtoOne`` before use.
        lamda: Regularization strength.

    Returns:
        1-D array with the gradient for ``theta1`` followed by the gradient
        for ``theta2``, in the same layout as ``theta``.
    """
    m = x.shape[0]
    boundary = self.hidden_layer_size * (self.input_layer_size + 1)
    theta1 = np.reshape(
        theta[:boundary],
        (self.hidden_layer_size, self.input_layer_size + 1),
    )
    theta2 = np.reshape(
        theta[boundary:],
        (self.num_labels, self.hidden_layer_size + 1),
    )
    y = self.handleYtoOne(y)

    # Forward pass (shape notes refer to the 5000-example data set).
    a1 = np.hstack([np.ones((m, 1)), x])  # 5000 x 401
    z2 = a1.dot(theta1.T)  # 5000 x 25
    hidden = self.sigmoid(z2)
    a2 = np.hstack([np.ones((hidden.shape[0], 1)), hidden])  # 5000 x 26
    z3 = a2.dot(theta2.T)
    a3 = self.sigmoid(z3)  # 5000 x 10

    # Backward pass: output error first, then the hidden-layer error with
    # the bias column dropped.
    output_error = a3 - y
    hidden_error = output_error.dot(theta2)[:, 1:] * self.sigmoidGradient(z2)  # 5000 x 25

    accum1 = np.zeros(theta1.shape) + hidden_error.T.dot(a1)
    accum2 = np.zeros(theta2.shape) + output_error.T.dot(a2)

    # Regularization term; the bias column is not penalized.
    penalty1 = lamda / m * theta1
    penalty2 = lamda / m * theta2
    penalty1[:, 0] = 0
    penalty2[:, 0] = 0

    grad1 = 1 / m * accum1 + penalty1
    grad2 = 1 / m * accum2 + penalty2
    return np.concatenate([grad1.ravel(), grad2.ravel()])
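在神经网络中,我们要最小化损失函数J,而J是Theta的函数。我们在Theta的每个分量周围取一个很小的值e=0.0001,用中心差分近似地计算梯度,并与步骤4中计算出的梯度进行简单的比较;如果差别不大,说明梯度求解没有问题。原理公式如下:$\frac{\partial J}{\partial \theta_i} \approx \frac{J(\theta + e\,\mathbf{u}_i) - J(\theta - e\,\mathbf{u}_i)}{2e}$,其中 $\mathbf{u}_i$ 是第i个分量为1、其余分量为0的向量。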
def computeNumericalGradient(self, theta, x, y, lamda):
    """Estimate the gradient of ``nnCostFunction`` by central differences.

    Each component ``i`` is approximated as
    ``(J(theta + e_i) - J(theta - e_i)) / (2 * e)`` with ``e = 0.0001``.

    Args:
        theta: 1-D parameter vector at which to estimate the gradient.
        x: Input examples forwarded to ``nnCostFunction``.
        y: Labels forwarded to ``nnCostFunction``.
        lamda: Regularization strength forwarded to ``nnCostFunction``.

    Returns:
        Array of the same shape as ``theta`` with the estimated gradient.
    """
    step = 0.0001
    estimate = np.zeros(theta.shape)
    offset = np.zeros(theta.shape)
    for position in range(theta.size):
        # Perturb exactly one component, evaluate both sides, then undo it.
        offset[position] = step
        cost_below = self.nnCostFunction(theta - offset, x, y, lamda)
        cost_above = self.nnCostFunction(theta + offset, x, y, lamda)
        difference = np.asarray(cost_above) - np.asarray(cost_below)
        estimate[position] = difference / (2 * step)
        offset[position] = 0
    return estimate
def checkNNGradients(self, lamda):
    """Compare the backpropagation gradient with a numerical estimate.

    A tiny network (3 inputs, 5 hidden units, 3 labels, 5 examples) is built
    from ``self.debugInitializeWeights``. Its gradient from ``nnGradient``
    is printed side by side with the estimate from
    ``computeNumericalGradient``, followed by their relative difference.

    The layer sizes on ``self`` are replaced only for the duration of the
    check; values that existed beforehand are restored afterwards so the
    check does not shrink the real network.

    Args:
        lamda: Regularization strength used for both gradients.

    Returns:
        The relative difference
        ``norm(numgrad - grad) / norm(numgrad + grad)``.
    """
    size_names = ("input_layer_size", "hidden_layer_size", "num_labels")
    saved_sizes = {
        name: getattr(self, name) for name in size_names if hasattr(self, name)
    }
    try:
        self.input_layer_size = 3
        self.hidden_layer_size = 5
        self.num_labels = 3
        m = 5
        theta1 = self.debugInitializeWeights(self.hidden_layer_size, self.input_layer_size)
        theta2 = self.debugInitializeWeights(self.num_labels, self.hidden_layer_size)
        x = self.debugInitializeWeights(m, self.input_layer_size - 1)
        # Labels cycle through 1..num_labels.
        y = 1 + np.mod(np.arange(1, m + 1), self.num_labels)
        theta = np.hstack([theta1.flatten(), theta2.flatten()])

        grad = self.nnGradient(theta, x, y, lamda)
        numgrad = self.computeNumericalGradient(theta, x, y, lamda)

        # Ratio of vector norms: unlike an element-wise signed maximum it
        # cannot hide negative errors and does not divide by single entries
        # that happen to cancel to zero.
        diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
        print(np.hstack([grad.reshape(-1, 1), numgrad.reshape(-1, 1)]))
        print("Relative Difference:", diff)
        return diff
    finally:
        for name, value in saved_sizes.items():
            setattr(self, name, value)
(1).原代码输入(5000, 401),tf中(5000, 400);
(2).权重的维度也简单的做了改变,Theta1:(25, 401)=>(400, 25),Theta2:(10, 26)=>(25, 10),偏置项改由单独的b1(25)和b2(10)表示;
# Graph inputs: each row of X holds 400 features and each row of Y holds 10
# target values; the leading None leaves the number of rows unspecified.
X = tf.placeholder(tf.float32, [None, 400])
Y = tf.placeholder(tf.float32, [None, 10])
# Weights: h1 maps the 400 inputs to 25 hidden units and h2 maps those 25
# units to the 10 outputs. Initial values are drawn with tf.random_normal.
h1 = tf.Variable(tf.random_normal([400, 25]))
h2 = tf.Variable(tf.random_normal([25, 10]))
# Biases are separate variables, one per hidden unit and one per output.
b1 = tf.Variable(tf.random_normal([25]))
b2 = tf.Variable(tf.random_normal([10]))
def neural_net(x):
    """Build the two-layer forward pass and return the output tensor.

    Both layers are plain affine maps (``matmul`` plus bias); no activation
    function is applied between or after them.

    Args:
        x: Input tensor that is multiplied by ``h1``.

    Returns:
        The output-layer tensor.
    """
    hidden = tf.add(tf.matmul(x, h1), b1)
    return tf.add(tf.matmul(hidden, h2), b2)
with tf.Session() as sess:
    # Variables must be initialized inside this session before any op reads
    # them; nothing else in this block does so.
    sess.run(tf.global_variables_initializer())

    x1, y0 = loadData('ex4data1.mat')
    y1 = handleYtoOne(y0)

    # 80% training / 20% testing, based on the number of rows actually loaded
    # rather than a hard-coded 5000.
    n_samples = x1.shape[0]
    index = random.sample(range(n_samples), n_samples * 4 // 5)
    train_x = x1[index, :]
    train_y = y1[index, :]
    test_x = np.delete(x1, index, 0)
    test_y = np.delete(y1, index, 0)

    for i in range(100):
        sess.run(train_op, feed_dict={X: train_x, Y: train_y})
        loss, acc = sess.run([loss_op, accuracy], feed_dict={X: train_x, Y: train_y})
        # Original note: training accuracy can reach 94%.
        print("\r训练{}次: 损失函数{:.4f} | 精度{:.4f}".format(i, loss, acc), end="")

    # Original note: test accuracy is 89.5%.
    # NOTE(review): if `accuracy` is a fraction in [0, 1], the trailing "%"
    # in this message is misleading -- confirm against its definition.
    test_acc = sess.run(accuracy, feed_dict={X: test_x, Y: test_y})
    print("\nTest Accuracy:%.4f%%" % test_acc)

    # Fetch the learned first-layer weights.
    pics = sess.run(h1)
Theta1的维度(400, 25)