The Softmax Algorithm:
Our ultimate goal is to obtain a probability, so applying softmax to the hidden layers is meaningless; it only needs to be applied to the final layer. It also matters little during training: given what softmax actually does, adding it to the training pass has little effect, and leaving it out changes essentially nothing. The short snippet below is what gets added to the training pass:
for w, b in zip(weights, biases):
    z = np.dot(w, acts[-1]) + b
    zs.append(z)
    acts.append(sigmoid(z))
# After the forward pass, just recompute acts[-1] as the softmax of zs[-1]
sumExp = np.exp(zs[-1]).sum()
for index in range(len(acts[-1])):
    acts[-1][index] = np.exp(zs[-1][index]) / sumExp
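As a quick sanity check, this short sketch (not from the original code) confirms that the recomputed acts[-1] now behaves like a probability distribution over the ten digits:

# acts[-1] was just replaced by the softmax of zs[-1] above
print(acts[-1].sum())       # 1.0, up to floating-point error
print(np.argmax(acts[-1]))  # index of the most probable digit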
The following snippet is what gets added and adjusted in the test pass:
def predict(test_data, weights, biases):
    # 6. Forward pass over the test set to compute the outputs
    # 7. Compare against the correct labels and count the hits
    correctNum = 0
    for testImg, testLabel in test_data:
        act = testImg
        for index, (w, b) in enumerate(zip(weights, biases)):
            z = np.dot(w, act) + b
            if index == len(weights) - 1:
                act = np.exp(z) / np.exp(z).sum()  # softmax on the output layer only
            else:
                act = sigmoid(z)
        if np.argmax(act) == testLabel:
            correctNum += 1
    return correctNum
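One practical caveat: np.exp overflows for large weighted inputs. A common remedy, sketched here as a suggestion rather than something the code above does, is to subtract the maximum before exponentiating; the common factor exp(-z.max()) cancels in the ratio, so the softmax value is unchanged:

import numpy as np

# Numerically stable softmax: the exponent stays non-positive, avoiding overflow
def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()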
Finally, here is the complete code:
# -*- coding:utf-8 -*-
import pickle
import gzip
import numpy as np
import random
# Activation function
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_deriv(z):
    return sigmoid(z) * (1 - sigmoid(z))
# Load the data
def loadData(trainingNum=None, testNum=None):
    with gzip.open(r'mnist.pkl.gz', 'rb') as f:
        training_data, validation_data, test_data = pickle.load(f, encoding='bytes')
    # Turn each label into a 10x1 one-hot column vector
    training_label = np.zeros([training_data[1].shape[0], 10, 1])
    for index, val in enumerate(training_data[1]):
        training_label[index][val] = 1
    training_data = list(zip(training_data[0].reshape(-1, 784, 1), training_label))
    test_data = list(zip(test_data[0].reshape(-1, 784, 1), test_data[1]))
    if trainingNum is not None:
        training_data = training_data[0:trainingNum]
    if testNum is not None:
        test_data = test_data[0:testNum]
    return training_data, test_data
def batchData(batch, layers, weights, biases):
    batch_w = [np.zeros(w.shape) for w in weights]
    batch_b = [np.zeros(b.shape) for b in biases]
    for item in batch:
        item_w, item_b = itemData(item, layers, weights, biases)
        # Accumulate the per-record gradients once each record in the batch is done
        for index in range(0, len(batch_w)):
            batch_w[index] = batch_w[index] + item_w[index]
            batch_b[index] = batch_b[index] + item_b[index]
    return batch_w, batch_b
def itemData(item, layers, weights, biases):
    '''Forward and backward pass for a single record'''
    # Forward pass
    zs = []
    acts = [item[0]]
    for w, b in zip(weights, biases):
        z = np.dot(w, acts[-1]) + b
        zs.append(z)
        acts.append(sigmoid(z))
    # After the forward pass, just recompute acts[-1] as the softmax of zs[-1]
    sumExp = np.exp(zs[-1]).sum()
    for index in range(len(acts[-1])):
        acts[-1][index] = np.exp(zs[-1][index]) / sumExp
    # Backward pass
    item_w = [np.zeros(w.shape) for w in weights]
    item_b = [np.zeros(b.shape) for b in biases]
    for index in range(-1, -1 * len(layers), -1):
        if index == -1:
            item_b[index] = acts[index] - item[1]
        else:
            item_b[index] = np.dot(weights[index + 1].T, item_b[index + 1])
            # The quadratic and cross-entropy costs differ only in whether the
            # output-layer delta is also multiplied by sigmoid_deriv(zs[index]);
            # in the code that is just an indentation change (for the quadratic
            # cost, the line below moves one level out so it runs for index == -1 too):
            # item_b[index] = item_b[index] * sigmoid_deriv(zs[index])
            item_b[index] = item_b[index] * sigmoid_deriv(zs[index])  # cross-entropy cost
        item_w[index] = np.dot(item_b[index], acts[index - 1].T)
    return item_w, item_b
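# For reference, the backprop deltas implemented above (clarifying note using the
# standard formulas, not taken from the original post):
#   output layer, softmax + cross-entropy:  delta_L = a_L - y
#   output layer, quadratic cost:           delta_L = (a_L - y) * sigmoid'(z_L)
#   hidden layers (either cost):            delta_l = (W_{l+1}^T . delta_{l+1}) * sigmoid'(z_l)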
def predict(test_data, weights, biases):
    # 6. Forward pass over the test set to compute the outputs
    # 7. Compare against the correct labels and count the hits
    correctNum = 0
    for testImg, testLabel in test_data:
        act = testImg
        for index, (w, b) in enumerate(zip(weights, biases)):
            z = np.dot(w, act) + b
            if index == len(weights) - 1:
                act = np.exp(z) / np.exp(z).sum()  # softmax on the output layer only
            else:
                act = sigmoid(z)
        if np.argmax(act) == testLabel:
            correctNum += 1
    return correctNum
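The listing stops at predict; the driver that ties loadData, batchData, and predict together is not shown above. Below is a minimal sketch of such a driver under the usual mini-batch SGD update, where the layer sizes, learning rate eta, epoch count, and batch size are all assumed values rather than ones from the original post:

# Hypothetical SGD driver; layers, eta, epochs, and batchSize are assumed values.
layers = [784, 30, 10]
training_data, test_data = loadData()
weights = [np.random.randn(y, x) for x, y in zip(layers[:-1], layers[1:])]
biases = [np.random.randn(y, 1) for y in layers[1:]]
eta, epochs, batchSize = 3.0, 10, 10
for epoch in range(epochs):
    random.shuffle(training_data)
    for start in range(0, len(training_data), batchSize):
        batch = training_data[start:start + batchSize]
        batch_w, batch_b = batchData(batch, layers, weights, biases)
        # batchData returns summed gradients, so average them in the update step
        weights = [w - (eta / len(batch)) * bw for w, bw in zip(weights, batch_w)]
        biases = [b - (eta / len(batch)) * bb for b, bb in zip(biases, batch_b)]
    print(epoch, predict(test_data, weights, biases), '/', len(test_data))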