Machine Learning (10.1) -- Comparing Different Algorithms for Handwritten Digit Recognition (1) -- Parsing Different Versions of the MNIST Dataset and an Average-Grayscale Exercise
Code segment 1:
# -*- coding:utf-8 -*-
import pickle
import gzip
import numpy as np
import random

# Activation function and its derivative
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_deriv(z):
    return sigmoid(z) * (1 - sigmoid(z))
# Load the data
with gzip.open(r'mnist.pkl.gz', 'rb') as f:
    training_data, validation_data, test_data = pickle.load(f, encoding='bytes')

# Reshape the data. The temporary variable training_label converts a label value such as 5
# into the one-hot vector [0,0,0,0,0,1,0,0,0,0]. Without this conversion the network output
# could not be checked against the label: after training, the output is never exactly 5.
# With a one-hot target the output looks like [0.0001, 0.00001, 0.002, ...], and we simply
# take the index of the largest value (a short sketch after this listing makes this concrete).
training_label = np.zeros([training_data[1].shape[0], 10, 1])
for index, val in enumerate(training_data[1]): training_label[index][val] = 1
training_data = list(zip(training_data[0].reshape(-1, 784, 1), training_label))
test_data = list(zip(test_data[0].reshape(-1, 784, 1), test_data[1]))
# The next three lines are debugging helpers: while the code is still being written,
# running on the full dataset is simply too slow, so we work on a slice of it.
trainingNumTmp = 5000
training_data = training_data[0:trainingNumTmp]
test_data = test_data[0:int(trainingNumTmp / 5)]
# 1. Load the data, reshape it to fit the algorithm, and set the basic parameters
layers = [training_data[0][0].shape[0], 20, 15, training_data[0][1].shape[0]]
trainingNum = len(training_data)
epochs = 6        # number of passes over the full training set
batchNum = 10     # mini-batch size: each epoch the data is split into groups of this many
                  # records, so 50000 records become [[10 records], [10 records], ... 5000 groups in all]
learningRate = 3  # learning rate
print('-' * 60)
print("Basic parameters")
print('%s training records and %s test records loaded.' % (trainingNum, len(test_data)))
print('Network layers: ' + str(layers) + "; four layers are defined here, the first and last being the input and output layers. Three layers would already work; I use four only to make the later walkthrough clearer.")
print('Each image has shape %s and each label has shape %s; note that these must be the 2-D shapes (784, 1) and (10, 1).' % (training_data[0][0].shape, training_data[0][1].shape))
print('-' * 60)
# 2. Initialize the weights and biases
weights = [np.ones((layers[x + 1], layers[x])) / 10 for x in range(len(layers) - 1)]
biases = [np.ones((layers[x + 1], 1)) / 10 for x in range(len(layers) - 1)]
strW = ""
strB = ""
for x in weights:
    strW += ('' if strW == '' else ' , ') + str(x.shape)
for x in biases:
    strB += ('' if strB == '' else ' , ') + str(x.shape)
print("Weight shapes: " + str(strW))  # (20, 784) , (15, 20) , (10, 15)
print("Bias shapes: " + str(strB))    # (20, 1) , (15, 1) , (10, 1)
print('-' * 60)
tmp = 0  # debugging helper: record counter used by the extra prints during the first epoch

# Start training
for j in range(epochs):
    # The following if is a debugging helper: on the first epoch the training set is not
    # shuffled, so its order is fixed and the print statements below yield reproducible
    # values; if you rewrite this logic yourself, you should get the same numbers.
    if j == 0:
        print("First epoch: element [153] of the first training image is " + str(np.array(training_data[0][0][153])))  # [0.0703125]
    else:
        random.shuffle(training_data)
    if j == 1:
        # Also a debugging helper. The weights and biases above are initialized with fixed
        # values, which keeps the first epoch reproducible but gives far too low an accuracy,
        # so from the second epoch on they are re-initialized with random values.
        weights = [np.random.randn(layers[x + 1], layers[x]) for x in range(len(layers) - 1)]
        biases = [np.random.randn(layers[x + 1], 1) for x in range(len(layers) - 1)]
    batchGroup = [training_data[x:x + batchNum] for x in range(0, trainingNum, batchNum)]
    for batch in batchGroup:
        # Zero-initialized per-batch accumulators for the weight and bias gradients; once every
        # record in the batch is done, they are summed, averaged, and applied to weights and biases
        batch_w = [np.zeros(b.shape) for b in weights]
        batch_b = [np.zeros(b.shape) for b in biases]
        for img, label in batch:
            tmp += 1
            # One forward and one backward pass per record yields one error estimate
            zs = []
            acts = [img]
            # 3. Forward pass (layer 1 to layer N): compute the network output for this record
            for w, b in zip(weights, biases):
                z = np.dot(w, acts[-1]) + b
                zs.append(z)
                acts.append(sigmoid(z))
            # 4. Backward pass (layer N to layer 1): compare with the target and
            # accumulate the gradient-descent corrections for weights and biases
            item_w = [np.zeros(b.shape) for b in weights]  # zero-initialized per-record gradients
            item_b = [np.zeros(b.shape) for b in biases]
            for index in range(-1, -1 * len(layers), -1):
                if index == -1:
                    item_b[index] = acts[index] - label  # output-layer delta; storing it in item_b lets the next (earlier) layer reuse it in the backward pass
                else:
                    item_b[index] = np.dot(weights[index + 1].T, item_b[index + 1])
                item_b[index] = item_b[index] * sigmoid_deriv(zs[index])
                item_w[index] = np.dot(item_b[index], acts[index - 1].T)
                if tmp == 10 and index == -1: print("First epoch, record 10: last element of this record's output-layer bias gradient: " + str(item_b[index][-1]))  # [0.25077581]
            # Accumulate after each record of the batch is done
            for index in range(0, len(batch_w)):
                batch_w[index] = batch_w[index] + item_w[index]
                batch_b[index] = batch_b[index] + item_b[index]
            if tmp == 2:
                print("First epoch, record 2: element [1] of the batch's output-layer bias accumulator: " + str(batch_b[-1][1]))  # [0.12539041]
        # Once the batch is done, average the accumulated gradients and update weights and biases
        # (a numeric sketch of this update rule follows code segment 2)
        for index in range(0, len(batch_w)):
            batch_w[index] = batch_w[index] / batchNum
            weights[index] = weights[index] - learningRate * batch_w[index]
            batch_b[index] = batch_b[index] / batchNum
            biases[index] = biases[index] - learningRate * batch_b[index]
        if tmp == 5000:  # debugging helper: fires at the end of the first epoch
            print("Four-layer network, end of the first epoch: the three weight groups, i.e. the (20, 784) , (15, 20) , (10, 15) above,\nshould sum to: " + str([str(np.sum(x)) for x in weights]))
            print("Four-layer network, end of the first epoch: the three bias groups, i.e. the (20, 1) , (15, 1) , (10, 1) above,\nshould sum to: " + str([str(np.sum(x)) for x in biases]))
            # weight sums: ['1567.8833232869501', '-5.362204505945536', '-44.4216909099882']
            # bias sums: ['1.9975427310316856', '-0.27341581024693457', '-4.277655163275536']
    # 6. Forward pass over the test set: compute the outputs
    # 7. Compare with the true labels and count the correct predictions
    correctNum = 0
    for testImg, testLabel in test_data:
        for w, b in zip(weights, biases):
            testImg = sigmoid(np.dot(w, testImg) + b)
        if np.argmax(testImg) == testLabel: correctNum += 1
    print("Of %d epochs, epoch %d finished: %d test records, %d correct." % (epochs, j + 1, len(test_data), correctNum))
print("Program finished!")
Code segment 2:
# -*- coding:utf-8 -*-
import pickle
import gzip
import numpy as np
import random

# Activation function and its derivative
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_deriv(z):
    return sigmoid(z) * (1 - sigmoid(z))

# Load the data
with gzip.open(r'mnist.pkl.gz', 'rb') as f:
    training_data, validation_data, test_data = pickle.load(f, encoding='bytes')

# Reshape the data; the temporary variable training_label turns a label such as 5
# into the one-hot vector [0,0,0,0,0,1,0,0,0,0]
training_label = np.zeros([training_data[1].shape[0], 10, 1])
for index, val in enumerate(training_data[1]): training_label[index][val] = 1
training_data = list(zip(training_data[0].reshape(-1, 784, 1), training_label))
test_data = list(zip(test_data[0].reshape(-1, 784, 1), test_data[1]))

# 1. Load the data, reshape it to fit the algorithm, and set the basic parameters
layers = [training_data[0][0].shape[0], 20, 15, training_data[0][1].shape[0]]
trainingNum = len(training_data)
epochs = 6
batchNum = 10
learningRate = 3

# 2. Initialize the weights and biases
weights = [np.random.randn(layers[x + 1], layers[x]) for x in range(len(layers) - 1)]
biases = [np.random.randn(layers[x + 1], 1) for x in range(len(layers) - 1)]
for j in range(epochs):
    random.shuffle(training_data)
    batchGroup = [training_data[x:x + batchNum] for x in range(0, trainingNum, batchNum)]
    for batch in batchGroup:
        batch_w = [np.zeros(b.shape) for b in weights]
        batch_b = [np.zeros(b.shape) for b in biases]
        for img, label in batch:
            # One forward and one backward pass per record yields one error estimate
            zs = []
            acts = [img]
            # 3. Forward pass (layer 1 to layer N): compute the network output for this record
            for w, b in zip(weights, biases):
                z = np.dot(w, acts[-1]) + b
                zs.append(z)
                acts.append(sigmoid(z))
            # 4. Backward pass (layer N to layer 1): compare with the target and
            # accumulate the gradient-descent corrections for weights and biases
            item_w = [np.zeros(b.shape) for b in weights]  # zero-initialized per-record gradients
            item_b = [np.zeros(b.shape) for b in biases]
            for index in range(-1, -1 * len(layers), -1):
                if index == -1:
                    item_b[index] = acts[index] - label  # output-layer delta, reused by the next (earlier) layer in the backward pass
                else:
                    item_b[index] = np.dot(weights[index + 1].T, item_b[index + 1])
                item_b[index] = item_b[index] * sigmoid_deriv(zs[index])
                item_w[index] = np.dot(item_b[index], acts[index - 1].T)
            # Accumulate after each record of the batch is done
            for index in range(0, len(batch_w)):
                batch_w[index] = batch_w[index] + item_w[index]
                batch_b[index] = batch_b[index] + item_b[index]
        # Once the batch is done, average the accumulated gradients and update weights and biases
        for index in range(0, len(batch_w)):
            batch_w[index] = batch_w[index] / batchNum
            weights[index] = weights[index] - learningRate * batch_w[index]
            batch_b[index] = batch_b[index] / batchNum
            biases[index] = biases[index] - learningRate * batch_b[index]
    # 6. Forward pass over the test set: compute the outputs
    # 7. Compare with the true labels and count the correct predictions
    correctNum = 0
    for testImg, testLabel in test_data:
        for w, b in zip(weights, biases):
            testImg = sigmoid(np.dot(w, testImg) + b)
        if np.argmax(testImg) == testLabel: correctNum += 1
    print("Of %d epochs, epoch %d finished: %d test records, %d correct." % (epochs, j + 1, len(test_data), correctNum))
print("Program finished!")
Code segment 3:
# -*- coding:utf-8 -*-
import pickle
import gzip
import numpy as np
import random

# Activation function and its derivative
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_deriv(z):
    return sigmoid(z) * (1 - sigmoid(z))

# Load the data
def loadData(trainingNum=None):
    with gzip.open(r'mnist.pkl.gz', 'rb') as f:
        training_data, validation_data, test_data = pickle.load(f, encoding='bytes')
    training_label = np.zeros([training_data[1].shape[0], 10, 1])
    for index, val in enumerate(training_data[1]): training_label[index][val] = 1
    training_data = list(zip(training_data[0].reshape(-1, 784, 1), training_label))
    test_data = list(zip(test_data[0].reshape(-1, 784, 1), test_data[1]))
    if trainingNum is not None:
        training_data = training_data[0:trainingNum]
        test_data = test_data[0:int(trainingNum / 5)]
    return training_data, test_data
def batchData(batch, layers, weights, biases):
    batch_w = [np.zeros(b.shape) for b in weights]
    batch_b = [np.zeros(b.shape) for b in biases]
    for item in batch:
        item_w, item_b = itemData(item, layers, weights, biases)
        # Accumulate after each record of the batch is done
        for index in range(0, len(batch_w)):
            batch_w[index] = batch_w[index] + item_w[index]
            batch_b[index] = batch_b[index] + item_b[index]
    return batch_w, batch_b

def itemData(item, layers, weights, biases):
    # Forward pass for one record
    zs = []
    acts = [item[0]]
    for w, b in zip(weights, biases):
        z = np.dot(w, acts[-1]) + b
        zs.append(z)
        acts.append(sigmoid(z))
    # Backward pass: per-record weight and bias gradients
    item_w = [np.zeros(b.shape) for b in weights]
    item_b = [np.zeros(b.shape) for b in biases]
    for index in range(-1, -1 * len(layers), -1):
        if index == -1:
            item_b[index] = acts[index] - item[1]
        else:
            item_b[index] = np.dot(weights[index + 1].T, item_b[index + 1])
        item_b[index] = item_b[index] * sigmoid_deriv(zs[index])
        item_w[index] = np.dot(item_b[index], acts[index - 1].T)
    return item_w, item_b
def predict(test_data, weights, biases):
    # 6. Forward pass over the test set: compute the outputs
    # 7. Compare with the true labels and count the correct predictions
    correctNum = 0
    for testImg, testLabel in test_data:
        for w, b in zip(weights, biases):
            testImg = sigmoid(np.dot(w, testImg) + b)
        if np.argmax(testImg) == testLabel: correctNum += 1
    return correctNum
def mnistNN(trainingNum=None, midLayes=[20, 15], epochs=6, batchNum=10, learningRate=3):
    training_data, test_data = loadData(trainingNum)
    # 1. Load the data, reshape it to fit the algorithm, and set the basic parameters
    layers = [training_data[0][0].shape[0]] + midLayes + [training_data[0][1].shape[0]]
    trainingNum = len(training_data)
    # 2. Initialize the weights and biases
    weights = [np.random.randn(layers[x + 1], layers[x]) for x in range(len(layers) - 1)]
    biases = [np.random.randn(layers[x + 1], 1) for x in range(len(layers) - 1)]
    for j in range(epochs):
        random.shuffle(training_data)
        batchGroup = [training_data[x:x + batchNum] for x in range(0, trainingNum, batchNum)]
        for batch in batchGroup:
            batch_w, batch_b = batchData(batch, layers, weights, biases)
            # Once the batch is done, average the accumulated gradients and update weights and biases
            for index in range(0, len(batch_w)):
                batch_w[index] = batch_w[index] / batchNum
                weights[index] = weights[index] - learningRate * batch_w[index]
                batch_b[index] = batch_b[index] / batchNum
                biases[index] = biases[index] - learningRate * batch_b[index]
        print("Of %d epochs, epoch %d finished: %d test records, %d correct." % (epochs, j + 1, len(test_data), predict(test_data, weights, biases)))

mnistNN(trainingNum=5000)
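With the logic factored into functions, the hyperparameters can now be varied from a single call site. Two illustrative calls (these particular settings are only examples, not tuned values):

mnistNN(midLayes=[30], epochs=10)  # full 50,000-record training set, one hidden layer of 30 units
mnistNN(trainingNum=1000, midLayes=[20, 15], epochs=3)  # quick smoke test on a small slice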