# 利用神经网络预测手写数字 (Predict handwritten digits with a neural network)
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
import scipy.optimize as opt
from sklearn.metrics import classification_report # 这个包是评价报告
from sklearn.preprocessing import OneHotEncoder
def expand_y(y):
    """Turn integer class labels into one-hot row vectors of length 10.

    A label ``k`` sets position ``k - 1`` of its row to 1, so the label 6
    becomes ``[0, 0, 0, 0, 0, 1, 0, 0, 0, 0]``.

    Args:
        y: Iterable of integer labels.

    Returns:
        ``np.ndarray`` holding one one-hot row per label, in input order.
    """

    def one_hot(label):
        # Fresh zero vector per label with a single 1 at index ``label - 1``.
        encoded = np.zeros(10)
        encoded[label - 1] = 1
        return encoded

    return np.array([one_hot(label) for label in y])
def load_mat(path):
    """Load the feature matrix and label vector from a MATLAB ``.mat`` file.

    Args:
        path: Location of the ``.mat`` file. The file must contain the
            variables ``X`` and ``y``.

    Returns:
        Tuple ``(X, y)``: ``X`` is the stored ``X`` variable as loaded and
        ``y`` is the stored ``y`` variable flattened to one dimension.
    """
    # Honour the caller's path; previously the file name was hard-coded and
    # the argument was silently ignored.
    data = loadmat(path)  # loadmat returns a dict keyed by variable name
    X = data['X']  # pixel values; 5000 x 400 for ex4data1.mat per the original notes
    y = data['y'].flatten()  # collapse the label column into a 1-D array
    return X, y
def load_weight(path):
    """Read the two stored weight matrices from a MATLAB ``.mat`` file.

    Args:
        path: Location of a ``.mat`` file containing the variables
            ``Theta1`` and ``Theta2``.

    Returns:
        Tuple ``(Theta1, Theta2)`` exactly as loaded from the file.
    """
    weights = loadmat(path)
    theta1 = weights['Theta1']
    theta2 = weights['Theta2']
    return theta1, theta2
def sigmoid(z):
    """Apply the logistic function ``1 / (1 + exp(-z))`` element-wise.

    Args:
        z: Scalar or NumPy array.

    Returns:
        The logistic function of ``z``, with the same shape as ``z``.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator
def feed_forward(theta, X):
    """Run a forward pass and return every intermediate layer value.

    Args:
        theta: Flat parameter vector, split by ``deserialize`` into the
            input->hidden and hidden->output weight matrices.
        X: Input matrix, one sample per row. It is multiplied directly by
            the first weight matrix, so it must already contain the bias
            column.

    Returns:
        Tuple ``(a1, z2, a2, z3, a3)``: the input, the hidden pre-activation,
        the hidden activation with a leading column of ones, the output
        pre-activation and the output activation.
    """
    weights_hidden, weights_output = deserialize(theta)
    input_activation = X
    hidden_input = input_activation @ weights_hidden.T
    hidden_activation = sigmoid(hidden_input)
    # Prepend the bias unit (a column of ones) to the hidden activations.
    hidden_with_bias = np.insert(hidden_activation, 0, 1, axis=1)
    output_input = hidden_with_bias @ weights_output.T
    output_activation = sigmoid(output_input)
    return (input_activation, hidden_input, hidden_with_bias,
            output_input, output_activation)
def cost(theta, X, y):
    """Compute the unregularised cross-entropy cost of the network.

    Args:
        theta: Flat parameter vector passed through to ``feed_forward``.
        X: Input matrix, one sample per row.
        y: Target matrix with the same shape as the network output.

    Returns:
        Sum of the element-wise cross-entropy terms divided by ``len(X)``.
    """
    h = feed_forward(theta, X)[-1]  # only the output activation is needed
    positive_term = -y * np.log(h)
    negative_term = (1 - y) * np.log(1 - h)
    element_costs = positive_term - negative_term  # element-wise, not a matrix product
    return element_costs.sum() / len(X)
def regularized_cost(theta, X, y, l=1):
    """Compute the cross-entropy cost plus an L2 penalty on the weights.

    The first column of each weight matrix (the bias weights) is excluded
    from the penalty.

    Args:
        theta: Flat parameter vector.
        X: Input matrix, one sample per row.
        y: Target matrix.
        l: Regularisation strength.

    Returns:
        ``l / (2 * len(X))`` times the sum of squared non-bias weights, plus
        the value of ``cost(theta, X, y)``.
    """
    t1, t2 = deserialize(theta)
    hidden_penalty = np.sum(t1[:, 1:] ** 2)
    output_penalty = np.sum(t2[:, 1:] ** 2)
    reg_term = l / (2 * len(X)) * (hidden_penalty + output_penalty)
    return reg_term + cost(theta, X, y)
def deserialize(seq):
    """Split a flat parameter vector into the two weight matrices.

    Args:
        seq: 1-D array of length ``25 * 401 + 10 * 26``.

    Returns:
        Tuple of the first ``25 * 401`` entries reshaped to ``(25, 401)``
        and the remaining entries reshaped to ``(10, 26)``.
    """
    split_at = 25 * 401
    hidden_flat = seq[:split_at]
    output_flat = seq[split_at:]
    return hidden_flat.reshape(25, 401), output_flat.reshape(10, 26)
def serialize(a, b):
    """Unroll two arrays into a single flat vector.

    Args:
        a: First array; its flattened values come first.
        b: Second array; its flattened values follow.

    Returns:
        1-D array holding the row-major values of ``a`` followed by those
        of ``b``.
    """
    flat_parts = (a.flatten(), b.flatten())
    return np.concatenate(flat_parts)
def sigmoid_gradient(z):
    """Evaluate the derivative of the sigmoid function at ``z``.

    Args:
        z: Scalar or NumPy array.

    Returns:
        ``sigmoid(z) * (1 - sigmoid(z))``, element-wise.
    """
    activation = sigmoid(z)
    return activation * (1 - activation)
def random_init(size):
    """Draw ``size`` values from a uniform distribution over [-0.12, 0.12).

    Args:
        size: Output shape, forwarded to ``np.random.uniform``.

    Returns:
        Array of random initial values.
    """
    bound = 0.12
    return np.random.uniform(low=-bound, high=bound, size=size)
def gradient(theta, X, y):
    """Compute the unregularised gradient of the cost by backpropagation.

    There is no error term for the input layer. The result has the same
    layout as ``theta``, so each entry is the gradient of the matching
    parameter.

    Args:
        theta: Flat parameter vector.
        X: Input matrix, one sample per row.
        y: Target matrix with the same shape as the network output.

    Returns:
        Flat gradient vector: the hidden-layer weight gradients followed by
        the output-layer weight gradients, each divided by ``len(X)``.
    """
    _, weights_output = deserialize(theta)
    a1, z2, a2, _, h = feed_forward(theta, X)
    output_error = h - y
    # Drop the bias column of the output weights before propagating the
    # error back to the hidden layer.
    hidden_error = (output_error @ weights_output[:, 1:]) * sigmoid_gradient(z2)
    grad_hidden = hidden_error.T @ a1
    grad_output = output_error.T @ a2
    return (1 / len(X)) * serialize(grad_hidden, grad_output)
def gradient_checking(theta, X, y, e=0.0001):
    """Compare the analytic gradient against a numerical estimate.

    Each parameter is perturbed by ``+e`` and ``-e`` in turn, and the
    central difference of ``regularized_cost`` is used as the numerical
    gradient. The relative difference to ``regularized_gradient`` is
    printed. Nothing is returned.

    Args:
        theta: Flat parameter vector at which to check the gradient.
        X: Input matrix, one sample per row.
        y: Target matrix.
        e: Perturbation size for the central difference.
    """

    def numeric_partial(index):
        # Work on copies so the caller's theta is never modified.
        shifted_up = theta.copy()
        shifted_down = theta.copy()
        shifted_up[index] = shifted_up[index] + e
        shifted_down[index] = shifted_down[index] - e
        cost_delta = regularized_cost(shifted_up, X, y) - regularized_cost(shifted_down, X, y)
        return cost_delta / (e * 2)

    numeric_grad = np.array([numeric_partial(index) for index in range(len(theta))])
    analytic_grad = regularized_gradient(theta, X, y)
    difference_norm = np.linalg.norm(numeric_grad - analytic_grad)
    sum_norm = np.linalg.norm(numeric_grad + analytic_grad)
    diff = difference_norm / sum_norm
    print(
        'If your backpropagation implementation is correct,\nthe relative difference will be smaller than 10e-9 (assume epsilon=0.0001).\nRelative Difference: {}\n'.format(
            diff))
def regularized_gradient(theta, X, y, l=1):
    """Compute the gradient of the regularised cost.

    The bias weights (first column of each weight matrix) are not
    penalised, so their regularisation contribution is zero.

    Args:
        theta: Flat parameter vector at which to evaluate the gradient.
        X: Input matrix, one sample per row.
        y: Target matrix.
        l: Regularisation strength.

    Returns:
        Flat gradient vector with the same layout as ``theta``.
    """
    D1, D2 = deserialize(gradient(theta, X, y))
    # Take the weights from the theta being evaluated, not from module-level
    # variables. Copy them, because the matrices returned by deserialize may
    # be views into theta and zeroing the bias columns must not alter the
    # caller's parameters.
    t1, t2 = (weights.copy() for weights in deserialize(theta))
    t1[:, 0] = 0
    t2[:, 0] = 0
    reg_D1 = D1 + (l / len(X)) * t1
    reg_D2 = D2 + (l / len(X)) * t2
    return serialize(reg_D1, reg_D2)
def nn_training(X, y):
    """Train the network from random initial weights with SciPy's TNC solver.

    Args:
        X: Input matrix, one sample per row.
        y: Target matrix.

    Returns:
        The result object returned by ``scipy.optimize.minimize``.
    """
    start_theta = random_init(10285)  # 25*401 + 10*26 parameters
    solver_options = {'maxiter': 400}
    return opt.minimize(
        regularized_cost,
        start_theta,
        args=(X, y, 1),  # trailing 1 is the regularisation strength
        method='TNC',
        jac=regularized_gradient,
        options=solver_options,
    )
def accuracy(theta, X, y):
    """Print a classification report for the network defined by ``theta``.

    Args:
        theta: Flat parameter vector of the network to evaluate.
        X: Input matrix, one sample per row.
        y: True labels, compared against the predicted labels.
    """
    # Evaluate the parameters that were passed in; previously the global
    # ``res.x`` was used and the ``theta`` argument was ignored.
    _, _, _, _, h = feed_forward(theta, X)
    # argmax is 0-based, so add 1 to get the predicted label.
    y_pred = np.argmax(h, axis=1) + 1
    print(classification_report(y, y_pred))
# def plot_100_images(X):
# """随机画100个数字"""
# index = np.random.choice(range(5000), 100) # 随机选择索引
# images = X[index] # 选择随机索引的像素值
# fig, ax_array = plt.subplots(10, 10, sharey=True, sharex=True, figsize=(8, 8)) # 画出10列10行的图像 设定尺寸
# for r in range(10): # 遍历10行10列
# for c in range(10):
# ax_array[r, c].matshow(images[r*10 + c].reshape(20,20), cmap='gray_r') # 每一幅图的尺寸都是20*20 像素点存在images中
# plt.xticks([]) # 取消x轴坐标
# plt.yticks([])
# plt.show()
# -------------- 1. Load the data (optionally visualise it) --------------
# The data file is read once; the earlier duplicate load, whose result was
# overwritten before use, has been removed.
raw_X, raw_y = load_mat('ex4data1.mat')
# plot_100_images(raw_X)
# -------------- 2. Model representation --------------
# Three-layer neural network with 400 input units.
# -------------- 2.1 Prepare the data --------------
X = np.insert(raw_X, 0, 1, axis=1)  # prepend the bias column x0; expected (5000, 401)
y = expand_y(raw_y)  # one-hot labels; expected (5000, 10)
# -------------- 2.2 Load the weights --------------
# The hidden layer has 25 units.
t1, t2 = load_weight('ex4weights.mat')  # expected shapes ((25, 401), (10, 26))
# -------------- 2.3 Unroll the parameters --------------
theta = serialize(t1, t2)  # flatten the weights: 25*401 + 10*26 = 10285
# -------------- 3. Train the neural network --------------
# gradient_checking(theta, X, y)  # very slow; run with care
res = nn_training(X, y)  # slow
print(res)
# -------------- 4. Evaluate --------------
accuracy(res.x, X, raw_y)