The dataset ex3data1.mat contains 5000 training examples of handwritten digits. Each example is a 20 * 20 grid of grayscale pixels; every pixel value is a floating-point number giving the intensity at that position, and the grid is unrolled into a 400-dimensional vector, so each row of the matrix X represents one training example. ex3data1.mat also contains a vector y holding the labels of the 5000 examples.
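A quick sanity check of the layout described above (a minimal sketch; it just reloads the file and prints the dimensions, and assumes ex3data1.mat sits in the working directory):
import numpy as np
import scipy.io as io

check = io.loadmat('ex3data1.mat')
print(check['X'].shape)       # (5000, 400): one unrolled 20x20 image per row
print(check['y'].shape)       # (5000, 1): one label per example
print(np.unique(check['y']))  # labels 1..10; 10 stands for the digit 0, as the predict function below assumes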
import random
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as io
from scipy.optimize import minimize
data = io.loadmat('ex3data1.mat')
data1 = data['X']
label = data.get('y')
pick_one = random.randint(0, 4999)  # valid row indices run from 0 to 4999
x = data1[pick_one, :]
fig, ax = plt.subplots(figsize=(1, 1))
ax.matshow(x.reshape(20, 20).T, cmap='gray_r')
plt.xticks([])
plt.yticks([])
plt.show()
def plot_image(x):
    # draw 100 randomly chosen training examples on a 10 x 10 grid
    sample_index = np.random.choice(np.arange(x.shape[0]), 100, replace=False)
    image = x[sample_index, :]
    fig, ax = plt.subplots(10, 10, sharex='all', sharey='all', figsize=(10, 10))
    for i in range(10):
        for j in range(10):
            ax[i, j].matshow(image[i * 10 + j, :].reshape(20, 20).T, cmap='gray_r')
            ax[i, j].set_xticks([])
            ax[i, j].set_yticks([])
    plt.show()
plot_image(data1)
np.random.choice draws 100 random indices from the given sequence (np.arange is similar to range: it builds a sequence counting up from 0 to the given number). Those indices select the corresponding rows of X, and the 100 images are drawn with the same method used above.
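A minimal illustration of that sampling step (the array name pool is only for this example); passing replace=False guarantees 100 distinct rows:
pool = np.arange(5000)                            # 0, 1, ..., 4999
idx = np.random.choice(pool, 100, replace=False)  # 100 distinct row indices
print(idx.shape)                                  # (100,)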
def sigmoid(x1):
    '''Numerically-stable sigmoid function.'''
    # This form is more stable than a plain 1 / (1 + np.exp(-x)),
    # which can overflow for inputs with large magnitude.
    h = np.zeros(len(x1))
    for i, x in enumerate(x1):
        if x >= 0:
            z = np.exp(-x)
            h[i] = 1 / (1 + z)
        else:
            z = np.exp(x)
            h[i] = z / (1 + z)
    return h
def cost(theta, X, y, lamda):
    H = sigmoid(X @ theta)
    inner1 = (-y) * np.log(H)
    inner2 = (1 - y) * np.log(1 - H)
    inner3 = lamda * np.sum(np.power(theta[1:], 2)) / (2 * len(X))
    return np.sum(inner1 - inner2) / len(y) + inner3
def descent(theta, X, y, lamda):
    # regularized gradient of the cost; the bias term theta[0] is not regularized
    H = sigmoid(X @ theta)
    parameter = len(theta)
    temp = np.zeros(parameter)
    for j in range(parameter):
        inner = (H - y) * X[:, j]
        if j == 0:
            temp[j] = np.sum(inner) / len(y)
        else:
            temp[j] = (np.sum(inner) + lamda * theta[j]) / len(y)
    return temp
The sigmoid, regularized cost, and gradient functions.
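For reference, the regularized cost and gradient that cost and descent implement (the bias term $\theta_0$ is excluded from the regularization), with $h_\theta(x) = \mathrm{sigmoid}(\theta^T x)$ and $m$ training examples:

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\Big[-y^{(i)}\log h_\theta(x^{(i)}) - (1 - y^{(i)})\log\big(1 - h_\theta(x^{(i)})\big)\Big] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

$$\frac{\partial J}{\partial \theta_0} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)x_0^{(i)}, \qquad \frac{\partial J}{\partial \theta_j} = \frac{1}{m}\Big[\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)x_j^{(i)} + \lambda\theta_j\Big] \quad (j \ge 1)$$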
X = np.insert(data1, 0, values=1, axis=1)  # prepend a column of ones (bias term), giving X shape (5000, 401)
y = label.flatten()                        # flatten the (5000, 1) label array into a (5000,) vector
def one_vs_all_classification(X, y, lamda):
    Theta = np.zeros((10, 401))
    for k in range(1, 11):
        theta = np.random.rand(401) * 0.04 - 0.02  # random initial values in [-0.02, 0.02]
        y_i = np.array([1 if lbl == k else 0 for lbl in y])
        # reduce the multi-class problem to a binary one: class k versus everything else
        fmin = minimize(fun=cost, x0=theta, args=(X, y_i, lamda), method='TNC', jac=descent)
        Theta[k - 1] = fmin.x
    return Theta
all_Theta = one_vs_all_classification(X, y, 1)
When theta is initialized, each element is set to a random value in [-0.02, 0.02]. Theta is not initialized to all zeros here, since identical starting values would leave the values identical after the iterations, so only one feature could be captured and the remaining features would be redundant. A multi-class classification problem can be converted into two-class classification; by classifying repeatedly, every class can be separated out.
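The relabeling step inside the loop can also be written in vectorized form; a minimal equivalent sketch (assuming y is the flattened label vector defined above):
k = 3
y_k = (y == k).astype(int)  # 1 for examples of class k, 0 for every other class
print(y_k.sum())            # number of training examples labeled k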
def predict(x):
    # score the 401-dimensional input against all ten classifiers
    results = sigmoid(all_Theta @ x)
    results = list(results)
    max_value = max_num = 0
    for i, result in enumerate(results):
        if max_value < result:
            max_value = result
            max_num = i + 1
    # in this dataset the label 10 stands for the digit 0
    if max_num == 10:
        predict_num = 0
    else:
        predict_num = max_num
    return predict_num
Using the all_Theta obtained from the iterative optimization, the sigmoid value of any input image is computed against every classifier; the class with the largest value is the one the image falls into, and that class is the predicted label.
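The same argmax idea applies to the whole training set at once and gives a quick accuracy estimate; a minimal sketch (because the sigmoid is monotonic, taking the argmax of the raw scores X @ all_Theta.T picks the same class as taking it after the sigmoid):
scores = X @ all_Theta.T              # (5000, 10): one score per example per class
pred = np.argmax(scores, axis=1) + 1  # predicted labels 1..10, where 10 stands for the digit 0
print('training accuracy: {:.2%}'.format(np.mean(pred == y)))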
def plot_single_image(image):
    fig, ax = plt.subplots(figsize=(2, 2))
    ax.matshow(image.reshape(20, 20).T, cmap='gray_r')
    plt.xticks([])
    plt.yticks([])
n = random.randint(0, X.shape[0] - 1)  # pick any one of the 5000 rows
x = X[n, :]
plot_single_image(x[1:])               # drop the bias term before plotting
print('Predicted digit: {}'.format(predict(x)))
plt.show()