This post is organized from cs231n assignment 1.
In this assignment you will:
① implement a fully-vectorized loss function for the Softmax classifier
② implement the fully-vectorized expression for its analytic gradient
③ check your implementation against the numerical gradient
④ use a validation set to tune the learning rate and regularization strength
⑤ optimize the loss function with stochastic gradient descent (SGD)
⑥ visualize the final learned weights
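For reference, these are the standard formulas the code below implements. With scores $f = xW$ for a single example $x$ with label $y_i$, the softmax cross-entropy loss and its gradient with respect to column $j$ of $W$ are

$$L_i = -\log\frac{e^{f_{y_i}}}{\sum_j e^{f_j}} = -f_{y_i} + \log\sum_j e^{f_j}, \qquad \frac{\partial L_i}{\partial W_{:,j}} = \left(p_j - \mathbb{1}[j = y_i]\right)x_i, \quad p_j = \frac{e^{f_j}}{\sum_k e^{f_k}}$$

Subtracting $\max_j f_j$ from the scores before exponentiating leaves $L_i$ unchanged but avoids overflow; that is the "shift" both implementations below apply.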
# Softmax loss function, naive implementation (with loops)
import numpy as np

def softmax_loss_naive(W, X, y, reg):
    """
    Inputs:
    - W: weights, shape (num_dims, num_classes)
    - X: data, shape (num_train, num_dims)
    - y: labels, shape (num_train,)
    - reg: (float) regularization strength
    Returns:
    - loss: the loss as a single float
    - dW: gradient with respect to W, same shape as W
    """
    # Initialize the loss and the gradient of W to zero
    loss = 0.0
    dW = np.zeros_like(W)
    num_classes = W.shape[1]
    num_train = X.shape[0]
    scores = np.dot(X, W)
    # Compute the loss and gradient one training example at a time
    for ii in range(num_train):
        current_scores = scores[ii, :]
        # Fix numerical stability by subtracting the max from the score vector
        shift_scores = current_scores - np.max(current_scores)
        # Loss for this example
        loss_ii = -shift_scores[y[ii]] + np.log(np.sum(np.exp(shift_scores)))
        loss += loss_ii
        for jj in range(num_classes):
            softmax_score = np.exp(shift_scores[jj]) / np.sum(np.exp(shift_scores))
            # Gradient of the loss: (p_j - 1[j == y_i]) * x_i
            if jj == y[ii]:
                dW[:, jj] += (-1 + softmax_score) * X[ii]
            else:
                dW[:, jj] += softmax_score * X[ii]
    # Average over the batch and add the regularization term
    loss /= num_train
    loss += reg * np.sum(W * W)
    # Average over the batch and add the derivative of the regularization term
    dW /= num_train
    dW += 2 * reg * W
    return loss, dW
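Objective ③ above calls for checking the analytic gradient against a numerical one. The sketch below is a minimal centered-difference spot check in the spirit of cs231n's grad_check_sparse; the standalone helper, its parameters (num_checks, h), and the random W, X, y stand-ins are mine, not the assignment's exact code.

# Gradient check: spot-check the analytic gradient with centered differences
def numeric_grad_check(f, W, analytic_grad, num_checks=10, h=1e-5):
    for _ in range(num_checks):
        ix = tuple(np.random.randint(n) for n in W.shape)  # a random entry of W
        old = W[ix]
        W[ix] = old + h
        fxph = f(W)   # f(W + h)
        W[ix] = old - h
        fxmh = f(W)   # f(W - h)
        W[ix] = old   # restore the original value
        grad_numeric = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = abs(grad_numeric - grad_analytic) / (abs(grad_numeric) + abs(grad_analytic) + 1e-12)
        print('numerical: %f analytic: %f, relative error: %e' % (grad_numeric, grad_analytic, rel_error))

# Synthetic shapes standing in for CIFAR-10 with the bias folded into W
W = np.random.randn(3073, 10) * 0.0001
X = np.random.randn(50, 3073)
y = np.random.randint(10, size=50)
loss, dW = softmax_loss_naive(W, X, y, 0.0)
numeric_grad_check(lambda w: softmax_loss_naive(w, X, y, 0.0)[0], W, dW)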
# Softmax loss function, vectorized implementation
def softmax_loss_vectorized(W, X, y, reg):
    # Initialize the loss and the gradient of W to zero
    loss = 0.0
    dW = np.zeros_like(W)
    num_train = X.shape[0]
    # Compute the scores and fix numerical stability
    scores = np.dot(X, W)
    shift_scores = scores - np.max(scores, axis=1)[..., np.newaxis]
    # Compute the softmax scores (per-class probabilities)
    softmax_scores = np.exp(shift_scores) / np.sum(np.exp(shift_scores), axis=1)[..., np.newaxis]
    # Compute dScore; copy so we do not modify softmax_scores in place
    dScore = softmax_scores.copy()
    dScore[range(num_train), y] = dScore[range(num_train), y] - 1
    dW = np.dot(X.T, dScore)
    dW /= num_train
    dW += 2 * reg * W
    # Compute the cross-entropy loss; np.choose picks each row's correct-class score
    correct_class_scores = np.choose(y, shift_scores.T)
    loss = -correct_class_scores + np.log(np.sum(np.exp(shift_scores), axis=1))
    loss = np.sum(loss)
    # Average the loss and add regularization
    loss /= num_train
    loss += reg * np.sum(W * W)
    return loss, dW
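To confirm the two implementations agree, and to see the payoff from vectorization, the sketch below times both on the synthetic W, X, y defined for the gradient check above; the time.time() timing is a generic pattern, not the notebook's exact cell.

# Compare the naive and vectorized implementations
import time

tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(W, X, y, 0.000005)
toc = time.time()
print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))

tic = time.time()
loss_vectorized, grad_vectorized = softmax_loss_vectorized(W, X, y, 0.000005)
toc = time.time()
print('vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# Both the losses and the gradients should match to numerical precision
print('loss difference: %f' % np.abs(loss_naive - loss_vectorized))
print('gradient difference: %f' % np.linalg.norm(grad_naive - grad_vectorized, ord='fro'))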
# Use the validation set to tune the hyperparameters (regularization strength
# and learning rate). Experiment with different values; classification accuracy
# on the validation set should exceed 0.35.
from cs231n.classifiers import Softmax
results = {}
best_val = -1
best_softmax = None
learning_rates = [1e-7, 5e-7]
regularization_strengths = [2.5e4, 5e4]
num_splt_lr = 3
num_splt_rs = 8
for i in range(num_splt_lr):
    for j in range(num_splt_rs):
        # Step evenly through each [low, high] range
        learning_rate_ij = learning_rates[0] + i * (learning_rates[1] - learning_rates[0]) / num_splt_lr
        reg_ij = regularization_strengths[0] + j * (regularization_strengths[1] - regularization_strengths[0]) / num_splt_rs
        softmax = Softmax()
        loss_hist = softmax.train(X_train, y_train, learning_rate=learning_rate_ij, reg=reg_ij,
                                  num_iters=1500, verbose=False)
        y_train_pred = softmax.predict(X_train)
        accuracy_train = np.mean(y_train == y_train_pred)
        y_val_pred = softmax.predict(X_val)
        accuracy_val = np.mean(y_val == y_val_pred)
        results[(learning_rate_ij, reg_ij)] = (accuracy_train, accuracy_val)
        # Keep the model with the best validation accuracy
        if accuracy_val > best_val:
            best_val = accuracy_val
            best_softmax = softmax
# Print out the results
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print('lr %e reg %e train accuracy: %f val accuracy: %f' % (lr, reg, train_accuracy, val_accuracy))
print('best validation accuracy achieved during cross-validation: %f' % best_val)
# Evaluate on the test set
y_test_pred = best_softmax.predict(X_test)
test_accuracy = np.mean(y_test == y_test_pred)
print('softmax on raw pixels final test set accuracy: %f' % (test_accuracy,))
# softmax on raw pixels final test set accuracy: 0.344000
# Visualize the learned weights for each class
import matplotlib.pyplot as plt

w = best_softmax.W[:-1, :]  # strip out the bias row
w = w.reshape(32, 32, 3, 10)
w_min, w_max = np.min(w), np.max(w)
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
for i in range(10):
    plt.subplot(2, 5, i + 1)
    # Rescale the weights into the 0-255 range
    wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min)
    plt.imshow(wimg.astype('uint8'))
    plt.axis('off')
    plt.title(classes[i])
plt.show()
The final output is shown in the figure below: