I won't paste the earlier code here; just as in the KNN assignment, it only loads the dataset and visualizes a few samples. Below is the part we have to fill in ourselves:
def svm_loss_naive(W, X, y, reg):
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    # compute the loss and the gradient
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    for i in range(num_train):
        scores = X[i].dot(W)
        correct_class_score = scores[y[i]]
        for j in range(num_classes):
            if j == y[i]:
                continue
            margin = scores[j] - correct_class_score + 1  # note delta = 1
            if margin > 0:
                loss += margin
                dW[:, j] += X[i, :].T      # column for j != y[i]
                dW[:, y[i]] += -X[i, :].T  # column for the correct class: for every class that
                                           # violates the margin, subtract X[i] once, so in total
                                           # it is -(number of violating classes) * X[i]

    # Right now the loss is a sum over all training examples, but we want it
    # to be an average instead so we divide by num_train.
    loss /= num_train
    dW /= num_train  # average over the batch

    # Add regularization to the loss.
    loss += reg * np.sum(W * W)
    dW += 2 * reg * W  # gradient of the regularization term reg * sum(W^2)

    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    pass
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    return loss, dW
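For reference, the two dW updates inside the loop implement the analytic gradient of the per-example hinge loss; a sketch of the formulas, using delta = 1 and writing w_j for the j-th column of W to match the code:

L_i = \sum_{j \neq y_i} \max\bigl(0,\; w_j^{\top} x_i - w_{y_i}^{\top} x_i + 1\bigr)

\nabla_{w_j} L_i = \mathbb{1}\bigl[w_j^{\top} x_i - w_{y_i}^{\top} x_i + 1 > 0\bigr]\, x_i \quad (j \neq y_i)

\nabla_{w_{y_i}} L_i = -\Bigl(\sum_{j \neq y_i} \mathbb{1}\bigl[w_j^{\top} x_i - w_{y_i}^{\top} x_i + 1 > 0\bigr]\Bigr)\, x_i

In words: every class that violates the margin adds x_i to its own column of dW and subtracts x_i from the correct class's column, which is exactly what the comment on the correct-class update describes.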
def svm_loss_vectorized(W, X, y, reg):
    loss = 0.0
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    num_train = X.shape[0]                                   # e.g. 500
    scores = np.dot(X, W)                                    # class scores, shape (num_train, 10)
    correct_class_scores = scores[np.arange(num_train), y]   # score of the correct class, shape (num_train,)
    correct_class_scores = np.reshape(correct_class_scores, (num_train, -1))  # reshape to (num_train, 1) for broadcasting
    margin = scores - correct_class_scores + 1.0
    margin[np.arange(num_train), y] = 0.0  # zero out the entries of the correct classes
    margin[margin <= 0] = 0.0              # implements max(0, .)
    loss += np.sum(margin) / num_train     # data loss
    loss += reg * np.sum(W * W)            # regularization loss (same convention as the naive version)
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    margin[margin > 0] = 1.0                    # each positive margin contributes +X[i] to column j
    row_sum = np.sum(margin, axis=1)            # number of margin violations per example
    margin[np.arange(num_train), y] = -row_sum  # correct class gets -count * X[i]
    dW = 1.0 / num_train * np.dot(X.T, margin) + 2 * reg * W
    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    return loss, dW
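Since the notebook later compares the two implementations, a minimal sanity check along these lines (shapes chosen for illustration, not copied from the notebook) confirms the vectorized version matches the naive one:

import numpy as np

# Quick consistency check: loss and gradient of the vectorized version should match the loop version.
# 3073 = 32*32*3 pixels plus a bias dimension, 10 classes, as in the CIFAR-10 setup.
np.random.seed(0)
W = 0.001 * np.random.randn(3073, 10)
X = np.random.randn(500, 3073)
y = np.random.randint(10, size=500)

loss_naive, grad_naive = svm_loss_naive(W, X, y, reg=5e1)
loss_vec, grad_vec = svm_loss_vectorized(W, X, y, reg=5e1)

print('loss difference:', abs(loss_naive - loss_vec))                 # should be ~0
print('gradient difference:', np.linalg.norm(grad_naive - grad_vec))  # should be ~0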
def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
          batch_size=200, verbose=False):
    num_train, dim = X.shape
    num_classes = np.max(y) + 1  # assume y takes values 0...K-1 where K is number of classes
    if self.W is None:
        # lazily initialize W with small random values
        self.W = 0.001 * np.random.randn(dim, num_classes)

    # Run stochastic gradient descent to optimize W
    loss_history = []
    for it in range(num_iters):
        X_batch = None
        y_batch = None

        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        indices = np.arange(num_train)  # index array covering the whole training set
        indices_result = np.sort(np.random.choice(indices, batch_size, replace=False))  # sample batch_size row indices without replacement, sorted
        X_batch = X[indices_result]  # rows of the training set selected by the sampled indices
        y_batch = y[indices_result]  # labels of the selected rows
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # evaluate loss and gradient
        loss, grad = self.loss(X_batch, y_batch, reg)
        loss_history.append(loss)

        # perform parameter update
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        self.W = self.W - learning_rate * grad  # gradient descent step
        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        if verbose and it % 100 == 0:
            print('iteration %d / %d: loss %f' % (it, num_iters, loss))

    return loss_history
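A minimal usage sketch, assuming this train method sits on the assignment's LinearSVM class and that X_train / y_train stand in for the preprocessed CIFAR-10 data from the notebook:

import matplotlib.pyplot as plt

# Train for a while and plot the loss history; it should drop quickly and then flatten out.
svm = LinearSVM()
loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=2.5e4,
                      num_iters=1500, verbose=True)

plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()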
One final question:
Inline question 2
Describe what your visualized SVM weights look like, and offer a brief explanation for why they look the way that they do.
Your Answer: The weight visualization for each class looks like a blurred template of a typical image of that class. This is because the weights are learned from the training set: each class's weights effectively average out the features of the training images of that class, so they end up resembling them.
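The visualization the question refers to reshapes each column of W (minus the bias row) back into a 32x32x3 image; a rough sketch of that step, assuming best_svm.W has shape (3073, 10) as in the notebook:

import numpy as np
import matplotlib.pyplot as plt

# Strip the bias row, reshape each class's weights into an image, and rescale to 0..255
# so the learned "templates" can be drawn.
w = best_svm.W[:-1, :]                 # (3072, 10)
w = w.reshape(32, 32, 3, 10)
w_min, w_max = np.min(w), np.max(w)
classes = ['plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']
for i in range(10):
    plt.subplot(2, 5, i + 1)
    wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min)
    plt.imshow(wimg.astype('uint8'))
    plt.axis('off')
    plt.title(classes[i])
plt.show()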