This post walks through implementing a linear SVM classifier.
To make the classifier more robust when classifying unseen samples, we want the score of the correct class to exceed the scores of the incorrect classes by at least a margin. This requirement gives rise to the hinge loss, which is the loss we minimize to train the linear SVM.
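For a single example x_i with label y_i and class scores s = x_i W, the multiclass SVM (hinge) loss with margin \Delta = 1, which is exactly what the code below computes, is

L_i = \sum_{j \neq y_i} \max(0,\; s_j - s_{y_i} + \Delta)

and the full objective averages this over the N training examples and adds L2 regularization:

L = \frac{1}{N} \sum_i L_i + \mathrm{reg} \sum_{k,l} W_{k,l}^2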
On to the code.
1. Fill in svm_loss_naive in linear_svm.py
The loss computation is already given; we need to add the gradient dW.
import numpy as np

def svm_loss_naive(W, X, y, reg):
    """
    Structured SVM loss function, naive implementation (with loops).

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    # compute the loss and the gradient
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    for i in range(num_train):
        scores = X[i].dot(W)
        correct_class_score = scores[y[i]]
        for j in range(num_classes):
            if j == y[i]:
                continue
            margin = scores[j] - correct_class_score + 1  # note delta = 1
            if margin > 0:
                loss += margin
                # Each violated margin adds X[i] to the column of the incorrect
                # class j and subtracts X[i] from the column of the correct class.
                dW[:, j] += X[i]
                dW[:, y[i]] -= X[i]

    # Right now the loss is a sum over all training examples, but we want it
    # to be an average instead so we divide by num_train.
    loss /= num_train
    dW /= num_train

    # Add regularization to the loss (and its derivative to the gradient).
    loss += reg * np.sum(W * W)
    dW += 2 * reg * W

    #############################################################################
    # TODO:                                                                     #
    # Compute the gradient of the loss function and store it in dW.             #
    # Rather than first computing the loss and then computing the derivative,   #
    # it may be simpler to compute the derivative at the same time that the     #
    # loss is being computed. As a result you may need to modify some of the    #
    # code above to compute the gradient.                                       #
    #############################################################################

    return loss, dW
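Before vectorizing, it helps to sanity-check the analytic gradient against a numeric one on tiny random data. The snippet below is a minimal, self-contained check (the shapes, seed, and hand-rolled central-difference loop are illustrative; the assignment's own grad_check_sparse utility does the same job):

import numpy as np

# Tiny random problem, just for checking the gradient (illustrative only).
np.random.seed(0)
D, C, N = 10, 4, 5
W = np.random.randn(D, C) * 0.01
X = np.random.randn(N, D)
y = np.random.randint(C, size=N)
reg = 0.1

loss, dW = svm_loss_naive(W, X, y, reg)

# Central-difference numeric gradient at a few random entries of W.
h = 1e-5
for _ in range(5):
    ix = tuple(np.random.randint(d) for d in W.shape)
    old = W[ix]
    W[ix] = old + h
    loss_plus, _ = svm_loss_naive(W, X, y, reg)
    W[ix] = old - h
    loss_minus, _ = svm_loss_naive(W, X, y, reg)
    W[ix] = old
    grad_numeric = (loss_plus - loss_minus) / (2 * h)
    rel_error = abs(grad_numeric - dW[ix]) / max(1e-8, abs(grad_numeric) + abs(dW[ix]))
    print('numeric: %f analytic: %f relative error: %e' % (grad_numeric, dW[ix], rel_error))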
2. Fill in svm_loss_vectorized in linear_svm.py
def svm_loss_vectorized(W, X, y, reg):
    """
    Structured SVM loss function, vectorized implementation.

    Inputs and outputs are the same as svm_loss_naive.
    """
    loss = 0.0
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    #############################################################################
    # TODO:                                                                     #
    # Implement a vectorized version of the structured SVM loss, storing the    #
    # result in loss.                                                           #
    #############################################################################
    num_train = X.shape[0]
    scores = X.dot(W)                                              # (N, C)
    scores_correct = scores[np.arange(num_train), y]               # (N,)
    scores_correct = np.reshape(scores_correct, (num_train, -1))   # (N, 1)
    margin = scores - scores_correct + 1                           # note delta = 1
    margin = np.maximum(0, margin)
    margin[np.arange(num_train), y] = 0   # the correct class contributes no loss
    loss += np.sum(margin) / num_train
    # Use the same regularization convention as svm_loss_naive.
    loss += reg * np.sum(W * W)
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################

    #############################################################################
    # TODO:                                                                     #
    # Implement a vectorized version of the gradient for the structured SVM     #
    # loss, storing the result in dW.                                           #
    #                                                                           #
    # Hint: Instead of computing the gradient from scratch, it may be easier    #
    # to reuse some of the intermediate values that you used to compute the     #
    # loss.                                                                     #
    #############################################################################
    # Turn the margins into an indicator matrix: 1 where the margin was violated.
    margin[margin > 0] = 1
    # Each violated margin adds X[i] to the column of class j and subtracts X[i]
    # from the column of the correct class, so the correct-class entry of each
    # row is minus the number of violations in that row.
    row_sum = np.sum(margin, axis=1)
    margin[np.arange(num_train), y] = -row_sum
    dW += np.dot(X.T, margin) / num_train
    dW += 2 * reg * W
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################

    return loss, dW
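To verify that the vectorized version agrees with the naive one, both the loss and the gradient can be compared on random data; the shapes and regularization value below are arbitrary:

import numpy as np
import time

# Random data just for the comparison (arbitrary shapes).
np.random.seed(1)
W = np.random.randn(3073, 10) * 0.0001
X = np.random.randn(500, 3073)
y = np.random.randint(10, size=500)
reg = 5e-6

loss_naive, grad_naive = svm_loss_naive(W, X, y, reg)
tic = time.time()
loss_vec, grad_vec = svm_loss_vectorized(W, X, y, reg)
toc = time.time()

print('loss difference: %e' % abs(loss_naive - loss_vec))                            # should be ~0
print('gradient difference: %e' % np.linalg.norm(grad_naive - grad_vec, ord='fro'))  # should be ~0
print('vectorized time: %.4fs' % (toc - tic))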
3. In linear_classifier.py, complete the SGD training function LinearClassifier.train()
def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
          batch_size=200, verbose=False):
    """
    Train this linear classifier using stochastic gradient descent.

    Inputs:
    - X: A numpy array of shape (N, D) containing training data; there are N
      training samples each of dimension D.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c
      means that X[i] has label 0 <= c < C for C classes.
    - learning_rate: (float) learning rate for optimization.
    - reg: (float) regularization strength.
    - num_iters: (integer) number of steps to take when optimizing
    - batch_size: (integer) number of training examples to use at each step.
    - verbose: (boolean) If true, print progress during optimization.

    Outputs:
    A list containing the value of the loss function at each training iteration.
    """
    num_train, dim = X.shape
    num_classes = np.max(y) + 1  # assume y takes values 0...K-1 where K is number of classes
    if self.W is None:
        # lazily initialize W
        self.W = 0.001 * np.random.randn(dim, num_classes)

    # Run stochastic gradient descent to optimize W
    loss_history = []
    for it in range(num_iters):
        X_batch = None
        y_batch = None

        #########################################################################
        # TODO:                                                                 #
        # Sample batch_size elements from the training data and their           #
        # corresponding labels to use in this round of gradient descent.        #
        # Store the data in X_batch and their corresponding labels in           #
        # y_batch; after sampling X_batch should have shape (batch_size, dim)   #
        # and y_batch should have shape (batch_size,)                           #
        #                                                                       #
        # Hint: Use np.random.choice to generate indices. Sampling with         #
        # replacement is faster than sampling without replacement.              #
        #########################################################################
        # Sample with replacement (the default for np.random.choice).
        batch_inx = np.random.choice(num_train, batch_size)
        X_batch = X[batch_inx, :]
        y_batch = y[batch_inx]
        #########################################################################
        #                            END OF YOUR CODE                           #
        #########################################################################

        # evaluate loss and gradient
        loss, grad = self.loss(X_batch, y_batch, reg)
        loss_history.append(loss)

        # perform parameter update
        #########################################################################
        # TODO:                                                                 #
        # Update the weights using the gradient and the learning rate.          #
        #########################################################################
        # Vanilla SGD step.
        self.W = self.W - learning_rate * grad
        #########################################################################
        #                            END OF YOUR CODE                           #
        #########################################################################

        if verbose and it % 100 == 0:
            print('iteration %d / %d: loss %f' % (it, num_iters, loss))

    return loss_history
4. In linear_classifier.py, complete the prediction function LinearClassifier.predict()
def predict(self, X):
    """
    Use the trained weights of this linear classifier to predict labels for
    data points.

    Inputs:
    - X: A numpy array of shape (N, D) containing training data; there are N
      training samples each of dimension D.

    Returns:
    - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional
      array of length N, and each element is an integer giving the predicted
      class.
    """
    y_pred = np.zeros(X.shape[0])
    ###########################################################################
    # TODO:                                                                   #
    # Implement this method. Store the predicted labels in y_pred.            #
    ###########################################################################
    scores = X.dot(self.W)
    y_pred = np.argmax(scores, axis=1)
    ###########################################################################
    #                           END OF YOUR CODE                              #
    ###########################################################################
    return y_pred
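A minimal end-to-end sketch of train() and predict() on synthetic data. It assumes LinearSVM (the subclass in linear_classifier.py) wires svm_loss_vectorized into self.loss, and the shapes and hyperparameters below are only illustrative:

import numpy as np

# Tiny synthetic problem (shapes and hyperparameters are illustrative).
np.random.seed(2)
X_toy = np.random.randn(1000, 50)
y_toy = np.random.randint(3, size=1000)

svm = LinearSVM()
loss_hist = svm.train(X_toy, y_toy, learning_rate=1e-3, reg=1e-5,
                      num_iters=500, batch_size=200, verbose=True)

y_toy_pred = svm.predict(X_toy)
print('final loss: %f' % loss_hist[-1])
print('training accuracy: %f' % np.mean(y_toy_pred == y_toy))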
5. Use the validation set to tune the hyperparameters (regularization strength and learning rate)
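The grid-search loop below assumes the hyperparameter grids and bookkeeping variables have already been defined; one possible setup (the specific grid values are just examples) is:

learning_rates = [1e-7, 5e-5]             # example learning-rate grid
regularization_strengths = [2.5e4, 5e4]   # example regularization grid
results = {}        # maps (learning_rate, reg) -> (train_accuracy, val_accuracy)
best_val = -1       # best validation accuracy seen so far
best_svm = None     # LinearSVM object that achieved best_val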
for rate in learning_rates:
    for regular in regularization_strengths:
        svm = LinearSVM()
        svm.train(X_train, y_train, learning_rate=rate, reg=regular, num_iters=1000)
        y_train_pred = svm.predict(X_train)
        accuracy_train = np.mean(y_train_pred == y_train)
        y_val_pred = svm.predict(X_val)
        accuracy_val = np.mean(y_val_pred == y_val)
        results[(rate, regular)] = (accuracy_train, accuracy_val)
        if best_val < accuracy_val:
            best_val = accuracy_val
            best_svm = svm
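After the grid search, the results can be printed and the best model evaluated on the held-out test split; a sketch, assuming X_test and y_test exist:

# Print the accuracy for every (learning rate, regularization) pair.
for (rate, regular), (train_acc, val_acc) in sorted(results.items()):
    print('lr %e reg %e train accuracy: %f val accuracy: %f'
          % (rate, regular, train_acc, val_acc))
print('best validation accuracy achieved during cross-validation: %f' % best_val)

# Evaluate the best model on the test set (assumes X_test, y_test exist).
y_test_pred = best_svm.predict(X_test)
print('linear SVM final test set accuracy: %f' % np.mean(y_test_pred == y_test))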