Stanford CS231n Course Notes -- Linear Classifiers (Assignment 1 Code Implementation)

I recently worked through Stanford's CS231n (winter 2016) course and got a lot out of it; it is an excellent introduction to deep learning and convolutional neural networks. While studying, I mainly followed the course-note translations by several contributors on Zhihu, which are very well done -- my thanks to them. Many course-related resources can be found in that column, and I recommend reading every translated note in full. Chinese subtitles for the lecture videos are also in progress; the first lecture has been finished, and anyone interested can take a look or join in. After finishing the videos and notes, I also implemented the course's three assignments. This series of posts will combine summaries of the course notes with my assignment code.

This post implements the three parts of Assignment 1: linear_svm, softmax, and linear_classifier. The assignment page provides the starter code for download, and the downloaded package already contains the relevant files; we only need to work through svm.ipynb and softmax.ipynb step by step. For the numpy array, vector, and matrix operations used below (for example, how to select elements at specific positions of a matrix), see the numpy tutorial referenced in the course materials.
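As a quick, self-contained illustration of the integer-array ("fancy") indexing used repeatedly below to pick one element per row of a matrix (the numbers here are made up for the example):

import numpy as np

scores = np.array([[3.2, 5.1, -1.7],
                   [1.3, 4.9,  2.0],
                   [2.2, 2.5, -3.1]])   # N = 3 samples, C = 3 classes
y = np.array([0, 1, 2])                 # correct class index for each sample

# scores[np.arange(N), y] picks scores[0, y[0]], scores[1, y[1]], scores[2, y[2]]
correct_class_scores = scores[np.arange(scores.shape[0]), y]
print correct_class_scores              # [ 3.2  4.9 -3.1]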
Here is my code; the explanatory comments are written inline:
1. linear_svm.py

import numpy as np
def svm_loss_naive(W, X, y, reg):
    dW = np.zeros(W.shape)   # initialize the gradient as zero
    # compute the loss and the gradient
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    # loop over every sample in the training set
    for i in xrange(num_train):
        # compute the class scores: dot product of the sample with W
        scores = X[i].dot(W)
        # score of the correct class
        correct_class_score = scores[y[i]]
        # loop over all C classes
        for j in xrange(num_classes):
            if j == y[i]:
                continue
            # hinge loss and its gradient (derivation in the previous post)
            margin = scores[j] - correct_class_score + 1   # note delta = 1
            if margin > 0:
                loss += margin
                dW[:, y[i]] += -X[i, :]     # gradient for the correct-class column
                dW[:, j] += X[i, :]         # gradient for the incorrect-class column
    # Right now the loss is a sum over all training examples, but we want it
    # to be an average instead so we divide by num_train.
    loss /= num_train
    dW /= num_train
    # Add regularization to the loss.
    loss += 0.5 * reg * np.sum(W * W)
    dW += reg * W
    return loss, dW
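For reference, the loops above implement the multiclass hinge loss and the gradient derived in the previous post (the columns $w_j$ of W are the per-class weight vectors and $\Delta = 1$):

$$L_i = \sum_{j \neq y_i} \max\left(0,\ s_j - s_{y_i} + \Delta\right), \qquad s = x_i W$$

$$\nabla_{w_{y_i}} L_i = -\Big(\sum_{j \neq y_i} \mathbb{1}\big(s_j - s_{y_i} + \Delta > 0\big)\Big)\, x_i, \qquad \nabla_{w_j} L_i = \mathbb{1}\big(s_j - s_{y_i} + \Delta > 0\big)\, x_i$$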

# fully vectorized computation of the loss and gradient
def svm_loss_vectorized(W, X, y, reg):
    loss = 0.0
    dW = np.zeros(W.shape)   # initialize the gradient as zero
    # class scores: an N by C matrix
    scores = X.dot(W)        # N by C
    num_train = X.shape[0]
    num_classes = W.shape[1]
    # correct-class score of every sample: fancy indexing picks scores[i, y[i]] for i = 0..N-1
    scores_correct = scores[np.arange(num_train), y]              # shape (N,)
    scores_correct = np.reshape(scores_correct, (num_train, 1))   # N by 1
    # margin of every score against the correct-class score
    margins = scores - scores_correct + 1.0     # N by C
    # the correct class contributes no loss
    margins[np.arange(num_train), y] = 0.0
    # clamp negative margins to 0 (those classes are already outside the margin)
    margins[margins <= 0] = 0.0
    loss += np.sum(margins) / num_train
    loss += 0.5 * reg * np.sum(W * W)
    # compute the gradient
    margins[margins > 0] = 1.0
    row_sum = np.sum(margins, axis=1)                  # shape (N,)
    margins[np.arange(num_train), y] = -row_sum        
    dW += np.dot(X.T, margins)/num_train + reg * W     # D by C

    return loss, dW
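To sanity-check the two implementations against each other, a quick numeric comparison on small random data is useful (this snippet is my own; the shapes and values are arbitrary choices, not part of the assignment's starter code):

import numpy as np
from linear_svm import svm_loss_naive, svm_loss_vectorized

# tiny synthetic problem: 5 samples, 4 features, 3 classes
X = np.random.randn(5, 4)
y = np.array([0, 1, 2, 1, 0])
W = 0.001 * np.random.randn(4, 3)

loss_naive, grad_naive = svm_loss_naive(W, X, y, 0.1)
loss_vec, grad_vec = svm_loss_vectorized(W, X, y, 0.1)

# both versions should agree up to floating-point error
print 'loss difference: %e' % abs(loss_naive - loss_vec)
print 'gradient difference: %e' % np.linalg.norm(grad_naive - grad_vec)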

2. softmax.py

import numpy as np
# Same idea as the SVM loss: a naive loop version and a vectorized version.
def softmax_loss_naive(W, X, y, reg):    

    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)    # D by C
    dW_each = np.zeros_like(W)
    num_train, dim = X.shape
    num_class = W.shape[1]
    f = X.dot(W)    # N by C
    # Considering the Numeric Stability
    # for numeric stability, subtract the per-row maximum score (see the previous post)
    f_max = np.reshape(np.max(f, axis=1), (num_train, 1))   # N by 1
    # normalized class probabilities (softmax)
    prob = np.exp(f - f_max) / np.sum(np.exp(f - f_max), axis=1, keepdims=True) # N by C
    y_trueClass = np.zeros_like(prob)
    y_trueClass[np.arange(num_train), y] = 1.0
    for i in xrange(num_train):
        for j in xrange(num_class):    
            loss += -(y_trueClass[i, j] * np.log(prob[i, j]))    
            dW_each[:, j] = -(y_trueClass[i, j] - prob[i, j]) * X[i, :]
        dW += dW_each
    loss /= num_train
    loss += 0.5 * reg * np.sum(W * W)
    dW /= num_train
    dW += reg * W

    return loss, dW
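For reference, these loops compute the cross-entropy loss on the softmax probabilities and its gradient with respect to each weight column:

$$L_i = -\log\left(\frac{e^{f_{y_i}}}{\sum_j e^{f_j}}\right), \qquad \nabla_{w_j} L_i = \left(p_j - \mathbb{1}(j = y_i)\right) x_i, \quad \text{where } p_j = \frac{e^{f_j}}{\sum_k e^{f_k}}$$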

def softmax_loss_vectorized(W, X, y, reg):
    # Initialize the loss and gradient to zero.    
    loss = 0.0    
    dW = np.zeros_like(W)    # D by C    
    num_train, dim = X.shape

    f = X.dot(W)    # N by C
    # Considering the Numeric Stability
    f_max = np.reshape(np.max(f, axis=1), (num_train, 1))   # N by 1
    prob = np.exp(f - f_max) / np.sum(np.exp(f - f_max), axis=1, keepdims=True)
    y_trueClass = np.zeros_like(prob)
    y_trueClass[range(num_train), y] = 1.0    # N by C
    loss += -np.sum(y_trueClass * np.log(prob)) / num_train + 0.5 * reg * np.sum(W * W)
    dW += -np.dot(X.T, y_trueClass - prob) / num_train + reg * W

    return loss, dW
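A quick sanity check (my own snippet, not part of the notebook): with a small random W the predicted probabilities are roughly uniform, so for 10 classes the unregularized loss should come out near -log(0.1) ≈ 2.302, and the naive and vectorized versions should agree:

import numpy as np
from softmax import softmax_loss_naive, softmax_loss_vectorized

num_train, dim, num_class = 100, 3073, 10
X_dev = np.random.randn(num_train, dim)
y_dev = np.random.randint(num_class, size=num_train)
W = 0.0001 * np.random.randn(dim, num_class)

loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)
print 'loss: %f (should be close to %f)' % (loss, -np.log(0.1))

loss_vec, grad_vec = softmax_loss_vectorized(W, X_dev, y_dev, 0.0)
print 'difference between naive and vectorized loss: %e' % abs(loss - loss_vec)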

3. linear_classifier.py

from linear_svm import *
from softmax import *

class LinearClassifier(object):    

    def __init__(self):        
        self.W = None    

    def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100, batch_size=200, verbose=True):
        num_train, dim = X.shape
        # assume y takes values 0...K-1 where K is number of classes
        num_classes = np.max(y) + 1  
        if self.W is None:
            # lazily initialize W
            self.W = 0.001 * np.random.randn(dim, num_classes)   # D by C

        # Run stochastic gradient descent(Mini-Batch) to optimize W
        loss_history = []
        for it in xrange(num_iters): 
            X_batch = None
            y_batch = None
            # sample a minibatch; sampling with replacement (replace=True) would be
            # slightly faster, but sampling without replacement also works
            sample_index = np.random.choice(num_train, batch_size, replace=False)
            X_batch = X[sample_index, :]   # batch_size by D
            y_batch = y[sample_index]      # shape (batch_size,)
            # evaluate loss and gradient
            loss, grad = self.loss(X_batch, y_batch, reg)
            loss_history.append(loss)

            # perform parameter update
            self.W += -learning_rate * grad
            if verbose and it % 100 == 0:
                print 'Iteration %d / %d: loss %f' % (it, num_iters, loss)

        return loss_history

    def predict(self, X):
        # X is N by D; W.T.dot(X.T) is C by N, and the prediction is the argmax over classes
        y_pred = np.zeros(X.shape[0])
        y_pred = np.argmax(np.dot(self.W.T, X.T), axis=0)
        return y_pred

    def loss(self, X_batch, y_batch, reg): 
        pass

class LinearSVM(LinearClassifier):
    """ A subclass that uses the Multiclass SVM loss function """

    def loss(self, X_batch, y_batch, reg):
        return svm_loss_vectorized(self.W, X_batch, y_batch, reg)

class Softmax(LinearClassifier):
    """ A subclass that uses the Softmax + Cross-entropy loss function """

    def loss(self, X_batch, y_batch, reg):
        return softmax_loss_vectorized(self.W, X_batch, y_batch, reg)
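A minimal usage sketch of the classes above on synthetic data (the names and numbers here are chosen only for illustration):

import numpy as np
from linear_classifier import LinearSVM

# fake dataset: 500 samples, 10 features, 3 classes
X_toy = np.random.randn(500, 10)
y_toy = np.random.randint(3, size=500)

svm = LinearSVM()
loss_history = svm.train(X_toy, y_toy, learning_rate=1e-3, reg=1e-5,
                         num_iters=300, batch_size=100, verbose=False)
y_toy_pred = svm.predict(X_toy)
print 'training accuracy: %f' % np.mean(y_toy == y_toy_pred)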

The notebook code below mainly implements hyperparameter search over the learning rate and the regularization strength.

# softmax.ipynb

from cs231n.classifiers import Softmax
results = {}
best_val = -1
best_softmax = None
learning_rates = [5e-6, 1e-7, 5e-7]
regularization_strengths = [1e4, 5e4, 1e5]
params = [(x,y) for x in learning_rates for y in regularization_strengths ]
for lrate, regular in params:
    softmax = Softmax()
    loss_hist = softmax.train(X_train, y_train, learning_rate=lrate, reg=regular,
                             num_iters=700, verbose=True)
    y_train_pred = softmax.predict(X_train)
    accuracy_train = np.mean( y_train == y_train_pred)
    y_val_pred = softmax.predict(X_val)
    accuracy_val = np.mean(y_val == y_val_pred)
    results[(lrate, regular)] = (accuracy_train, accuracy_val)
    if best_val < accuracy_val:
        best_val = accuracy_val
        best_softmax = softmax

# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print 'lr %e reg %e train accuracy: %f val accuracy: %f' % (
                lr, reg, train_accuracy, val_accuracy)

print 'best validation accuracy achieved during cross-validation: %f' % best_val


# svm.ipynb

from cs231n.classifiers import LinearSVM
learning_rates = [1e-7, 5e-5]
regularization_strengths = [5e4, 1e5]

results = {}
best_val = -1   # The highest validation accuracy that we have seen so far.
best_svm = None # The LinearSVM object that achieved the highest validation rate.

iters= 1000
for lr in learning_rates:
    for rs in regularization_strengths:
        svm = LinearSVM()
        svm.train(X_train, y_train, learning_rate=lr, reg=rs, num_iters=iters)
        y_train_pred = svm.predict(X_train)
        acc_train = np.mean(y_train == y_train_pred)
        y_val_pred = svm.predict(X_val)
        acc_val = np.mean(y_val == y_val_pred)
        results[(lr, rs)] = (acc_train, acc_val)
        if best_val < acc_val:
            best_val = acc_val
            best_svm = svm

# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print 'lr %e reg %e train accuracy: %f val accuracy: %f' % (
                lr, reg, train_accuracy, val_accuracy)

print 'best validation accuracy achieved during cross-validation: %f' % best_val
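After the search, the best model can be evaluated on the held-out test set (assuming X_test and y_test were prepared the same way as the training and validation splits):

# Evaluate the best svm on the test set
y_test_pred = best_svm.predict(X_test)
test_accuracy = np.mean(y_test == y_test_pred)
print 'linear SVM on raw pixels final test set accuracy: %f' % test_accuracy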

Finally, a few screenshots from the training runs:
[screenshots omitted]
