cs231n课后SVM作业

用到的公式

A loss function tells how good our current classifier is
Given a dataset of examples
$\{(x_{i}, y_{i})\}_{i=1}^{N}$
where $x_{i}$ is an image and $y_{i}$ is its label.
The loss over the dataset is the average of the per-example losses:
$L = \frac{1}{N} \sum_{i} L_{i}(f(x_{i}, W), y_{i})$
The SVM loss has the form (with $s = f(x_{i}, W)$ the vector of class scores):
$L_{i} = \sum_{j \neq y_{i}} \max(0, s_{j} - s_{y_{i}} + 1)$
The p-norm of a vector $\mathbf{x}$ with $n$ components:
$\Vert \mathbf{x} \Vert_{p} = \left( \sum_{i=1}^{n} \vert x_{i} \vert^{p} \right)^{\frac{1}{p}}$
The spectral norm of a matrix $\mathbf{A}$:
$\Vert \mathbf{A} \Vert_{2} = \sqrt{\lambda_{max}(\mathbf{A}^\mathsf{T} \mathbf{A})}$

完成作业

用的数据集依然是cifar10,做下来准确率为25%,参数调整一下的话应该会有一定的提高,可惜现在还不太会调

import numpy as np
import pickle
import os
import joblib

# Dataset preparation
def unpickle(file):
    """Load and return the object stored in the pickle file ``file``.

    The file is opened in binary mode and unpickled with
    ``encoding='bytes'``, so 8-bit strings written by Python 2 come back
    as ``bytes`` objects instead of being decoded to ``str``.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The unpickled object.
    """
    with open(file, mode='rb') as handle:
        return pickle.load(handle, encoding='bytes')
# Initialisation
dataset = []
labelset = []
file_location = 'cifar-10-batches-py'
# os.listdir returns entries in arbitrary order; sort them so the sample order
# (and therefore any train/test split sliced from it) is reproducible.
file_name_list = sorted(os.listdir(file_location))
# Read the data and labels of every training batch and collect them.
for file_name in file_name_list:
    if file_name.startswith('data_batch'):
        # Unpickle each batch file once and take both fields from the result.
        batch = unpickle(os.path.join(file_location, file_name))
        data_batch = batch[b'data']
        label_batch = batch[b'labels']
        dataset.append(data_batch)
        labelset.append(label_batch)
if not dataset:
    raise ValueError("no 'data_batch*' files found in %r" % file_location)
# Merge the per-batch pieces: dataset becomes one (num_samples, num_features)
# array and the labels one (num_samples, 1) column, e.g. [[1],[2],[3]] -> (3, 1).
dataset = np.concatenate(dataset, axis=0)
# Append a constant 1 to every sample so the bias is learned as part of W.
ones = np.ones((dataset.shape[0], 1))
xtrain = np.hstack((dataset, ones))
ytrain = np.concatenate(labelset).reshape(-1, 1)

class LinearSVM(object):
    """Linear classifier trained with the multiclass SVM (hinge) loss.

    ``W`` is the weight matrix of shape (dim, num_classes). It is ``None``
    until the first call to :meth:`train`, which initialises it randomly.
    """

    def __init__(self):
        self.W = None

    def loss_function(self, X, y, reg):
        """Compute the regularised multiclass SVM loss and its gradient.

        Args:
            X: Array of shape (num_train, dim), one sample per row.
            y: Integer class labels, shape (num_train,) or (num_train, 1).
            reg: L2 regularisation strength.

        Returns:
            A tuple ``(loss, dW)``: the scalar loss (mean hinge loss with
            margin 1 plus ``0.5 * reg * sum(W * W)``) and the gradient of
            that loss with respect to ``self.W``, with the same shape as
            ``self.W``.

        Raises:
            ZeroDivisionError: If ``X`` contains no samples.
        """
        # Labels may arrive as a column vector; flatten them so that each
        # label indexes a single score and the loss stays a scalar.
        y = np.asarray(y).reshape(-1)
        num_train = X.shape[0]
        rows = np.arange(num_train)

        scores = np.dot(X, self.W)
        correct_scores = scores[rows, y].reshape(-1, 1)
        margins = np.maximum(0, scores - correct_scores + 1)    # delta = 1
        # The correct class never contributes to the loss.
        margins[rows, y] = 0

        # Average over the batch, then add the regularisation term.
        loss = float(np.sum(margins)) / num_train
        loss += 0.5 * reg * np.sum(self.W * self.W)

        # Every violated margin (i, j) adds X[i] to column j of the gradient
        # and subtracts X[i] from the column of the correct class y[i].
        coeff = (margins > 0).astype(float)
        coeff[rows, y] = -np.sum(coeff, axis=1)
        dW = np.dot(X.T, coeff) / num_train
        dW += reg * self.W
        return loss, dW

    def train(self, X, y, learning_rate = 1e-7, num_iters = 100,
             batch_size = 200, print_flag = False,reg = 1e-5):
        """Fit ``self.W`` with mini-batch stochastic gradient descent.

        The final weights are also written to ``SVM.pkl`` with joblib.

        Args:
            X: Training samples, shape (num_train, dim).
            y: Integer class labels, shape (num_train,) or (num_train, 1).
            learning_rate: Step size of each gradient update.
            num_iters: Number of update steps.
            batch_size: Number of samples drawn (with replacement) per step.
            print_flag: If true, print the loss every 100 iterations.
            reg: L2 regularisation strength.

        Returns:
            List with the loss value of every iteration.
        """
        loss_history = []
        num_train = X.shape[0]
        dim = X.shape[1]
        num_classes = np.max(y) + 1

        # Initialise W only once so repeated calls continue training.
        # `is None` is required: `== None` on an ndarray is element-wise and
        # cannot be used as a condition.
        if self.W is None:
            self.W = 0.001 * np.random.randn(dim, num_classes)

        # Iteration and optimisation
        for t in range(num_iters):
            idx_batch = np.random.choice(num_train, batch_size, replace=True)
            X_batch = X[idx_batch]
            y_batch = y[idx_batch]
            loss, dW = self.loss_function(X_batch, y_batch, reg)
            loss_history.append(loss)
            self.W -= learning_rate * dW

            if print_flag and t % 100 == 0:
                print('iteration %d / %d: loss %f' % (t, num_iters, loss))
        joblib.dump(self.W, "SVM.pkl")
        return loss_history

    def predict(self, X):
        """Return the index of the highest-scoring class for each row of ``X``.

        Args:
            X: Samples of shape (num_samples, dim).

        Returns:
            1-D integer array of length num_samples.
        """
        scores = np.dot(X, self.W)
        return np.argmax(scores, axis=1)

if __name__ == '__main__':
    # Train on the first 10000 samples and evaluate on the following 200.
    xtrain_data, ytrain_data = xtrain[:10000], ytrain[:10000]
    xtest_data, ytest_data = xtrain[10000:10200], ytrain[10000:10200]

    model = LinearSVM()
    model.train(
        X=xtrain_data,
        y=ytrain_data,
        print_flag=True,
        num_iters=2000,
        reg=2.5e4,
        learning_rate=1e-7,
    )

    # Accuracy: fraction of the 200 evaluation samples predicted correctly.
    y = model.predict(xtest_data)
    num = sum(1 for i in range(200) if y[i] == ytest_data[i])
    print(num / 200)

你可能感兴趣的:(cs231n课后SVM作业)