A loss function tells how good our current classifier is
Given a dataset of examples
$\{(x_{i}, y_{i})\}_{i=1}^{N}$
where $x_{i}$ is an image and $y_{i}$ is its label.
Loss function:
$L = \frac{1}{N} \sum_{i} L_{i}(f(x_{i}, W), y_{i})$
The SVM loss has the form:
$L_{i} = \sum_{j \neq y_{i}} \max(0, s_{j} - s_{y_{i}} + 1)$, where $s = f(x_{i}, W)$ is the vector of class scores for example $i$.
the norm of array:
$\Vert \mathbf{V} \Vert_{p} = \left( \sum_{i=1}^{n} \vert v_{i} \vert^{p} \right)^{\frac{1}{p}}$, where $v_{1}, \dots, v_{n}$ are the components of $\mathbf{V}$.
spectral norm:
$\Vert \mathbf{A} \Vert_{2} = \sqrt{\lambda_{max}(\mathbf{A}^\mathsf{T}\mathbf{A})}$
用的数据集依然是cifar10,做下来准确率为25%,参数调整一下的话应该会有一定的提高,可惜现在还不太会调
import numpy as np
import pickle
import os
import joblib
# Prepare the dataset
def unpickle(file):
    """Load and return the object stored in the pickle file at *file*.

    The file is opened in binary mode and decoded with ``encoding='bytes'``,
    so string keys written by Python 2 come back as ``bytes`` (for example
    ``b'data'``).

    NOTE: ``pickle.load`` can execute arbitrary code embedded in the file.
    Only call this on files obtained from a trusted source.
    """
    with open(file, 'rb') as fo:
        # Named ``batch`` rather than ``dict`` so the builtin is not shadowed.
        batch = pickle.load(fo, encoding='bytes')
    return batch
# Initialise the accumulators and point at the extracted dataset directory.
dataset = []
labelset = []
file_location = 'cifar-10-batches-py'
# os.listdir returns entries in arbitrary order; sorting keeps the sample
# order (and any slices taken from it later) reproducible between runs.
file_name_list = sorted(os.listdir(file_location))
# Read the data and labels of every training batch and collect them.
for file_name in file_name_list:
    if file_name.startswith('data_batch'):
        # Unpickle each batch file once and take both fields from it.
        batch = unpickle(os.path.join(file_location, file_name))
        dataset.append(batch[b'data'])
        labelset.append(batch[b'labels'])
# Collapse the per-batch nesting: stack all rows into one 2-D array with
# 3072 columns, and turn the nested label lists into a single column vector.
dataset = np.array(dataset)
dataset = np.reshape(dataset, (-1, 3072))
# Append a constant column of ones to every sample; presumably this lets the
# last row of the weight matrix act as the bias term.
ones = np.ones((dataset.shape[0], 1))
xtrain = np.hstack((dataset, ones))
ytrain = np.reshape(labelset, (-1, 1))
class LinearSVM(object):
    """Linear classifier trained with the multiclass SVM (hinge) loss.

    ``W`` is the weight matrix of shape (dim, num_classes). It starts as
    ``None`` and is created on the first call to :meth:`train`.
    """

    def __init__(self):
        self.W = None

    def loss_function(self, X, y, reg):
        """Return the SVM loss and its gradient with respect to ``W``.

        Args:
            X: array of shape (num_train, dim), one sample per row.
            y: integer class labels, shape (num_train,) or (num_train, 1).
            reg: L2 regularisation strength.

        Returns:
            A tuple ``(loss, dW)`` where ``loss`` is a scalar equal to the
            mean over samples of ``sum_{j != y_i} max(0, s_j - s_{y_i} + 1)``
            plus ``0.5 * reg * sum(W * W)``, and ``dW`` has the same shape
            as ``W``.
        """
        # Accept both flat and column-vector labels; a column vector would
        # otherwise turn every indexed score into a 1-element array.
        y = np.asarray(y).reshape(-1)
        num_train = X.shape[0]
        rows = np.arange(num_train)

        scores = np.dot(X, self.W)
        correct_scores = scores[rows, y][:, np.newaxis]
        margins = np.maximum(0, scores - correct_scores + 1)  # delta = 1
        # The correct class never contributes to the loss.
        margins[rows, y] = 0

        # Each violated margin adds X[i] to column j of dW and subtracts
        # X[i] from the column of the correct class.
        mask = (margins > 0).astype(float)
        mask[rows, y] = -mask.sum(axis=1)

        # Average over the batch, then add the regularisation terms.
        loss = margins.sum() / num_train
        dW = np.dot(X.T, mask) / num_train
        loss += 0.5 * reg * np.sum(self.W * self.W)
        dW += reg * self.W
        return loss, dW

    def train(self, X, y, learning_rate=1e-7, num_iters=100,
              batch_size=200, print_flag=False, reg=1e-5):
        """Fit ``W`` with mini-batch stochastic gradient descent.

        Args:
            X: array of shape (num_train, dim).
            y: integer class labels indexable by a batch of row indices.
            learning_rate: step size of each gradient update.
            num_iters: number of gradient steps.
            batch_size: number of samples drawn (with replacement) per step.
            print_flag: if true, print the loss every 100 iterations.
            reg: L2 regularisation strength passed to ``loss_function``.

        Returns:
            List with the loss of every iteration.

        Side effects:
            Writes the final ``W`` to ``SVM.pkl`` in the working directory.
        """
        loss_history = []
        num_train = X.shape[0]
        dim = X.shape[1]
        num_classes = np.max(y) + 1
        # ``is None`` rather than ``== None``: once W is an ndarray the
        # equality test is elementwise and cannot be used in an ``if``.
        if self.W is None:
            self.W = 0.001 * np.random.randn(dim, num_classes)
        # Iterate: sample a batch, evaluate loss/gradient, take a step.
        for t in range(num_iters):
            idx_batch = np.random.choice(num_train, batch_size, replace=True)
            X_batch = X[idx_batch]
            y_batch = y[idx_batch]
            loss, dW = self.loss_function(X_batch, y_batch, reg)
            loss_history.append(loss)
            self.W += -learning_rate * dW
            if print_flag and t % 100 == 0:
                print('iteration %d / %d: loss %f' % (t, num_iters, loss))
        # Persist the trained weights once training has finished.
        joblib.dump(self.W, "SVM.pkl")
        return loss_history

    def predict(self, X):
        """Return, for each row of ``X``, the index of the highest score."""
        scores = np.dot(X, self.W)
        return np.argmax(scores, axis=1)
if __name__ == '__main__':
    # The first 10000 rows are used for fitting; the next 200 rows are
    # held back and used only to measure accuracy.
    xtrain_data, ytrain_data = xtrain[:10000], ytrain[:10000]
    xtest_data, ytest_data = xtrain[10000:10200], ytrain[10000:10200]

    model = LinearSVM()
    model.train(
        X=xtrain_data,
        y=ytrain_data,
        learning_rate=1e-7,
        num_iters=2000,
        reg=2.5e4,
        print_flag=True,
    )

    # Count how many of the 200 held-out predictions match their label and
    # print the resulting fraction.
    y = model.predict(xtest_data)
    num = sum(1 for i in range(200) if y[i] == ytest_data[i])
    print(num/200)