作业要求见这里.
主要需要完成 KNN,SVM,Softmax分类器,还有一个两层的神经网络分类器的实现。
数据集CIFAR-10.
SVM(Support Vector Machine,支持向量机),是一种二类分类模型,其基本模型定义为特征空间上的间隔最大的线性分类器,其学习策略是间隔最大化,最终可转化为一个凸二次规划问题的求解。(线性支持向量机、非线性支持向量机)。
SVM的主要思想是建立一个超平面作为决策曲面,使得正例和反例之间的隔离边缘被最大化。对于二维线性可分情况,令H为把两类训练样本没有错误地分开的分类线,H1、H2分别为过各类中离分类线最近的样本且平行于分类线的直线,它们之间的距离叫做分类间隔。所谓最优分类线就是要求分类线不但能将两类正确分开,而且使分类间隔最大。在高维空间,最优分类线就成为最优分类面。
程序整体框架如下:包括classifiers和datasets文件夹,svm.py、data_utils.py、linear_classifier.py和linear_svm.py
from linear_classifier import LinearSVM
import time
import numpy as np #导入numpy的库函数
from datasets.data_utils import load_CIFAR10
import matplotlib.pyplot as plt
from classifiers.linear_svm import *
import math
# Directory holding the extracted CIFAR-10 python batches.
cifar10_dir = 'E:/cifar-10-batches-py'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

# Print the shape of every loaded array as a quick sanity check.
for caption, loaded in (('Training data shape: ', X_train),
                        ('Training labels shape: ', y_train),
                        ('Test data shape: ', X_test),
                        ('Test labels shape: ', y_test)):
    print(caption, loaded.shape)

# Show a grid of example images: one column per class, one row per sample.
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
num_classes = len(classes)
samples_per_class = 7  # number of images drawn for each class
for col, cls in enumerate(classes):
    # Positions in the training set whose label equals this class index.
    candidates = np.flatnonzero(y_train == col)
    # Draw distinct random examples of the class.
    chosen = np.random.choice(candidates, samples_per_class, replace=False)
    for row, sample_idx in enumerate(chosen):
        # Subplots are numbered row-major starting at 1.
        cell = row * num_classes + col + 1
        plt.subplot(samples_per_class, num_classes, cell)
        plt.imshow(X_train[sample_idx].astype('uint8'))
        plt.axis('off')
        if row == 0:
            plt.title(cls)  # label the column with the class name
plt.show()
# Sizes of the data splits.
num_training = 49000   # training split (the dev subset is drawn from it)
num_validation = 1000  # validation split, used to pick learning rate / regularization
num_test = 1000        # test split, evaluated once with the best hyperparameters
num_dev = 500          # small random subset of the training split for quick experiments

# Validation split: the samples that follow the first num_training ones.
val_rows = range(num_training, num_training + num_validation)
X_val, y_val = X_train[val_rows], y_train[val_rows]

# Training split: the first num_training samples.
train_rows = range(num_training)
X_train, y_train = X_train[train_rows], y_train[train_rows]

# Development split: random training samples chosen without replacement.
dev_rows = np.random.choice(num_training, num_dev, replace=False)
X_dev, y_dev = X_train[dev_rows], y_train[dev_rows]

# Test split: the first num_test samples of the original test set.
test_rows = range(num_test)
X_test, y_test = X_test[test_rows], y_test[test_rows]

for caption, split in (('Train data shape: ', X_train),
                       ('Train labels shape: ', y_train),
                       ('Validation data shape: ', X_val),
                       ('Validation labels shape: ', y_val),
                       ('Test data shape: ', X_test),
                       ('Test labels shape: ', y_test)):
    print(caption, split.shape)
# Flatten every image into a single row so each split becomes a 2-D matrix
# (one row per image), which is what the linear classifier consumes.
X_train = X_train.reshape(X_train.shape[0], -1)
X_val = X_val.reshape(X_val.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
X_dev = X_dev.reshape(X_dev.shape[0], -1)
print('Training data shape: ', X_train.shape)
print('Validation data shape: ', X_val.shape)
print('Test data shape: ', X_test.shape)
print('dev data shape: ', X_dev.shape)

# Mean image of the training split; print a few entries and display it.
mean_image = X_train.mean(axis=0)
print(mean_image[:10])
plt.figure(figsize=(4, 4))
plt.imshow(mean_image.reshape((32, 32, 3)).astype('uint8'))
plt.show()

# Centre every split on the training mean (no division by the variance).
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
X_dev -= mean_image

# Append a column of ones so the bias is learned as the last row of W.
X_train = np.concatenate([X_train, np.ones((X_train.shape[0], 1))], axis=1)
X_val = np.concatenate([X_val, np.ones((X_val.shape[0], 1))], axis=1)
X_test = np.concatenate([X_test, np.ones((X_test.shape[0], 1))], axis=1)
X_dev = np.concatenate([X_dev, np.ones((X_dev.shape[0], 1))], axis=1)
print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)
# Small random weights used to sanity-check the two loss implementations.
W = np.random.randn(3073, 10) * 0.0001
check_reg = 0.000005  # regularization strength shared by all the checks below

loss, grad = svm_loss_naive(W, X_dev, y_dev, check_reg)
print('loss: %f' % (loss, ))

# Compare the loss values (and timings) of the naive and vectorized versions.
started = time.time()
loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, check_reg)
elapsed = time.time() - started
print('Naive loss: %e computed in %fs' % (loss_naive, elapsed))

started = time.time()
loss_vectorized, _ = svm_loss_vectorized(W, X_dev, y_dev, check_reg)
elapsed = time.time() - started
print('Vectorized loss: %e computed in %fs' % (loss_vectorized, elapsed))
print('difference: %f' % (loss_naive - loss_vectorized))

# Compare the gradients (and timings) of the naive and vectorized versions.
started = time.time()
_, grad_naive = svm_loss_naive(W, X_dev, y_dev, check_reg)
elapsed = time.time() - started
print('Naive loss and gradient: computed in %fs' % elapsed)

started = time.time()
_, grad_vectorized = svm_loss_vectorized(W, X_dev, y_dev, check_reg)
elapsed = time.time() - started
print('Vectorized loss and gradient: computed in %fs' % elapsed)

# Frobenius norm of the gradient difference; should be close to zero.
difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('difference: %f' % difference)
# Train one linear SVM with fixed hyperparameters and time the run.
svm = LinearSVM()
started = time.time()
loss_hist = svm.train(
    X_train, y_train,
    learning_rate=1e-7, reg=2.5e4, num_iters=1500, verbose=True,
)
elapsed = time.time() - started
print('That took %fs' % elapsed)

# Plot the per-iteration loss recorded during training.
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# Accuracy on the training and validation splits.
y_train_pred = svm.predict(X_train)
train_acc = np.mean(y_train == y_train_pred)
print('training accuracy: %f' % (train_acc, ))
y_val_pred = svm.predict(X_val)
val_acc = np.mean(y_val == y_val_pred)
print('validation accuracy: %f' % (val_acc, ))
# Hyperparameter tuning over two knobs: learning rate and regularization strength.
learning_rates = [1e-7, 3e-7, 5e-7, 9e-7]
regularization_strengths = [2.5e4, 1e4, 3e4, 2e4]
results = {}     # (learning_rate, reg) -> [train accuracy, validation accuracy]
best_val = -1    # best validation accuracy seen so far
best_svm = None  # classifier that achieved best_val

# Every (learning rate, regularization strength) combination, lr-major order.
search_grid = [(lr, reg) for lr in learning_rates for reg in regularization_strengths]
for learning_rate, regularization_strength in search_grid:
    svm = LinearSVM()
    loss_hist = svm.train(
        X_train, y_train,
        learning_rate=learning_rate, reg=regularization_strength,
        num_iters=1500, verbose=True,
    )
    y_train_pred = svm.predict(X_train)
    y_val_pred = svm.predict(X_val)
    y_train_acc = np.mean(y_train_pred == y_train)
    y_val_acc = np.mean(y_val_pred == y_val)
    results[(learning_rate, regularization_strength)] = [y_train_acc, y_val_acc]
    if y_val_acc > best_val:
        # Keep the model with the highest validation accuracy.
        best_val, best_svm = y_val_acc, svm

# Report every combination, sorted by (learning rate, regularization strength).
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print('lr %e reg %e train accuracy: %f val accuracy: %f' % (lr, reg, train_accuracy, val_accuracy))
print('best validation accuracy achieved during cross-validation: %f' % best_val)

# Scatter plots of accuracy over the (log lr, log reg) grid.
x_scatter = [math.log10(pair[0]) for pair in results]
y_scatter = [math.log10(pair[1]) for pair in results]
marker_size = 100
panels = ((0, 'CIFAR-10 training accuracy'), (1, 'CIFAR-10 validation accuracy'))
for panel, (column, title) in enumerate(panels, start=1):
    # column 0 holds the training accuracy, column 1 the validation accuracy.
    colors = [results[pair][column] for pair in results]
    plt.subplot(1, 2, panel)
    plt.scatter(x_scatter, y_scatter, marker_size, c=colors)
    plt.colorbar()
    plt.xlabel('log learning rate')
    plt.ylabel('log regularization strength')
    plt.title(title)
plt.show()

# Evaluate the best classifier found above on the held-out test split.
y_test_pred = best_svm.predict(X_test)
test_accuracy = np.mean(y_test == y_test_pred)
print('linear SVM on raw pixels final test set accuracy: %f' % test_accuracy)
# Visualise the learned weights of the best classifier, one image per class.
# The last row of W is dropped before reshaping (it pairs with the appended
# column of ones, i.e. the bias).
w = best_svm.W[:-1, :].reshape(32, 32, 3, 10)
w_min, w_max = np.min(w), np.max(w)
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']  # class names
for i, label in enumerate(classes):
    plt.subplot(2, 5, i + 1)
    # Rescale the weights of this class to the 0..255 range for display.
    wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min)
    plt.imshow(wimg.astype('uint8'))
    plt.axis('off')
    plt.title(label)
plt.show()  # the templates learned for each class
from builtins import range
import numpy as np
from random import shuffle
def svm_loss_naive(W, X, y, reg):
    """Multiclass SVM (hinge) loss and its gradient, computed with loops.

    Uses the same objective as ``svm_loss_vectorized``: the mean hinge loss
    over the batch plus ``0.5 * reg * sum(W * W)``.

    Args:
        W: numpy array of shape (D, C) holding the weights, where D is the
            feature dimension and C the number of classes.
        X: numpy array of shape (N, D) holding a minibatch of data.
        y: numpy array of shape (N,) holding the integer training labels.
        reg: float, L2 regularization strength.

    Returns:
        A tuple ``(loss, dW)`` where ``loss`` is the scalar loss and ``dW``
        is the gradient with respect to ``W`` (same shape as ``W``).
    """
    dW = np.zeros(W.shape)
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    for i in range(num_train):
        # Scores of sample i for every class, and the score of its true class.
        scores = X[i].dot(W)
        correct_class_score = scores[y[i]]
        for j in range(num_classes):
            if j == y[i]:
                # The true class contributes no margin term of its own.
                continue
            margin = scores[j] - correct_class_score + 1  # hinge with delta = 1
            if margin > 0:
                loss += margin
                # A violated margin pushes class j up and the true class down.
                dW[:, j] += X[i]
                dW[:, y[i]] -= X[i]

    # Average the data term over the batch.
    loss /= num_train
    dW /= num_train

    # Add the L2 penalty and its gradient. The previous code divided dW
    # element-wise by reg * W, which is not the gradient of anything here.
    loss += 0.5 * reg * np.sum(W * W)
    dW += reg * W
    return loss, dW
def svm_loss_vectorized(W, X, y, reg):
    """Multiclass SVM (hinge) loss and its gradient, without Python loops.

    Args:
        W: weight matrix; ``X.dot(W)`` gives one row of class scores per sample.
        X: data matrix with one sample per row.
        y: integer class label of each row of ``X``.
        reg: float, L2 regularization strength.

    Returns:
        A tuple ``(loss, dW)``: the mean hinge loss plus
        ``0.5 * reg * sum(W * W)``, and the gradient with respect to ``W``.
    """
    n_samples = X.shape[0]
    rows = np.arange(n_samples)
    labels = np.asarray(y)

    # Hinge margins against the true-class score; the true class itself is zeroed.
    all_scores = X.dot(W)
    true_scores = all_scores[rows, labels].reshape(-1, 1)  # column vector
    hinge = np.maximum(0, all_scores - true_scores + 1)
    hinge[rows, labels] = 0

    data_loss = np.sum(hinge) / n_samples
    penalty = 0.5 * reg * np.sum(W * W)
    loss = data_loss + penalty

    # +1 for every violated margin, and minus the number of violations in the
    # true-class column of each row.
    indicator = (hinge > 0).astype(float)
    indicator[rows, labels] = -np.sum(indicator, axis=1)

    dW = X.T.dot(indicator) / n_samples + reg * W
    return loss, dW
from __future__ import print_function
from builtins import range
from six.moves import cPickle as pickle
import numpy as np
import os
from imageio import imread
import platform
def load_pickle(f):
    """Unpickle an object from the open binary file ``f``.

    On Python 3 the data is decoded with ``encoding='latin1'``; on Python 2
    the plain ``pickle.load`` is used.

    NOTE(review): unpickling can execute arbitrary code; only use this on
    trusted files.

    Raises:
        ValueError: if the interpreter's major version is neither 2 nor 3.
    """
    version = platform.python_version_tuple()
    major = version[0]
    if major == '3':
        return pickle.load(f, encoding='latin1')
    if major == '2':
        return pickle.load(f)
    raise ValueError("invalid python version: {}".format(version))
def load_CIFAR_batch(filename):
    """Load a single CIFAR batch file.

    Args:
        filename: path of the pickled batch; the unpickled dict must provide
            the ``'data'`` and ``'labels'`` entries.

    Returns:
        A tuple ``(X, Y)``: ``X`` is a float array of shape (N, 32, 32, 3)
        and ``Y`` is a numpy array built from the labels.
    """
    with open(filename, 'rb') as f:
        datadict = load_pickle(f)
    X = datadict['data']
    Y = datadict['labels']
    # Infer the number of images from the data (-1) rather than hard-coding
    # 10000, so batches of any size load; then move channels last.
    X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
    Y = np.array(Y)
    return X, Y
def load_CIFAR10(ROOT):
    """Load the whole CIFAR-10 dataset from the directory ``ROOT``.

    Reads ``data_batch_1`` .. ``data_batch_5`` and concatenates them into the
    training set, then reads ``test_batch`` as the test set.

    Returns:
        A tuple ``(Xtr, Ytr, Xte, Yte)``.
    """
    batch_paths = [os.path.join(ROOT, 'data_batch_%d' % (b, )) for b in range(1, 6)]
    batches = [load_CIFAR_batch(batch_path) for batch_path in batch_paths]
    Xtr = np.concatenate([images for images, _ in batches])
    Ytr = np.concatenate([labels for _, labels in batches])
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     subtract_mean=True):
    """
    Load CIFAR-10 from disk and preprocess it for the classifiers.

    The training data is split into the first ``num_training`` samples
    (training) and the following ``num_validation`` samples (validation); the
    first ``num_test`` test samples are kept. If ``subtract_mean`` is true the
    mean training image is subtracted from every split. Finally each split is
    transposed so that channels come first.

    Returns a dictionary with the keys X_train, y_train, X_val, y_val, X_test
    and y_test.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_all, y_all, X_test_all, y_test_all = load_CIFAR10(cifar10_dir)

    # Index lists selecting each split
    val_rows = list(range(num_training, num_training + num_validation))
    train_rows = list(range(num_training))
    test_rows = list(range(num_test))

    X_val, y_val = X_all[val_rows], y_all[val_rows]
    X_train, y_train = X_all[train_rows], y_all[train_rows]
    X_test, y_test = X_test_all[test_rows], y_test_all[test_rows]

    # Normalize the data: subtract the mean training image from every split
    if subtract_mean:
        mean_image = np.mean(X_train, axis=0)
        for images in (X_train, X_val, X_test):
            images -= mean_image  # in-place update of the array itself

    # Channels-first copies of each split
    channels_first = (0, 3, 1, 2)
    X_train = X_train.transpose(*channels_first).copy()
    X_val = X_val.transpose(*channels_first).copy()
    X_test = X_test.transpose(*channels_first).copy()

    # Package data into a dictionary
    data = {}
    data['X_train'], data['y_train'] = X_train, y_train
    data['X_val'], data['y_val'] = X_val, y_val
    data['X_test'], data['y_test'] = X_test, y_test
    return data
def _read_image_chw(img_path):
    """Read one image file and return it with the channel axis first."""
    img = imread(img_path)
    if img.ndim == 2:
        # Grayscale file: add a trailing channel axis so the transpose below
        # works; the single channel broadcasts on assignment.
        img = img[:, :, np.newaxis]
    return img.transpose(2, 0, 1)


def _load_image_stack(img_paths, dtype):
    """Load the given image files into one (len(img_paths), 3, 64, 64) array."""
    stack = np.zeros((len(img_paths), 3, 64, 64), dtype=dtype)
    for i, img_path in enumerate(img_paths):
        stack[i] = _read_image_chw(img_path)
    return stack


def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
    """
    Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
    TinyImageNet-200 have the same directory structure, so this can be used
    to load any of them.

    Inputs:
    - path: String giving path to the directory to load.
    - dtype: numpy datatype used to load the data.
    - subtract_mean: Whether to subtract the mean training image.

    Returns: A dictionary with the following entries:
    - class_names: A list where class_names[i] is a list of strings giving the
      WordNet names for class i in the loaded dataset.
    - X_train: (N_tr, 3, 64, 64) array of training images
    - y_train: (N_tr,) array of training labels
    - X_val: (N_val, 3, 64, 64) array of validation images
    - y_val: (N_val,) array of validation labels
    - X_test: (N_test, 3, 64, 64) array of testing images.
    - y_test: (N_test,) array of test labels; if test labels are not available
      (such as in student code) then y_test will be None.
    - mean_image: (3, 64, 64) array giving mean training image
    """
    # First load wnids
    with open(os.path.join(path, 'wnids.txt'), 'r') as f:
        wnids = [x.strip() for x in f]

    # Map wnids to integer labels
    wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}

    # Use words.txt to get names for each class. Split on the first tab only
    # so a description that itself contains a tab does not break dict().
    with open(os.path.join(path, 'words.txt'), 'r') as f:
        wnid_to_words = dict(line.split('\t', 1) for line in f)
    class_names = [[w.strip() for w in wnid_to_words[wnid].split(',')]
                   for wnid in wnids]

    # Next load training data.
    X_train = []
    y_train = []
    for i, wnid in enumerate(wnids):
        if (i + 1) % 20 == 0:
            print('loading training data for synset %d / %d'
                  % (i + 1, len(wnids)))
        # To figure out the filenames we need to open the boxes file
        boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
        with open(boxes_file, 'r') as f:
            filenames = [x.split('\t')[0] for x in f]
        img_paths = [os.path.join(path, 'train', wnid, 'images', name)
                     for name in filenames]
        X_train.append(_load_image_stack(img_paths, dtype))
        y_train.append(wnid_to_label[wnid]
                       * np.ones(len(filenames), dtype=np.int64))

    # We need to concatenate all training data
    X_train = np.concatenate(X_train, axis=0)
    y_train = np.concatenate(y_train, axis=0)

    # Next load validation data
    img_files = []
    val_wnids = []
    with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
        for line in f:
            img_file, wnid = line.split('\t')[:2]
            img_files.append(img_file)
            # strip() guards against a trailing newline when the line has
            # exactly two columns.
            val_wnids.append(wnid.strip())
    y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
    X_val = _load_image_stack(
        [os.path.join(path, 'val', 'images', name) for name in img_files],
        dtype)

    # Next load test images
    # Students won't have test labels, so we need to iterate over files in the
    # images directory.
    img_files = os.listdir(os.path.join(path, 'test', 'images'))
    X_test = _load_image_stack(
        [os.path.join(path, 'test', 'images', name) for name in img_files],
        dtype)

    y_test = None
    y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
    if os.path.isfile(y_test_file):
        img_file_to_wnid = {}
        with open(y_test_file, 'r') as f:
            for line in f:
                fields = line.split('\t')
                # strip() removes the newline left when the wnid is the last
                # column, which would otherwise miss in wnid_to_label.
                img_file_to_wnid[fields[0]] = fields[1].strip()
        y_test = np.array([wnid_to_label[img_file_to_wnid[img_file]]
                           for img_file in img_files])

    mean_image = X_train.mean(axis=0)
    if subtract_mean:
        X_train -= mean_image[None]
        X_val -= mean_image[None]
        X_test -= mean_image[None]

    return {
        'class_names': class_names,
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test,
        'mean_image': mean_image,
    }
def load_models(models_dir):
    """
    Load saved models from disk.

    Every file in ``models_dir`` is unpickled; files that raise
    ``pickle.UnpicklingError`` (such as README.txt) are skipped.

    Inputs:
    - models_dir: String giving the path to a directory containing model files.
      Each model file is a pickled dictionary with a 'model' field.

    Returns:
    A dictionary mapping model file names to models.
    """
    loaded = {}
    for name in os.listdir(models_dir):
        full_path = os.path.join(models_dir, name)
        with open(full_path, 'rb') as handle:
            try:
                loaded[name] = load_pickle(handle)['model']
            except pickle.UnpicklingError:
                # Not a pickle file; ignore it and move on.
                continue
    return loaded
def load_imagenet_val(num=None):
    """Load a handful of validation images from ImageNet.

    Inputs:
    - num: Number of images to load (max of 25)

    Returns:
    - X: numpy array with shape [num, 224, 224, 3]
    - y: numpy array of integer image labels, shape [num]
    - class_names: dict mapping integer label to class name

    Raises:
    - AssertionError: if the imagenet_val_25.npz file has not been downloaded.
    """
    imagenet_fn = 'cs231n/datasets/imagenet_val_25.npz'
    if not os.path.isfile(imagenet_fn):
        print('file %s not found' % imagenet_fn)
        print('Run the following:')
        print('cd cs231n/datasets')
        print('bash get_imagenet_val.sh')
        # Raise explicitly (same exception type as before) so the check is
        # not stripped when Python runs with -O.
        raise AssertionError('Need to download imagenet_val_25.npz')
    # label_map is stored as a pickled object, which np.load refuses to read
    # unless allow_pickle=True. NOTE(review): this unpickles data from the
    # file, so only use it with the trusted course-provided archive.
    # The context manager closes the archive once the arrays are read.
    with np.load(imagenet_fn, allow_pickle=True) as f:
        X = f['X']
        y = f['y']
        class_names = f['label_map'].item()
    if num is not None:
        X = X[:num]
        y = y[:num]
    return X, y, class_names
from __future__ import print_function
from builtins import range
from builtins import object
import numpy as np
from classifiers.linear_svm import *
# from softmax import *
class LinearClassifier(object):
    """Base class for linear classifiers trained with minibatch SGD.

    Subclasses supply ``loss``; the weights live in ``self.W``.
    """

    def __init__(self):
        self.W = None  # weight matrix, created on the first call to train()

    def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
              batch_size=200, verbose=False):
        """Optimise ``self.W`` with stochastic gradient descent.

        Args:
            X: 2-D array with one training sample per row.
            y: training labels; assumed to take the values 0..K-1, where K is
                the number of classes.
            learning_rate: step size of each update.
            reg: regularization strength handed to ``self.loss``.
            num_iters: number of update steps to run.
            batch_size: number of samples drawn for each step.
            verbose: if true, print the loss every 100 iterations.

        Returns:
            A list with the loss value of every iteration.
        """
        num_train, dim = X.shape
        num_classes = np.max(y) + 1
        if self.W is None:
            # Start from small random weights.
            self.W = 0.001 * np.random.randn(dim, num_classes)

        loss_history = []
        for step in range(num_iters):
            # Draw a random minibatch (np.random.choice samples with
            # replacement by default).
            picked = np.random.choice(num_train, batch_size)
            loss, grad = self.loss(X[picked, :], y[picked], reg)
            loss_history.append(loss)

            # Move against the gradient to reduce the loss.
            self.W = self.W - learning_rate * grad

            if verbose and step % 100 == 0:
                print('iteration %d / %d: loss %f' % (step, num_iters, loss))
        return loss_history

    def predict(self, X):
        """Return, for each row of ``X``, the index of the highest-scoring class."""
        class_scores = X.dot(self.W)
        return np.argmax(class_scores, axis=1)

    def loss(self, X_batch, y_batch, reg):
        """Placeholder for the loss; subclasses return ``(loss, gradient)``."""
        return None
class LinearSVM(LinearClassifier):
    """Linear classifier whose loss is computed by ``svm_loss_vectorized``."""

    def loss(self, X_batch, y_batch, reg):
        """Return the result of ``svm_loss_vectorized`` for the current weights."""
        weights = self.W
        return svm_loss_vectorized(weights, X_batch, y_batch, reg)
本文希望对大家有帮助,当然上文若有不妥之处,欢迎指正。
分享决定高度,学习拉开差距