Lecture 4 covers some basics of the chain rule.
Below is a summary of some key points and interesting implementation details from the assignment1 code.
Reference solutions: https://github.com/sharedeeply/cs231n-assignment-solution/blob/master/assignment1/
For the dataset used in assignment1, each image is read out as a flattened one-dimensional vector, so the number of rows of X_train and X_test is the number of samples and the number of columns is the number of pixels per image. y is an array of shape (n,), with one entry per sample giving its class label.
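For reference, a minimal sketch of how the data is typically loaded and flattened in the notebook (load_CIFAR10 and the dataset path come from the assignment scaffolding; treat this as illustrative rather than the exact cell):

import numpy as np
from cs231n.data_utils import load_CIFAR10

# Load the raw CIFAR-10 data; each image starts out as a 32x32x3 array.
X_train, y_train, X_test, y_test = load_CIFAR10('cs231n/datasets/cifar-10-batches-py')

# Flatten each image into one row: rows = number of samples, columns = number of pixels.
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, y_train.shape)  # e.g. (50000, 3072) (50000,)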
Function to display a few training examples:
# Visualize some examples from the dataset.
# We show a few examples of training images from each class.
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
num_classes = len(classes)
samples_per_class = 7
for y, cls in enumerate(classes):
    idxs = np.flatnonzero(y_train == y)  # returns the indices where the condition is nonzero
    idxs = np.random.choice(idxs, samples_per_class, replace=False)  # draw samples_per_class indices without replacement; the same function also supports weighted sampling
    for i, idx in enumerate(idxs):
        plt_idx = i * num_classes + y + 1  # subplot indices start at 1
        plt.subplot(samples_per_class, num_classes, plt_idx)
        plt.imshow(X_train[idx].astype('uint8'))
        plt.axis('off')
        if i == 0:
            plt.title(cls)
plt.show()
The kNN classifier object is used in the notebook through the following statements:
from cs231n.classifiers import KNearestNeighbor
# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
These statements rely on the package setup, so it is worth explaining what an __init__.py file does:
It marks a directory as a Python package and must not be deleted.
It can define __all__ to control wildcard imports (from package import *).
It can contain Python code, although it is better not to write real modules inside __init__.py; create separate modules inside the package instead and keep __init__.py as simple as possible.
Reference: https://www.cnblogs.com/AlwinXu/p/5598543.html
It is the __init__.py file in the classifiers folder that makes the import above work.
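A minimal sketch of what cs231n/classifiers/__init__.py can look like to make that import work (the exact module names below are assumptions based on the assignment's layout):

# cs231n/classifiers/__init__.py (sketch)
# Re-export the classifier classes so that
# `from cs231n.classifiers import KNearestNeighbor` works at package level.
from cs231n.classifiers.k_nearest_neighbor import KNearestNeighbor
from cs231n.classifiers.linear_classifier import LinearSVM, Softmax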
The kNN classifier itself is fairly simple to implement, so it is not covered in detail here; instead, here are a few statements I had not seen before:
# broadcast train_sq into a matrix of shape (num_train, num_test)
train_sq = np.broadcast_to(train_sq, shape=(num_train, num_test))  # train_sq must be a 2-D matrix, not an array of shape (n,)
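For context, a sketch of the fully vectorized L2 distance computation where this broadcast shows up, based on the expansion (a - b)^2 = a^2 - 2ab + b^2 (variable names and the (num_test, num_train) orientation here are illustrative, not the exact assignment code):

import numpy as np

def compute_l2_distances(X_test, X_train):
    # Pairwise squared terms plus the cross term, combined by broadcasting.
    num_test, num_train = X_test.shape[0], X_train.shape[0]
    test_sq = np.sum(X_test ** 2, axis=1).reshape(num_test, 1)      # (num_test, 1)
    train_sq = np.sum(X_train ** 2, axis=1).reshape(1, num_train)   # (1, num_train), kept 2-D
    train_sq = np.broadcast_to(train_sq, (num_test, num_train))     # replicate across rows
    cross = np.dot(X_test, X_train.T)                               # (num_test, num_train)
    return np.sqrt(test_sq - 2 * cross + train_sq)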
Implementation of cross-validation:
for k in k_choices:
    # run cross-validation for this value of k
    acc = []
    for i in range(num_folds):
        x = X_train_folds[0:i] + X_train_folds[i+1:]
        x = np.concatenate(x, axis=0)  # concatenate the remaining folds into one training set
        y = y_train_folds[0:i] + y_train_folds[i+1:]
        y = np.concatenate(y)  # same operation for the labels
        test_x = X_train_folds[i]
        test_y = y_train_folds[i]
        classifier = KNearestNeighbor()  # create the model
        classifier.train(x, y)  # "train" on the combined folds
        dist = classifier.compute_distances_no_loops(test_x)  # compute the distance matrix
        y_pred = classifier.predict_labels(dist, k)  # predict labels for the held-out fold
        accuracy = np.mean(y_pred == test_y)  # compute the accuracy
        acc.append(accuracy)
    k_to_accuracies[k] = acc  # store the per-fold accuracies for this k
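The loop above assumes X_train_folds and y_train_folds are plain Python lists of arrays, so that list slicing and concatenation work; a sketch of how they can be created before the loop (np.array_split is what the notebook suggests, assuming the usual setup of num_folds = 5):

num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

# Split the training set into num_folds folds (lists of arrays, not one big array).
X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)
k_to_accuracies = {}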
np.hstack()  # stack arrays horizontally (side by side)
np.vstack()  # stack arrays vertically (on top of each other)
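A quick illustration of the difference:

a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
np.hstack((a, b)).shape  # (2, 4): columns appended side by side
np.vstack((a, b)).shape  # (4, 2): rows stacked on top of each other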
On the SVM parameter update (I expected it to be simple, but while implementing it I found many unclear points; implementing an algorithm really is the best way to learn it):
def svm_loss_vectorized(W, X, y, reg):
    """
    Structured SVM loss function, vectorized implementation.
    Inputs and outputs are the same as svm_loss_naive.
    """
    loss = 0.0
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    #############################################################################
    # TODO:                                                                     #
    # Implement a vectorized version of the structured SVM loss, storing the   #
    # result in loss.                                                           #
    #############################################################################
    num_train = X.shape[0]  # number of training samples
    scores = np.dot(X, W)  # compute all scores at once
    y_score = scores[np.arange(num_train), y].reshape((-1, 1))  # for each row, pick out the score of that sample's own class (the value of y)
    mask = (scores - y_score + 1) > 0  # positions where the margin is positive (the "active" scores)
    scores = (scores - y_score + 1) * mask  # keep only the active margins
    loss = (np.sum(scores) - num_train * 1) / num_train  # subtract the margin of 1 each sample adds at its own label position (the correct class should not count as an active score), then average
    loss += reg * np.sum(W * W)
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################

    #############################################################################
    # TODO:                                                                     #
    # Implement a vectorized version of the gradient for the structured SVM    #
    # loss, storing the result in dW.                                           #
    #                                                                           #
    # Hint: Instead of computing the gradient from scratch, it may be easier    #
    # to reuse some of the intermediate values that you used to compute the     #
    # loss.                                                                     #
    #############################################################################
    # dW = X.T * dL/ds
    ds = np.ones_like(scores)  # initialize dL/ds
    ds *= mask  # gradient is 1 at the active scores and 0 elsewhere (only positions where the max is nonzero propagate a gradient)
    ds[np.arange(num_train), y] = -1 * (np.sum(mask, axis=1) - 1)  # at each sample's label the derivative is minus (number of active scores in that row - 1); the -1 is because the label position itself was counted in the mask; every other active position has derivative 1
    dW = np.dot(X.T, ds) / num_train  # average over the batch
    dW += 2 * reg * W  # add the gradient of the regularization term
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################

    return loss, dW
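A quick way to sanity-check the analytic gradient is to compare it against centered finite differences at a few random entries; a self-contained sketch of the idea (the assignment itself ships a grad_check_sparse helper for this, and X_dev / y_dev here stand for any small subset of the data):

import numpy as np

def grad_check(f, W, analytic_grad, num_checks=10, h=1e-5):
    # Compare analytic_grad to a numerical estimate at a few random positions of W.
    for _ in range(num_checks):
        idx = tuple(np.random.randint(n) for n in W.shape)
        old = W[idx]
        W[idx] = old + h
        fxph = f(W)          # loss with this entry nudged up
        W[idx] = old - h
        fxmh = f(W)          # loss with this entry nudged down
        W[idx] = old         # restore
        num_grad = (fxph - fxmh) / (2 * h)
        rel_err = abs(num_grad - analytic_grad[idx]) / (abs(num_grad) + abs(analytic_grad[idx]) + 1e-12)
        print('numerical: %f analytic: %f, relative error: %e' % (num_grad, analytic_grad[idx], rel_err))

# Usage sketch on a small random weight matrix (3073 = 3072 pixels + 1 bias dimension):
W = np.random.randn(3073, 10) * 0.0001
loss, dW = svm_loss_vectorized(W, X_dev, y_dev, reg=0.0)
grad_check(lambda w: svm_loss_vectorized(w, X_dev, y_dev, 0.0)[0], W, dW)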
def softmax_loss_vectorized(W, X, y, reg):
    """
    Softmax loss function, vectorized version.
    Inputs and outputs are the same as softmax_loss_naive.
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    #############################################################################
    # TODO: Compute the softmax loss and its gradient using no explicit loops.  #
    # Store the loss in loss and the gradient in dW. If you are not careful     #
    # here, it is easy to run into numeric instability. Don't forget the        #
    # regularization!                                                            #
    #############################################################################
    scores = np.dot(X, W)  # compute the scores
    scores -= np.max(scores, axis=1, keepdims=True)  # numeric stability; subtracting a per-row constant does not change the gradient
    scores = np.exp(scores)  # exponentiate
    scores /= np.sum(scores, axis=1, keepdims=True)  # normalize to get the softmax probabilities
    ds = np.copy(scores)  # gradient of the loss w.r.t. the scores
    ds[np.arange(X.shape[0]), y] -= 1  # differentiating the softmax loss subtracts 1 at each sample's correct class (reference: https://www.jianshu.com/p/c02a1fbffad6)
    dW = np.dot(X.T, ds)  # gradient w.r.t. W
    loss = scores[np.arange(X.shape[0]), y]  # probability of the correct class for each sample
    loss = -np.log(loss).sum()  # cross-entropy
    loss /= X.shape[0]
    dW /= X.shape[0]
    loss += reg * np.sum(W * W)
    dW += 2 * reg * W
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################

    return loss, dW
The derivative for the two-layer network below essentially combines the softmax and SVM cases above, i.e. differentiating the softmax loss and differentiating the max function (the ReLU).
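As a reminder, the two pieces being chained are the standard softmax gradient and the ReLU (max) gradient. With probabilities $p_j = \frac{e^{s_j}}{\sum_k e^{s_k}}$ and loss $L = -\log p_y$, the gradient with respect to the scores is
$$\frac{\partial L}{\partial s_j} = p_j - \mathbf{1}[j = y],$$
which is exactly the "subtract 1 at the correct class" step used above. For the ReLU activation $a = \max(0, z)$ the gradient simply gates the upstream signal:
$$\frac{\partial a}{\partial z} = \mathbf{1}[z > 0].$$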
def loss(self, X, y=None, reg=0.0):
    """
    Compute the loss and gradients for a two layer fully connected neural
    network.
    Inputs:
    - X: Input data of shape (N, D). Each X[i] is a training sample.
    - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is
      an integer in the range 0 <= y[i] < C. This parameter is optional; if it
      is not passed then we only return scores, and if it is passed then we
      instead return the loss and gradients.
    - reg: Regularization strength.
    Returns:
    If y is None, return a matrix scores of shape (N, C) where scores[i, c] is
    the score for class c on input X[i].
    If y is not None, instead return a tuple of:
    - loss: Loss (data loss and regularization loss) for this batch of training
      samples.
    - grads: Dictionary mapping parameter names to gradients of those parameters
      with respect to the loss function; has the same keys as self.params.
    """
    # Unpack variables from the params dictionary
    W1, b1 = self.params['W1'], self.params['b1']
    W2, b2 = self.params['W2'], self.params['b2']
    N, D = X.shape

    # Compute the forward pass
    scores = None
    #############################################################################
    # TODO: Perform the forward pass, computing the class scores for the input. #
    # Store the result in the scores variable, which should be an array of      #
    # shape (N, C).                                                              #
    #############################################################################
    s1 = np.dot(X, W1) + b1  # (N, H)
    s1_act = (s1 > 0) * s1  # ReLU: keep only the positive activations
    scores = np.dot(s1_act, W2) + b2  # (N, C)
    #############################################################################
    #                              END OF YOUR CODE                             #
    #############################################################################

    # If the targets are not given then jump out, we're done
    if y is None:
        return scores

    # Compute the loss
    loss = None
    #############################################################################
    # TODO: Finish the forward pass, and compute the loss. This should include  #
    # both the data loss and L2 regularization for W1 and W2. Store the result  #
    # in the variable loss, which should be a scalar. Use the Softmax           #
    # classifier loss.                                                          #
    #############################################################################
    scores -= np.max(scores, axis=1, keepdims=True)  # numeric stability
    scores = np.exp(scores)
    scores /= np.sum(scores, axis=1, keepdims=True)  # softmax
    loss = -np.log(scores[np.arange(N), y]).sum()
    loss /= X.shape[0]
    loss += reg * np.sum(W1**2)
    loss += reg * np.sum(W2**2)
    #############################################################################
    #                              END OF YOUR CODE                             #
    #############################################################################

    # Backward pass: compute gradients
    grads = {}
    #############################################################################
    # TODO: Compute the backward pass, computing the derivatives of the weights #
    # and biases. Store the results in the grads dictionary. For example,       #
    # grads['W1'] should store the gradient on W1, and be a matrix of same size #
    #############################################################################
    ds2 = np.copy(scores)  # gradient of the loss w.r.t. the output scores
    ds2[np.arange(X.shape[0]), y] -= 1
    ds2 = ds2 / X.shape[0]
    grads['W2'] = np.dot(s1_act.T, ds2) + 2 * reg * W2
    grads['b2'] = np.sum(ds2, axis=0)
    ds1 = np.dot(ds2, W2.T)
    ds1 = (s1 > 0) * ds1  # backprop through the ReLU: zero the gradient where the input was negative
    grads['W1'] = np.dot(X.T, ds1) + 2 * reg * W1
    grads['b1'] = np.sum(ds1, axis=0)
    #############################################################################
    #                              END OF YOUR CODE                             #
    #############################################################################

    return loss, grads
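A minimal sketch of how loss and grads can be used in one SGD step (net stands for an assumed TwoLayerNet instance, and the learning rate, batch size and reg value are illustrative; the real training loop lives in the class's train method):

import numpy as np

learning_rate = 1e-3  # illustrative hyperparameters, not the assignment's tuned values
batch_size = 200
idx = np.random.choice(X_train.shape[0], batch_size, replace=True)  # sample a minibatch
loss, grads = net.loss(X_train[idx], y_train[idx], reg=0.25)
for param_name in net.params:
    net.params[param_name] -= learning_rate * grads[param_name]  # vanilla SGD update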
Extracting texture information while ignoring color:
def hog_feature(im):
    """Compute Histogram of Gradient (HOG) feature for an image

    Modified from skimage.feature.hog
    http://pydoc.net/Python/scikits-image/0.4.2/skimage.feature.hog

    Reference:
    Histograms of Oriented Gradients for Human Detection
    Navneet Dalal and Bill Triggs, CVPR 2005

    Parameters:
      im : an input grayscale or rgb image

    Returns:
      feat: Histogram of Gradient (HOG) feature
    """
    # convert rgb to grayscale if needed
    if im.ndim == 3:
        image = rgb2gray(im)
    else:
        image = np.atleast_2d(im)

    sx, sy = image.shape  # image size
    orientations = 9  # number of gradient bins
    cx, cy = (8, 8)  # pixels per cell

    gx = np.zeros(image.shape)
    gy = np.zeros(image.shape)
    gx[:, :-1] = np.diff(image, n=1, axis=1)  # compute gradient in the x direction
    gy[:-1, :] = np.diff(image, n=1, axis=0)  # compute gradient in the y direction
    grad_mag = np.sqrt(gx ** 2 + gy ** 2)  # gradient magnitude
    grad_ori = np.arctan2(gy, (gx + 1e-15)) * (180 / np.pi) + 90  # gradient orientation

    n_cellsx = int(np.floor(sx / cx))  # number of cells in x
    n_cellsy = int(np.floor(sy / cy))  # number of cells in y
    # compute orientations integral images
    orientation_histogram = np.zeros((n_cellsx, n_cellsy, orientations))
    for i in range(orientations):
        # create new integral image for this orientation
        # isolate orientations in this range
        temp_ori = np.where(grad_ori < 180 / orientations * (i + 1),
                            grad_ori, 0)  # np.where keeps grad_ori where the condition holds and puts 0 elsewhere
        temp_ori = np.where(grad_ori >= 180 / orientations * i,
                            temp_ori, 0)
        # select magnitudes for those orientations
        cond2 = temp_ori > 0
        temp_mag = np.where(cond2, grad_mag, 0)
        # uniform_filter is a mean filter: each output value is the average of a (cx, cy) window
        # centered at that position, and input and output have the same size. The first cx/2
        # rows/columns are computed from padding, so sampling starts at int(cx/2);
        # int(cx/2)::cx means "from int(cx/2) to the end in steps of cx". A clever way to build the histogram.
        orientation_histogram[:, :, i] = uniform_filter(temp_mag, size=(cx, cy))[int(cx/2)::cx, int(cy/2)::cy]

    return orientation_histogram.ravel()
Extracting color information while ignoring texture:
def color_histogram_hsv(im, nbin=10, xmin=0, xmax=255, normalized=True):
    """
    Compute color histogram for an image using hue.

    Inputs:
    - im: H x W x C array of pixel data for an RGB image.
    - nbin: Number of histogram bins. (default: 10)
    - xmin: Minimum pixel value (default: 0)
    - xmax: Maximum pixel value (default: 255)
    - normalized: Whether to normalize the histogram (default: True)

    Returns:
      1D vector of length nbin giving the color histogram over the hue of the
      input image.
    """
    ndim = im.ndim
    bins = np.linspace(xmin, xmax, nbin+1)
    hsv = matplotlib.colors.rgb_to_hsv(im/xmax) * xmax  # convert RGB to HSV
    imhist, bin_edges = np.histogram(hsv[:, :, 0], bins=bins, density=normalized)
    imhist = imhist * np.diff(bin_edges)

    # return histogram
    return imhist
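A sketch of how the two extractors can be combined into a single feature vector per image (the assignment wraps this logic in cs231n.features.extract_features; the reshape below assumes each flattened row came from a 32x32x3 image):

import numpy as np

num_color_bins = 10
feature_list = []
for row in X_train:
    img = row.reshape(32, 32, 3)                            # recover the image from its flattened row
    hog = hog_feature(img)                                  # texture information
    hist = color_histogram_hsv(img, nbin=num_color_bins)    # color information
    feature_list.append(np.concatenate([hog, hist]))        # one combined vector per image
X_train_feats = np.stack(feature_list)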