II. Convolutional Neural Networks
【Note 1】np.pad(array, pad_width, mode, **kwargs)
Pads an array. Here it is used to zero-pad the image matrix, with the call
x_pad[m][n] = np.pad(x[m][n], pad, 'constant', constant_values=0)
When pad is a single integer, pad values are added on every side of each axis (in 2-D: pad rows above and below and pad columns on the left and right, so each corner becomes a pad x pad block of the fill value).
example:
>>> a = np.ones((2, 3))
>>> a
array([[ 1., 1., 1.],
       [ 1., 1., 1.]])
>>> np.pad(a, 2, 'constant', constant_values=0)
array([[ 0., 0., 0., 0., 0., 0., 0.],
       [ 0., 0., 0., 0., 0., 0., 0.],
       [ 0., 0., 1., 1., 1., 0., 0.],
       [ 0., 0., 1., 1., 1., 0., 0.],
       [ 0., 0., 0., 0., 0., 0., 0.],
       [ 0., 0., 0., 0., 0., 0., 0.]])
【Note 2】np.where(condition[, x, y]): when called with only a condition, it returns the indices of the array elements that satisfy that condition
example:
>>> x = np.arange(9.).reshape(3, 3)
>>> x
array([[ 0., 1., 2.],
       [ 3., 4., 5.],
       [ 6., 7., 8.]])
>>> np.where(x > 5)
(array([2, 2, 2], dtype=int64), array([0, 1, 2], dtype=int64))
The first array holds the row indices and the second array the corresponding column indices of the matching elements; this function is used in the max-pool backward pass to locate the position of the maximum value.
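For instance, locating the maximum inside a single 2x2 pooling window (toy values):
>>> window = np.array([[1., 5.],
...                    [3., 2.]])
>>> np.where(window == np.max(window))
(array([0], dtype=int64), array([1], dtype=int64))
i.e. the maximum 5. sits at row 0, column 1 of the window.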
Network architecture: conv --> relu --> 2x2 max pool --> batch normalization --> affine --> relu --> affine --> softmax
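With the default settings used later in cnn.py (CIFAR-10 inputs of shape 3x32x32, 32 filters of size 7x7 with padding that preserves the spatial size, hidden_dim=100, 10 classes), the activation shapes flow as
(N, 3, 32, 32) --conv--> (N, 32, 32, 32) --2x2 pool--> (N, 32, 16, 16) --flatten--> (N, 8192) --affine+relu--> (N, 100) --affine--> (N, 10) --softmax
(batch normalization does not change the shape), which is why W2 is initialized with shape (num_filters * H * W / 4, hidden_dim).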
【layers.py】
1. Convolution (forward + backward)
import numpy as np

def conv_forward_naive(x, w, b, conv_param):
    """
    A naive implementation of the forward pass for a convolutional layer.

    The input consists of N data points, each with C channels, height H and
    width W. We convolve each input with F different filters, where each filter
    spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in the
        horizontal and vertical directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.

    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad - HH) / stride
      W' = 1 + (W + 2 * pad - WW) / stride
    - cache: (x, x_pad, w, b, conv_param)
    """
    stride = conv_param['stride']
    pad = conv_param['pad']
    F, C, HH, WW = w.shape
    N, C, H, W = x.shape
    H1 = 1 + (H + 2 * pad - HH) // stride
    W1 = 1 + (W + 2 * pad - WW) // stride
    out = np.zeros((N, F, H1, W1))
    # zero-pad the input
    x_pad = np.zeros((N, C, H + 2 * pad, W + 2 * pad))
    for m in range(N):
        for n in range(C):
            x_pad[m][n] = np.pad(x[m][n], pad, 'constant', constant_values=0)
    for k in range(N):        # N samples
        for l in range(F):    # F filters
            for i in range(H1):
                for j in range(W1):
                    # per-channel version, equivalent to the vectorized sum below:
                    # a = np.sum(x_pad[k][0][i*stride:i*stride+HH, j*stride:j*stride+WW] * w[l][0])
                    # bb = np.sum(x_pad[k][1][i*stride:i*stride+HH, j*stride:j*stride+WW] * w[l][1])
                    # c = np.sum(x_pad[k][2][i*stride:i*stride+HH, j*stride:j*stride+WW] * w[l][2])
                    # out[k][l][i][j] = a + bb + c + b[l]
                    out[k, l, i, j] = np.sum(
                        w[l] * x_pad[k, :, i * stride:i * stride + HH, j * stride:j * stride + WW]) + b[l]
    cache = (x, x_pad, w, b, conv_param)
    return out, cache
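A quick shape check of the forward pass (toy sizes chosen only for illustration):

x = np.random.randn(2, 3, 8, 8)                       # N=2, C=3, H=W=8
w = np.random.randn(4, 3, 3, 3)                       # F=4 filters of size 3x3
b = np.zeros(4)
out, _ = conv_forward_naive(x, w, b, {'stride': 1, 'pad': 1})
print(out.shape)                                      # (2, 4, 8, 8): H' = 1 + (8 + 2*1 - 3) / 1 = 8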
def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Intuitively, dx can be obtained by flipping each filter vertically and
    horizontally and convolving it with the (padded) upstream gradient.

    Inputs:
    - dout: Upstream derivatives.
    - cache: A tuple of (x, x_pad, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    dx, dw, db = None, None, None
    #############################################################################
    # TODO: Implement the convolutional backward pass.                          #
    #############################################################################
    x, x_pad, w, b, conv_param = cache
    pad = conv_param['pad']
    stride = conv_param['stride']
    N, F, H1, W1 = dout.shape
    N, C, H, W = x.shape
    HH = w.shape[2]
    WW = w.shape[3]
    dx = np.zeros((N, C, H, W))
    dx_pad = np.zeros(x_pad.shape)
    dw = np.zeros(w.shape)
    db = np.zeros(b.shape)
    # forward: out[k, l, i, j] = np.sum(w[l] * x_pad[k, :, i*stride:i*stride+HH, j*stride:j*stride+WW]) + b[l]
    for n in range(N):
        for f in range(F):
            for i in range(H1):
                for j in range(W1):
                    db[f] += dout[n, f, i, j]
                    dw[f] += dout[n, f, i, j] * x_pad[n, :, i * stride:i * stride + HH, j * stride:j * stride + WW]
                    dx_pad[n, :, i * stride:i * stride + HH, j * stride:j * stride + WW] += dout[n, f, i, j] * w[f]
    # strip the padding to recover dx
    dx = dx_pad[:, :, pad:pad + H, pad:pad + W].copy()
    #############################################################################
    #                             END OF YOUR CODE                              #
    #############################################################################
    return dx, dw, db
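A sketch of a numerical gradient check for the backward pass, assuming eval_numerical_gradient_array from cs231n.gradient_check (imported in main.py below) and the rel_error helper defined there:

x = np.random.randn(2, 3, 7, 7)
w = np.random.randn(3, 3, 3, 3)
b = np.random.randn(3)
dout = np.random.randn(2, 3, 7, 7)                    # matches the (N, F, H', W') output shape for pad=1, stride=1
conv_param = {'stride': 1, 'pad': 1}
out, cache = conv_forward_naive(x, w, b, conv_param)
dx, dw, db = conv_backward_naive(dout, cache)
dx_num = eval_numerical_gradient_array(lambda x: conv_forward_naive(x, w, b, conv_param)[0], x, dout)
dw_num = eval_numerical_gradient_array(lambda w: conv_forward_naive(x, w, b, conv_param)[0], w, dout)
print(rel_error(dx, dx_num), rel_error(dw, dw_num))   # both should be very small, around 1e-8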
2. Max pooling
def max_pool_forward_naive(x, pool_param):
    """
    A naive implementation of the forward pass for a max pooling layer.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    Returns a tuple of:
    - out: Output data
    - cache: (x, pool_param)
    """
    out = None
    HH = pool_param['pool_height']
    WW = pool_param['pool_width']
    stride = pool_param['stride']
    N, C, H, W = x.shape
    H1 = 1 + (H - HH) // stride
    W1 = 1 + (W - WW) // stride
    out = np.zeros((N, C, H1, W1))
    for n in range(N):
        for c in range(C):
            for i in range(H1):
                for j in range(W1):
                    out[n, c, i, j] = np.max(x[n, c, i * stride:i * stride + HH, j * stride:j * stride + WW])
    cache = (x, pool_param)
    return out, cache
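A quick shape check (toy sizes):

x = np.random.randn(2, 3, 4, 4)
out, _ = max_pool_forward_naive(x, {'pool_height': 2, 'pool_width': 2, 'stride': 2})
print(out.shape)                                      # (2, 3, 2, 2): H' = 1 + (4 - 2) / 2 = 2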
def max_pool_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a max pooling layer.

    Inputs:
    - dout: Upstream derivatives
    - cache: A tuple of (x, pool_param) as in the forward pass.

    Returns:
    - dx: Gradient with respect to x
    """
    dx = None
    x, pool_param = cache
    N, C, H, W = x.shape
    HH = pool_param['pool_height']
    WW = pool_param['pool_width']
    stride = pool_param['stride']
    H1 = 1 + (H - HH) // stride
    W1 = 1 + (W - WW) // stride
    dx = np.zeros(x.shape)
    for n in range(N):
        for c in range(C):
            for i in range(H1):
                for j in range(W1):
                    # locate the max within this pooling window (not the whole
                    # feature map) and route the upstream gradient to it
                    window = x[n, c, i * stride:i * stride + HH, j * stride:j * stride + WW]
                    index = np.where(window == np.max(window))
                    for m in range(len(index[0])):
                        dx[n, c, i * stride + index[0][m], j * stride + index[1][m]] += dout[n, c, i, j]
    return dx
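A tiny worked example showing that the gradient is routed only to the position of the maximum in each window:

x = np.array([[[[1., 2.],
                [4., 3.]]]])                          # shape (1, 1, 2, 2)
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}
out, cache = max_pool_forward_naive(x, pool_param)    # out[0, 0] == [[4.]]
dx = max_pool_backward_naive(np.ones_like(out), cache)
print(dx[0, 0])                                       # [[0. 0.], [1. 0.]]: only the entry holding the max (4.) gets the gradient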
3. Batch Normalization
The BN layer from the fully connected network cannot be reused directly here: in a CNN the activations are 4-D (N, C, H, W) rather than (N, D). So we write a separate spatial batch normalization. The idea is to reshape (N, C, H, W) into (N*H*W, C) so that the existing BN functions can be called, i.e. each channel is treated as one feature dimension and the mean and variance are computed per channel.
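Before the implementation, a quick check of this reshape round-trip (toy shapes):

x = np.random.randn(2, 3, 4, 5)                       # N=2, C=3, H=4, W=5
x2d = x.transpose(0, 2, 3, 1).reshape(2 * 4 * 5, 3)   # one row per spatial location, one column per channel
print(x2d.shape)                                      # (40, 3)
x_back = x2d.reshape(2, 4, 5, 3).transpose(0, 3, 1, 2)
print(np.allclose(x, x_back))                         # True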
def spatial_batchnorm_forward(x, gamma, beta, bn_param):
    """
    Computes the forward pass for spatial batch normalization.

    Inputs:
    - x: Input data of shape (N, C, H, W)
    - gamma: Scale parameter, of shape (C,)
    - beta: Shift parameter, of shape (C,)
    - bn_param: Dictionary with the following keys:
      - mode: 'train' or 'test'; required
      - eps: Constant for numeric stability
      - momentum: Constant for running mean / variance. momentum=0 means that
        old information is discarded completely at every time step, while
        momentum=1 means that new information is never incorporated. The
        default of momentum=0.9 should work well in most situations.
      - running_mean: Array of shape (C,) giving running mean of features
      - running_var: Array of shape (C,) giving running variance of features

    Returns a tuple of:
    - out: Output data, of shape (N, C, H, W)
    - cache: Values needed for the backward pass
    """
    out, cache = None, None
    N, C, H, W = x.shape
    # reshape (N, C, H, W) -> (N*H*W, C) so the vanilla BN layer can be reused
    out, cache = batchnorm_forward(x.transpose(0, 2, 3, 1).reshape(N * H * W, C), gamma, beta, bn_param)
    out = out.reshape(N, H, W, C).transpose(0, 3, 1, 2)
    return out, cache
def spatial_batchnorm_backward(dout, cache):
    """
    Computes the backward pass for spatial batch normalization.

    Inputs:
    - dout: Upstream derivatives, of shape (N, C, H, W)
    - cache: Values from the forward pass

    Returns a tuple of:
    - dx: Gradient with respect to inputs, of shape (N, C, H, W)
    - dgamma: Gradient with respect to scale parameter, of shape (C,)
    - dbeta: Gradient with respect to shift parameter, of shape (C,)
    """
    dx, dgamma, dbeta = None, None, None
    N, C, H, W = dout.shape
    dx, dgamma, dbeta = batchnorm_backward(dout.transpose(0, 2, 3, 1).reshape(N * H * W, C), cache)
    dx = dx.reshape(N, H, W, C).transpose(0, 3, 1, 2)
    return dx, dgamma, dbeta
【cnn.py】
This file defines the network model and shows how the loss and the gradients are computed.
import numpy as np
from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.layer_utils import *


class ThreeLayerConvNet(object):
    """
    A three-layer convolutional network with the following architecture:

    conv - relu - 2x2 max pool - batch normalization - affine - relu - affine - softmax

    The network operates on minibatches of data that have shape (N, C, H, W)
    consisting of N images, each with height H and width W and with C input
    channels.
    """

    def __init__(self, input_dim=(3, 32, 32), num_filters=32, filter_size=7,
                 hidden_dim=100, num_classes=10, weight_scale=1e-3, reg=0.0,
                 dtype=np.float32):
        """
        Initialize a new network.

        Inputs:
        - input_dim: Tuple (C, H, W) giving size of input data
        - num_filters: Number of filters to use in the convolutional layer
        - filter_size: Size of filters to use in the convolutional layer
        - hidden_dim: Number of units to use in the fully-connected hidden layer
        - num_classes: Number of scores to produce from the final affine layer.
        - weight_scale: Scalar giving standard deviation for random initialization
          of weights.
        - reg: Scalar giving L2 regularization strength
        - dtype: numpy datatype to use for computation.
        """
        self.params = {}
        self.reg = reg
        self.dtype = dtype
        C, H, W = input_dim
        self.params['W1'] = np.random.randn(num_filters, C, filter_size, filter_size) * weight_scale
        self.params['b1'] = np.zeros(num_filters)
        # 2x2 max pooling halves H and W, hence the H*W/4 factor for the first affine layer
        self.params['W2'] = np.random.randn(int(num_filters * H * W / 4), hidden_dim) * weight_scale
        self.params['b2'] = np.zeros(hidden_dim)
        self.params['W3'] = np.random.randn(hidden_dim, num_classes) * weight_scale
        self.params['b3'] = np.zeros(num_classes)
        self.params['gamma'] = np.ones(num_filters)
        self.params['beta'] = np.zeros(num_filters)
        # keep bn_param on the model so the running mean / variance persist between calls
        self.bn_param = {'mode': 'train'}

        for k, v in self.params.items():
            self.params[k] = v.astype(dtype)

    def loss(self, X, y=None):
        """
        Evaluate loss and gradient for the three-layer convolutional network.

        Input / output: Same API as TwoLayerNet in fc_net.py.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        W3, b3 = self.params['W3'], self.params['b3']
        gamma, beta = self.params['gamma'], self.params['beta']

        # pass conv_param to the forward pass for the convolutional layer
        filter_size = W1.shape[2]
        self.bn_param['mode'] = 'test' if y is None else 'train'
        conv_param = {'stride': 1, 'pad': (filter_size - 1) // 2}

        # pass pool_param to the forward pass for the max-pooling layer
        pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

        scores = None
        self.cache = {}
        y1, self.cache['cache1'] = conv_bn_relu_pool_forward(X, W1, b1, gamma, beta, conv_param, self.bn_param, pool_param)
        y2, self.cache['cache2'] = affine_relu_forward(y1, W2, b2)
        scores, self.cache['cache3'] = affine_forward(y2, W3, b3)

        if y is None:
            return scores

        loss, grads = 0, {}
        loss, dy = softmax_loss(scores, y)
        loss += 0.5 * self.reg * (np.sum(np.square(self.params['W1'])) +
                                  np.sum(np.square(self.params['W2'])) +
                                  np.sum(np.square(self.params['W3'])))
        grad_term, grads['W3'], grads['b3'] = affine_backward(dy, self.cache['cache3'])
        grads['W3'] += self.reg * self.params['W3']
        grad_term, grads['W2'], grads['b2'] = affine_relu_backward(grad_term, self.cache['cache2'])
        grads['W2'] += self.reg * self.params['W2']
        dx, grads['W1'], grads['b1'], grads['gamma'], grads['beta'] = conv_bn_relu_pool_backward(grad_term, self.cache['cache1'])
        grads['W1'] += self.reg * self.params['W1']
        return loss, grads
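A quick sanity check on the untrained model (a sketch): with the default small random weights and reg=0, the initial softmax loss should be close to log(10) ≈ 2.3 for 10 classes.

model = ThreeLayerConvNet()                           # reg defaults to 0.0
X = np.random.randn(50, 3, 32, 32)
y = np.random.randint(10, size=50)
loss, grads = model.loss(X, y)
print('Initial loss:', loss)                          # expect roughly log(10) ≈ 2.3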
Training still uses the Solver class from the fully connected part, which defines the training procedure (train_op).
【main.py】
The main script.
import numpy as np
import matplotlib.pyplot as plt
from cs231n.classifiers.cnn import *
from cs231n.data_utils import load_CIFAR10
from cs231n.gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.solver import Solver


def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))


# load the data
data = load_CIFAR10('E:\\Python\\deep learning CS231n\\assignment2\\cs231n\\datasets')
for k, v in data.items():
    print('%s: ' % k, v.shape)

# first train on a small subset of the data
num_train = 100
small_data = {
    'X_train': data['X_train'][:num_train],
    'y_train': data['y_train'][:num_train],
    'X_val': data['X_val'],
    'y_val': data['y_val'],
}

model = ThreeLayerConvNet(weight_scale=1e-2)
solver = Solver(model, small_data, num_epochs=10, batch_size=50,
                update_rule='adam',
                optim_config={'learning_rate': 1e-3},
                verbose=True, print_every=1)
solver.train()
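The training curves can then be visualized with the matplotlib import above (a sketch, assuming the CS231n Solver records loss_history, train_acc_history and val_acc_history, as the standard assignment Solver does):

plt.subplot(2, 1, 1)
plt.plot(solver.loss_history, 'o')
plt.xlabel('iteration')
plt.ylabel('loss')

plt.subplot(2, 1, 2)
plt.plot(solver.train_acc_history, '-o', label='train')
plt.plot(solver.val_acc_history, '-o', label='val')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()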