通过搭建两层神经网络对图片进行分类。
1.定义一个类:
class TwoLayerNet(object):
    """Two-layer fully connected classifier.

    The learnable parameters live in the dictionary ``self.params``:

    * ``W1`` -- first-layer weights, shape ``(input_size, hidden_size)``
    * ``b1`` -- first-layer biases, shape ``(hidden_size,)``
    * ``W2`` -- second-layer weights, shape ``(hidden_size, output_size)``
    * ``b2`` -- second-layer biases, shape ``(output_size,)``
    """

    def __init__(self, input_size, hidden_size, output_size, std=1e-4):
        """Initialize the parameters.

        Weights are drawn from a standard normal distribution scaled by
        ``std``; biases start at zero.

        Args:
            input_size: Number of input features.
            hidden_size: Number of units in the hidden layer.
            output_size: Number of output scores.
            std: Scale factor applied to the random initial weights.
        """
        # Dict literals evaluate in order, so W1 is still drawn before W2
        # and results stay reproducible for a given NumPy seed.
        self.params = {
            'W1': std * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': std * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }
2.定义损失函数
def loss(self, X, y=None, reg=0.0):
    """Compute class scores, or the softmax loss and its gradients.

    The forward pass is affine -> ReLU -> affine, using the parameters
    stored in ``self.params``.

    Args:
        X: Input array of shape (N, D), one sample per row.
        y: Optional integer labels used to index the score columns, one
            per row of ``X``. When ``None`` only the scores are computed.
        reg: L2 regularization strength applied to ``W1`` and ``W2``.

    Returns:
        If ``y`` is ``None``: the score matrix of shape (N, C).
        Otherwise a tuple ``(loss, grads)`` where ``loss`` is the mean
        softmax cross-entropy plus ``reg * (sum(W1**2) + sum(W2**2))`` and
        ``grads`` maps each key of ``self.params`` to the gradient of the
        loss with respect to that parameter.
    """
    # Unpack variables from the params dictionary
    W1, b1 = self.params['W1'], self.params['b1']
    W2, b2 = self.params['W2'], self.params['b2']
    N = X.shape[0]

    # Forward pass.
    s1 = np.dot(X, W1) + b1                 # (N, H)
    # np.maximum instead of a boolean-mask multiply: the mask form turns a
    # -inf pre-activation into NaN (0 * -inf).
    s1_act = np.maximum(0, s1)
    scores = np.dot(s1_act, W2) + b2        # (N, C)

    # If the targets are not given then jump out, we're done
    if y is None:
        return scores

    # Softmax probabilities; subtract the row max for numerical stability.
    probs = np.exp(scores - np.max(scores, axis=1, keepdims=True))
    probs /= np.sum(probs, axis=1, keepdims=True)

    rows = np.arange(N)
    loss = -np.log(probs[rows, y]).sum()
    loss /= N
    loss += reg * np.sum(W1 ** 2)
    loss += reg * np.sum(W2 ** 2)

    # Backward pass: compute gradients
    grads = {}
    # Gradient w.r.t. the scores: (probs - one_hot(y)) / N.
    ds2 = probs.copy()
    ds2[rows, y] -= 1
    ds2 /= N
    grads['W2'] = np.dot(s1_act.T, ds2) + 2 * reg * W2
    grads['b2'] = np.sum(ds2, axis=0)

    # Back-propagate through the ReLU: only units with positive
    # pre-activation pass gradient.
    ds1 = np.dot(ds2, W2.T) * (s1 > 0)
    grads['W1'] = np.dot(X.T, ds1) + 2 * reg * W1
    grads['b1'] = np.sum(ds1, axis=0)

    return loss, grads
3.训练网络
def train(self, X, y, X_val, y_val,
          learning_rate=1e-3, learning_rate_decay=0.95,
          reg=5e-6, num_iters=100,
          batch_size=200, verbose=False):
    """Train the network with minibatch stochastic gradient descent.

    Args:
        X: Training inputs; rows are samples.
        y: Training labels, indexed in parallel with the rows of ``X``.
        X_val: Validation inputs passed to ``self.predict``.
        y_val: Validation labels compared against the predictions.
        learning_rate: Initial step size for the parameter updates.
        learning_rate_decay: Factor the learning rate is multiplied by
            once per epoch.
        reg: Regularization strength forwarded to ``self.loss``.
        num_iters: Number of SGD steps to take.
        batch_size: Number of samples drawn for each step.
        verbose: If true, print the loss every 100 iterations.

    Returns:
        A dict with the keys ``'loss_history'`` (one loss per iteration),
        ``'train_acc_history'`` and ``'val_acc_history'`` (one entry per
        epoch; training accuracy is measured on the current minibatch).
    """
    num_train = X.shape[0]
    # Floor division: with true division the epoch length becomes a float
    # (e.g. 2.5) and `it % iterations_per_epoch == 0` fires at the wrong
    # iterations whenever num_train is not a multiple of batch_size.
    iterations_per_epoch = max(num_train // batch_size, 1)

    # Use SGD to optimize the parameters in self.params
    loss_history = []
    train_acc_history = []
    val_acc_history = []

    for it in range(num_iters):
        # Draw a minibatch; np.random.choice samples with replacement by
        # default, and an int argument is treated like np.arange(num_train).
        idx = np.random.choice(num_train, batch_size)
        X_batch = X[idx]
        y_batch = y[idx]

        # Compute loss and gradients using the current minibatch
        loss, grads = self.loss(X_batch, y=y_batch, reg=reg)
        loss_history.append(loss)

        # Vanilla gradient-descent step on every parameter.
        for p in ('W1', 'W2', 'b1', 'b2'):
            self.params[p] -= learning_rate * grads[p]

        if verbose and it % 100 == 0:
            print('iteration %d / %d: loss %f' % (it, num_iters, loss))

        # Every epoch, check train and val accuracy and decay learning rate.
        if it % iterations_per_epoch == 0:
            train_acc = (self.predict(X_batch) == y_batch).mean()
            val_acc = (self.predict(X_val) == y_val).mean()
            train_acc_history.append(train_acc)
            val_acc_history.append(val_acc)

            learning_rate *= learning_rate_decay

    return {
        'loss_history': loss_history,
        'train_acc_history': train_acc_history,
        'val_acc_history': val_acc_history,
    }
4.预测
def predict(self, X):
    """Predict a class index for every row of ``X``.

    Args:
        X: Input array forwarded to ``self.loss`` without labels, so the
            call returns the class scores.

    Returns:
        An array holding, for each row, the index of the highest score.
    """
    scores = self.loss(X)
    return np.argmax(scores, axis=1)
5.CIFAR-10分类结果
相关参数:
hidden_size : 50
learning_rate : 1e-3
regularization : 0.25
num_iters : 2000
batch_size : 200
learning_rate_decay : 0.95
Test accuracy: 0.51, best val acc: 0.501
Inline Question
Now that you have trained a Neural Network classifier, you may find that your testing accuracy is much lower than the training accuracy. In what ways can we decrease this gap? Select all that apply.
Your answer: 1 和 3(即增大训练数据集、增大正则化强度)
Your explanation: 增大训练数据量和增大正则化强度都能提高模型的泛化能力,从而缩小训练准确率与测试准确率之间的差距;而增加隐藏层节点会增大模型容量,使模型更容易过拟合。