c = exp_correct / exp_trans[i]
loss_data[i] = float(c)
两个数组做除法,结果也是数组(一维数组),把它直接赋值给另一个数组是无效的。
np.zeros_like 创建的数组与输入数组的 dtype 相同(输入是 int 型时,结果也是 int 型),因此需要加上 dtype=float 来转换类型。
normalization以及向量化
# Center every split on the mean image computed from the training set only
mean_image = np.mean(X_train, axis=0)
np.subtract(X_train, mean_image, out=X_train)  # in-place, same as `-=`
np.subtract(X_val, mean_image, out=X_val)
np.subtract(X_test, mean_image, out=X_test)
# Flatten each example into one row: (count, -1)
X_train, X_val, X_test = (
    X_train.reshape(num_training, -1),
    X_val.reshape(num_validation, -1),
    X_test.reshape(num_test, -1),
)
import numpy as np
import matplotlib.pyplot as plt
import math
class TwoLayerNet(object):
    """
    A two-layer fully-connected neural network. The net has an input dimension
    of D, a hidden layer dimension of H, and performs classification over C
    classes. We train the network with a softmax loss function and L2
    regularization on the weight matrices. The network uses a ReLU
    nonlinearity after the first fully connected layer.

    In other words, the network has the following architecture:

    input - fully connected layer - ReLU - fully connected layer - softmax

    The outputs of the second fully-connected layer are the scores for each
    class.
    """

    def __init__(self, input_size, hidden_size, output_size, std=1e-4):
        """
        Initialize the model. Weights are initialized to small random values
        and biases are initialized to zero. Weights and biases are stored in
        the variable self.params, which is a dictionary with the following
        keys:

        W1: First layer weights; has shape (D, H)
        b1: First layer biases; has shape (H,)
        W2: Second layer weights; has shape (H, C)
        b2: Second layer biases; has shape (C,)

        Inputs:
        - input_size: The dimension D of the input data.
        - hidden_size: The number of neurons H in the hidden layer.
        - output_size: The number of classes C.
        - std: Scale factor applied to the standard-normal weight samples.
        """
        # W1 is drawn before W2; the draw order matters for seeded runs.
        self.params = {}
        self.params['W1'] = std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def loss(self, X, y=None, reg=0.0):
        """
        Compute the loss and gradients for a two layer fully connected neural
        network.

        Inputs:
        - X: Input data of shape (N, D). Each X[i] is a training sample.
        - y: Vector of training labels. y[i] is the label for X[i], and each
          y[i] is an integer in the range 0 <= y[i] < C. This parameter is
          optional; if it is not passed then we only return scores, and if it
          is passed then we instead return the loss and gradients.
        - reg: Regularization strength.

        Returns:
        If y is None, return a matrix scores of shape (N, C) where
        scores[i, c] is the score for class c on input X[i].

        If y is not None, instead return a tuple of:
        - loss: Loss (data loss and regularization loss) for this batch of
          training samples.
        - grads: Dictionary mapping parameter names to gradients of those
          parameters with respect to the loss function; has the same keys as
          self.params.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        N = X.shape[0]

        # Forward pass: affine -> ReLU -> affine.
        hidden_pre = np.dot(X, W1) + b1
        hidden = np.maximum(hidden_pre, 0)
        scores = np.dot(hidden, W2) + b2

        # Without labels there is nothing more to compute.
        if y is None:
            return scores

        # Softmax loss via log-sum-exp. Shifting each row by its maximum
        # leaves the probabilities unchanged but keeps np.exp from
        # overflowing (inf / inf = NaN) when the scores are large.
        shifted = scores - np.max(scores, axis=1, keepdims=True)
        log_probs = shifted - np.log(
            np.sum(np.exp(shifted), axis=1, keepdims=True))
        probs = np.exp(log_probs)

        rows = np.arange(N)
        data_loss = -np.sum(log_probs[rows, y]) / N
        # The 0.5 factor makes the regularization gradient exactly reg * W.
        reg_loss = 0.5 * reg * (np.sum(W1 * W1) + np.sum(W2 * W2))
        loss = data_loss + reg_loss

        # Backward pass. d(loss)/d(scores) = (probs - one_hot(y)) / N.
        grad_scores = probs.copy()
        grad_scores[rows, y] -= 1
        grad_scores /= N

        grads = {}
        grads['W2'] = np.dot(hidden.T, grad_scores) + reg * W2
        grads['b2'] = np.sum(grad_scores, axis=0)

        # ReLU passes gradient only where its input was strictly positive.
        grad_hidden = np.dot(grad_scores, W2.T) * (hidden_pre > 0)
        grads['W1'] = np.dot(X.T, grad_hidden) + reg * W1
        grads['b1'] = np.sum(grad_hidden, axis=0)

        return loss, grads

    def train(self, X, y, X_val, y_val,
              learning_rate=1e-3, learning_rate_decay=0.95,
              reg=1e-5, num_iters=100,
              batch_size=200, verbose=False):
        """
        Train this neural network using stochastic gradient descent.

        Inputs:
        - X: A numpy array of shape (N, D) giving training data.
        - y: A numpy array of shape (N,) giving training labels; y[i] = c
          means that X[i] has label c, where 0 <= c < C.
        - X_val: A numpy array of shape (N_val, D) giving validation data.
        - y_val: A numpy array of shape (N_val,) giving validation labels.
        - learning_rate: Scalar giving learning rate for optimization.
        - learning_rate_decay: Scalar giving factor used to decay the learning
          rate after each epoch.
        - reg: Scalar giving regularization strength.
        - num_iters: Number of steps to take when optimizing.
        - batch_size: Number of training examples to use per step.
        - verbose: boolean; if true print progress during optimization.

        Returns:
        A dictionary with keys 'loss_history' (one entry per iteration),
        'train_acc_history' and 'val_acc_history' (one entry per epoch).
        """
        num_train = X.shape[0]
        # Floor division keeps this an integer on Python 3 as well, so the
        # `it % iterations_per_epoch == 0` epoch test below behaves the same
        # on both major versions.
        iterations_per_epoch = max(num_train // batch_size, 1)

        loss_history = []
        train_acc_history = []
        val_acc_history = []

        # `range` (not `xrange`) so the loop runs on Python 2 and Python 3.
        for it in range(num_iters):
            # Random minibatch, sampled with replacement.
            batch_idx = np.random.choice(num_train, batch_size)
            X_batch = X[batch_idx]
            y_batch = y[batch_idx]

            # Compute loss and gradients using the current minibatch.
            loss, grads = self.loss(X_batch, y=y_batch, reg=reg)
            loss_history.append(loss)

            # Vanilla SGD step. New arrays are bound instead of updating in
            # place, so external references to old parameters stay untouched.
            for name in ('W2', 'W1', 'b2', 'b1'):
                self.params[name] = (
                    self.params[name] - grads[name] * learning_rate)

            if verbose and it % 100 == 0:
                # Single-argument form prints identically on Python 2 and 3.
                print('iteration %d / %d: loss %f' % (it, num_iters, loss))

            # Every epoch, check train and val accuracy and decay learning
            # rate. Training accuracy is measured on the current minibatch.
            if it % iterations_per_epoch == 0:
                train_acc = (self.predict(X_batch) == y_batch).mean()
                val_acc = (self.predict(X_val) == y_val).mean()
                train_acc_history.append(train_acc)
                val_acc_history.append(val_acc)

                learning_rate *= learning_rate_decay

        return {
            'loss_history': loss_history,
            'train_acc_history': train_acc_history,
            'val_acc_history': val_acc_history,
        }

    def predict(self, X):
        """
        Use the trained weights of this two-layer network to predict labels
        for data points. For each data point we predict scores for each of the
        C classes, and assign each data point to the class with the highest
        score.

        Inputs:
        - X: A numpy array of shape (N, D) giving N D-dimensional data points
          to classify.

        Returns:
        - y_pred: A numpy array of shape (N,) giving predicted labels for each
          of the elements of X. For all i, y_pred[i] = c means that X[i] is
          predicted to have class c, where 0 <= c < C.
        """
        # loss() without labels returns the raw (N, C) class scores.
        scores = self.loss(X)
        return np.argmax(scores, axis=1)
import numpy as np
import matplotlib.pyplot as plt
from neural_net import TwoLayerNet
# Plot defaults for this session: figure size, no pixel smoothing, gray colormap
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})
def rel_error(x, y):
    """Return the largest element-wise relative error between x and y.

    Each element's error is |x - y| / max(1e-8, |x| + |y|); the 1e-8 floor
    avoids dividing by zero when both entries are zero.
    """
    abs_diff = np.abs(x - y)
    scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
    return np.max(abs_diff / scale)
# Dimensions of the toy problem used to sanity-check the implementation:
# feature size, hidden width, class count, and number of examples.
# (The helper functions fix the random seed so runs are repeatable.)
input_size, hidden_size, num_classes, num_inputs = 4, 10, 3, 5
def init_toy_model():
    """Seed NumPy's RNG with 0 and build the toy TwoLayerNet (std=1e-1)."""
    np.random.seed(0)
    toy_net = TwoLayerNet(input_size, hidden_size, num_classes, std=1e-1)
    return toy_net
def init_toy_data():
    """Seed NumPy's RNG with 1 and return the toy (X, y) pair.

    X has shape (num_inputs, input_size) with entries scaled by 10; y holds
    the five fixed class labels.
    """
    np.random.seed(1)
    labels = np.array([0, 1, 2, 2, 1])
    features = np.random.randn(num_inputs, input_size) * 10
    return features, labels
# Build the toy model and data, then compare the forward pass and the loss
# against reference values. Every print uses the single-argument call form so
# the output is identical on Python 2 and Python 3.
net = init_toy_model()
X, y = init_toy_data()

scores = net.loss(X)
print('Your scores:')
print(scores)
print('')
print('correct scores:')
correct_scores = np.asarray([
    [-0.81233741, -1.27654624, -0.70335995],
    [-0.17129677, -1.18803311, -0.47310444],
    [-0.51590475, -1.01354314, -0.8504215],
    [-0.15419291, -0.48629638, -0.52901952],
    [-0.00618733, -0.12435261, -0.15226949]])
print(correct_scores)
print('')

# The difference should be very small. We get < 1e-7
print('Difference between your scores and correct scores:')
print(np.sum(np.abs(scores - correct_scores)))

loss, _ = net.loss(X, y, reg=0.1)
correct_loss = 1.30378789133

# should be very small, we get < 1e-12
print('Difference between your loss and correct loss:')
print(np.sum(np.abs(loss - correct_loss)))
from gradient_check import eval_numerical_gradient
# Use numeric gradient checking to check your implementation of the backward
# pass. If your implementation is correct, the difference between the numeric
# and analytic gradients should be less than 1e-8 for each of W1, W2, b1, b2.
loss, grads = net.loss(X, y, reg=0.1)


def f(W):
    """Return the scalar loss at the network's current parameters.

    The argument is ignored. NOTE(review): this presumably relies on
    eval_numerical_gradient perturbing net.params[...] in place -- confirm
    against gradient_check.
    """
    return net.loss(X, y, reg=0.1)[0]


# these should all be less than 1e-8 or so
for param_name in grads:
    param_grad_num = eval_numerical_gradient(
        f, net.params[param_name], verbose=False)
    print('%s max relative error: %e'
          % (param_name, rel_error(param_grad_num, grads[param_name])))
# Train a fresh toy network on the toy data, which is also passed as the
# validation set, and report the final minibatch loss.
net = init_toy_model()
stats = net.train(X, y, X, y,
                  learning_rate=1e-1, reg=1e-5,
                  num_iters=100, verbose=True)

# Two spaces after the colon reproduce the output of the original Python 2
# statement `print 'Final training loss: ', value`.
print('Final training loss:  %s' % stats['loss_history'][-1])

# plot the loss history
plt.plot(stats['loss_history'])
plt.xlabel('iteration')
plt.ylabel('training loss')
plt.title('Training Loss history')
plt.show()
from data_utils import load_CIFAR10
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    """
    Load CIFAR-10 from the 'cifar-10-batches-py' directory and preprocess it
    for the two-layer neural net classifier.

    Steps: take the first num_training training examples for training and the
    next num_validation for validation, take the first num_test test examples,
    subtract the mean training image from every split, and flatten each
    example into a row.

    Returns the tuple (X_train, y_train, X_val, y_val, X_test, y_test).
    """
    # Load the raw CIFAR-10 data
    raw = load_CIFAR10('cifar-10-batches-py')
    X_train, y_train, X_test, y_test = raw

    # Index sets for each split; the validation set must be carved out before
    # X_train is rebound to its training prefix.
    val_idx = range(num_training, num_training + num_validation)
    train_idx = range(num_training)
    test_idx = range(num_test)

    X_val, y_val = X_train[val_idx], y_train[val_idx]
    X_train, y_train = X_train[train_idx], y_train[train_idx]
    X_test, y_test = X_test[test_idx], y_test[test_idx]

    # Center all splits on the mean image of the training split (in place)
    mean_image = np.mean(X_train, axis=0)
    for split in (X_train, X_val, X_test):
        split -= mean_image

    # Flatten every example to a single row
    flat_train = X_train.reshape(num_training, -1)
    flat_val = X_val.reshape(num_validation, -1)
    flat_test = X_test.reshape(num_test, -1)

    return flat_train, y_train, flat_val, y_val, flat_test, y_test
# Load the preprocessed splits and report their shapes. Each message is
# formatted into one string so it prints identically on Python 2 and 3; the
# two spaces after the colon match the original `print 'label: ', value`.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
print('Train data shape:  %s' % (X_train.shape,))
print('Train labels shape:  %s' % (y_train.shape,))
print('Validation data shape:  %s' % (X_val.shape,))
print('Validation labels shape:  %s' % (y_val.shape,))
print('Test data shape:  %s' % (X_test.shape,))
print('Test labels shape:  %s' % (y_test.shape,))
#input_size = 32 * 32 * 3
#hidden_size = 50
#num_classes = 10
#net = TwoLayerNet(input_size, hidden_size, num_classes)
# Train the network
#stats = net.train(X_train, y_train, X_val, y_val,
# num_iters=1000, batch_size=200,
# learning_rate=1e-4, learning_rate_decay=0.95,
# reg=0.5, verbose=True)
# Predict on the validation set
#val_acc = (net.predict(X_val) == y_val).mean()
#val_acc = np.sum(net.predict(X_val) == y_val)
#print 'Validation accuracy: ', val_acc
#plt.subplot(2, 1, 1)
#plt.plot(stats['loss_history'])
#plt.title('Loss history')
#plt.xlabel('Iteration')
#plt.ylabel('Loss')
#plt.subplot(2, 1, 2)
#plt.plot(stats['train_acc_history'], label='train')
#plt.plot(stats['val_acc_history'], label='val')
#plt.title('Classification accuracy history')
#plt.xlabel('Epoch')
#plt.ylabel('Clasification accuracy')
#plt.show()
#from cs231n.vis_utils import visualize_grid
# Visualize the weights of the network
#def show_net_weights(net):
# W1 = net.params['W1']
# W1 = W1.reshape(32, 32, 3, -1).transpose(3, 0, 1, 2)
# plt.imshow(visualize_grid(W1, padding=3).astype('uint8'))
# plt.gca().axis('off')
# plt.show()
#show_net_weights(net)
# Exhaustive grid search over hidden size, batch size, learning rate and
# regularization strength. Every configuration trains a fresh network for
# 1000 iterations and is scored on the validation set.
input_size = 32 * 32 * 3
hidden_size_choise = [x * 100 + 50 for x in range(10)]
batch_size_choise = [200, 300, 400, 500]
learning_rate_choise = [1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 1e-1, 1]
reg_choice = [0.05, 0.1, 0.5, 5]
num_classes = 10

# Validation accuracy of every run, in loop order.
val_acc_list = []
# Best run seen so far, so the winning configuration and model are not lost.
best_val_acc = -1.0
best_config = None
best_net = None

for hidden_size in hidden_size_choise:
    for batch_size in batch_size_choise:
        for learning_rate in learning_rate_choise:
            for reg in reg_choice:
                net = TwoLayerNet(input_size, hidden_size, num_classes)

                print('current reg %s' % reg)
                print('current learning rate %s' % learning_rate)
                print('current batch size %s' % batch_size)
                print('current hidden size %s' % hidden_size)

                # Train the network
                stats = net.train(X_train, y_train, X_val, y_val,
                                  learning_rate=learning_rate,
                                  learning_rate_decay=0.95,
                                  reg=reg,
                                  num_iters=1000,
                                  batch_size=batch_size,
                                  verbose=True)

                # Predict on the validation set
                val_acc = (net.predict(X_val) == y_val).mean()
                print('Validation accuracy:  %s' % val_acc)
                val_acc_list.append(val_acc)

                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    best_config = {
                        'hidden_size': hidden_size,
                        'batch_size': batch_size,
                        'learning_rate': learning_rate,
                        'reg': reg,
                    }
                    best_net = net
参考:
https://blog.csdn.net/xieyi4650/article/details/53465742