import gzip
import pickle
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.tensor.nnet import softmax
from theano.tensor import shared_randomstreams
from theano.tensor.signal import downsample
def linear(z):
    """Identity activation: hand the weighted input `z` back untouched."""
    return z
def ReLU(z):
    """Rectified linear activation: the symbolic maximum of 0.0 and `z`."""
    floor = 0.0
    return T.maximum(floor, z)
from theano.tensor.nnet import sigmoid
from theano.tensor import tanh
def load_data_shared(filename="./data/mnist.pkl.gz"):
    """Load a gzipped pickle of three datasets into Theano variables.

    The pickle at `filename` must unpack into exactly three objects
    (training, validation and test data), each indexable so that
    element 0 holds the inputs and element 1 holds the labels.

    Returns a list ``[training, validation, test]`` where every entry is
    a pair ``(shared_x, y)``: `shared_x` is a Theano shared variable of
    dtype ``floatX`` and `y` is the label shared variable cast to
    ``int32``.
    """
    # NOTE: unpickling can execute arbitrary code -- only load files from
    # a trusted source.
    # The context manager closes the file even if unpickling fails, and
    # `pickle.load(..., encoding=...)` is the public replacement for
    # poking the private `pickle._Unpickler` class.
    with gzip.open(filename, 'rb') as f:
        training_data, validation_data, test_data = pickle.load(
            f, encoding='latin1')

    def shared(data):
        """Place the data into shared variables.  This allows Theano to copy
        the data to the GPU, if one is available.
        """
        shared_x = theano.shared(
            np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
        # Labels are stored as floatX and only cast to int32 symbolically.
        shared_y = theano.shared(
            np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
        return shared_x, T.cast(shared_y, "int32")

    return [shared(training_data), shared(validation_data), shared(test_data)]
class Network(object):
    """A feed-forward network assembled from a list of layer objects.

    The layers are wired together symbolically when the network is
    constructed; `SGD` compiles the Theano functions and runs the
    training loop.
    """

    def __init__(self, layers, mini_batch_size):
        """Takes a list of `layers`, describing the network architecture, and
        a value for the `mini_batch_size` to be used during training
        by stochastic gradient descent.
        """
        self.layers = layers
        self.mini_batch_size = mini_batch_size
        # Flat list of every trainable parameter, in layer order.
        self.params = [param for layer in self.layers for param in layer.params]
        self.x = T.matrix("x")
        self.y = T.ivector("y")
        # The first layer receives the raw input on both the plain and the
        # dropout path.
        init_layer = self.layers[0]
        init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
        # Every later layer is fed by the outputs of the layer before it.
        for prev_layer, layer in zip(self.layers, self.layers[1:]):
            layer.set_inpt(
                prev_layer.output, prev_layer.output_dropout,
                self.mini_batch_size)
        self.output = self.layers[-1].output
        self.output_dropout = self.layers[-1].output_dropout

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            validation_data, test_data, lmbda=0.0):
        """Train the network using mini-batch stochastic gradient descent.

        `training_data`, `validation_data` and `test_data` are each an
        ``(x, y)`` pair; ``x`` must support ``get_value`` (it is measured
        with `size`) and both must support slicing.  `epochs` is the
        number of passes over the training data, `eta` the learning rate
        and `lmbda` the L2 regularization strength.

        `mini_batch_size` is used only to count the batches; the slices
        fed to Theano use ``self.mini_batch_size``, so the two values are
        expected to match.

        Progress is printed to stdout; nothing is returned.
        """
        training_x, training_y = training_data
        validation_x, validation_y = validation_data
        test_x, test_y = test_data

        # Floor division keeps the batch counts (and every iteration
        # number derived from them) as ints.  True division produced
        # floats on Python 3, and for dataset sizes that are not a
        # multiple of the batch size the end-of-epoch test below could
        # never succeed.
        num_training_batches = size(training_data) // mini_batch_size
        num_validation_batches = size(validation_data) // mini_batch_size
        num_test_batches = size(test_data) // mini_batch_size

        # Regularized cost, its gradients and the plain SGD update rule.
        l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
        cost = self.layers[-1].cost(self)+\
               0.5*lmbda*l2_norm_squared/num_training_batches
        grads = T.grad(cost, self.params)
        updates = [(param, param-eta*grad)
                   for param, grad in zip(self.params, grads)]

        i = T.lscalar()  # symbolic mini-batch index

        def batch(data):
            # Rows of `data` that belong to mini-batch `i`.
            return data[i*self.mini_batch_size: (i+1)*self.mini_batch_size]

        train_mb = theano.function(
            [i], cost, updates=updates,
            givens={self.x: batch(training_x), self.y: batch(training_y)})
        validate_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={self.x: batch(validation_x), self.y: batch(validation_y)})
        test_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={self.x: batch(test_x), self.y: batch(test_y)})
        self.test_mb_predictions = theano.function(
            [i], self.layers[-1].y_out,
            givens={self.x: batch(test_x)})

        best_validation_accuracy = 0.0
        # Stay None until an epoch completes, so the summary below cannot
        # hit an unbound local when no training happened.
        best_iteration = None
        test_accuracy = None
        for epoch in range(epochs):
            for minibatch_index in range(num_training_batches):
                iteration = num_training_batches*epoch+minibatch_index
                if iteration % 1000 == 0:
                    print("Training mini-batch number {0}".format(iteration))
                train_mb(minibatch_index)
                # Evaluate once per epoch, right after its last mini-batch.
                if (iteration+1) % num_training_batches == 0:
                    validation_accuracy = np.mean(
                        [validate_mb_accuracy(j)
                         for j in range(num_validation_batches)])
                    print("Epoch {0}: validation accuracy {1:.2%}".format(
                        epoch, validation_accuracy))
                    if validation_accuracy >= best_validation_accuracy:
                        print("This is the best validation accuracy to date.")
                        best_validation_accuracy = validation_accuracy
                        best_iteration = iteration
                        if test_data:
                            test_accuracy = np.mean(
                                [test_mb_accuracy(j)
                                 for j in range(num_test_batches)])
                            print('The corresponding test accuracy is {0:.2%}'.format(
                                test_accuracy))
        print("Finished training network.")
        if best_iteration is None:
            # No epoch finished, so there is nothing to summarize.
            return
        print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(
            best_validation_accuracy, best_iteration))
        if test_accuracy is not None:
            print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))
class ConvPoolLayer(object):
    """A convolutional layer fused with the max-pooling layer that follows it.

    Keeping the two stages in separate classes would be cleaner in
    general, but they are always used as a pair here, so bundling them
    keeps the code short.
    """

    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=sigmoid):
        """Create the layer's weights and biases.

        `filter_shape` has four entries: number of filters, number of
        input feature maps, filter height, filter width.

        `image_shape` has four entries: mini-batch size, number of input
        feature maps, image height, image width.

        `poolsize` has two entries: the y and x pooling sizes.
        """
        self.filter_shape = filter_shape
        self.image_shape = image_shape
        self.poolsize = poolsize
        self.activation_fn = activation_fn

        # The weights are drawn with standard deviation sqrt(1 / n_out).
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
        weight_init = np.random.normal(
            loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape)
        self.w = theano.shared(
            np.asarray(weight_init, dtype=theano.config.floatX),
            borrow=True)

        # One bias per filter.
        bias_init = np.random.normal(
            loc=0, scale=1.0, size=(filter_shape[0],))
        self.b = theano.shared(
            np.asarray(bias_init, dtype=theano.config.floatX),
            borrow=True)

        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Build the symbolic output of the layer for the input `inpt`.

        `inpt_dropout` and `mini_batch_size` are accepted so the call
        matches the other layers, but this layer does not read them: the
        dropout output is simply the regular output.
        """
        self.inpt = inpt.reshape(self.image_shape)
        convolved = conv.conv2d(
            input=self.inpt,
            filters=self.w,
            filter_shape=self.filter_shape,
            image_shape=self.image_shape)
        pooled = downsample.max_pool_2d(
            input=convolved, ds=self.poolsize, ignore_border=True)
        # Broadcast the per-filter bias over the batch and spatial axes.
        broadcast_bias = self.b.dimshuffle('x', 0, 'x', 'x')
        self.output = self.activation_fn(pooled + broadcast_bias)
        self.output_dropout = self.output
class FullyConnectedLayer(object):
    """Dense layer with a configurable activation and optional dropout."""

    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        """Create an `n_in` x `n_out` weight matrix and `n_out` biases.

        `p_dropout` is the dropout probability applied to this layer's
        input on the dropout path.
        """
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout

        # Weights are drawn with standard deviation sqrt(1 / n_out).
        weight_init = np.random.normal(
            loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out))
        self.w = theano.shared(
            np.asarray(weight_init, dtype=theano.config.floatX),
            name='w', borrow=True)

        bias_init = np.random.normal(loc=0.0, scale=1.0, size=(n_out,))
        self.b = theano.shared(
            np.asarray(bias_init, dtype=theano.config.floatX),
            name='b', borrow=True)

        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Build both symbolic outputs of the layer.

        `output` is computed from `inpt` with the weighted sum scaled by
        ``1 - p_dropout``; `output_dropout` is computed from
        `inpt_dropout` after passing it through `dropout_layer`.
        """
        flat_shape = (mini_batch_size, self.n_in)

        # Regular path.
        self.inpt = inpt.reshape(flat_shape)
        scaled_sum = (1-self.p_dropout)*T.dot(self.inpt, self.w)
        self.output = self.activation_fn(scaled_sum + self.b)
        self.y_out = T.argmax(self.output, axis=1)

        # Dropout path.
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape(flat_shape), self.p_dropout)
        dropped_sum = T.dot(self.inpt_dropout, self.w)
        self.output_dropout = self.activation_fn(dropped_sum + self.b)

    def accuracy(self, y):
        """Return the fraction of the mini-batch whose prediction equals `y`."""
        hits = T.eq(y, self.y_out)
        return T.mean(hits)
class SoftmaxLayer(object):
    """Softmax output layer with a log-likelihood cost and optional dropout."""

    def __init__(self, n_in, n_out, p_dropout=0.0):
        """Create zero-initialized weights (`n_in` x `n_out`) and biases."""
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout

        zero_weights = np.zeros((n_in, n_out), dtype=theano.config.floatX)
        self.w = theano.shared(zero_weights, name='w', borrow=True)

        zero_biases = np.zeros((n_out,), dtype=theano.config.floatX)
        self.b = theano.shared(zero_biases, name='b', borrow=True)

        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        """Build both symbolic outputs of the layer.

        `output` is computed from `inpt` with the weighted sum scaled by
        ``1 - p_dropout``; `output_dropout` is computed from
        `inpt_dropout` after passing it through `dropout_layer`.
        """
        flat_shape = (mini_batch_size, self.n_in)

        # Regular path.
        self.inpt = inpt.reshape(flat_shape)
        scaled_sum = (1-self.p_dropout)*T.dot(self.inpt, self.w)
        self.output = softmax(scaled_sum + self.b)
        self.y_out = T.argmax(self.output, axis=1)

        # Dropout path.
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape(flat_shape), self.p_dropout)
        dropped_sum = T.dot(self.inpt_dropout, self.w)
        self.output_dropout = softmax(dropped_sum + self.b)

    def cost(self, net):
        """Return the log-likelihood cost, computed on the dropout output."""
        log_probs = T.log(self.output_dropout)
        rows = T.arange(net.y.shape[0])
        # Select, for every row, the log-probability of its label in `net.y`.
        return -T.mean(log_probs[rows, net.y])

    def accuracy(self, y):
        """Return the fraction of the mini-batch whose prediction equals `y`."""
        hits = T.eq(y, self.y_out)
        return T.mean(hits)
def size(data):
    """Return how many rows the first element of `data` holds."""
    values = data[0].get_value(borrow=True)
    return values.shape[0]
def dropout_layer(layer, p_dropout):
    """Multiply `layer` element-wise by a random 0/1 mask.

    Each mask entry is a single binomial trial that is 1 with
    probability ``1 - p_dropout``.
    """
    # The seed comes from a RandomState fixed at 0, so it is the same on
    # every call.
    seed = np.random.RandomState(0).randint(999999)
    stream = shared_randomstreams.RandomStreams(seed)
    keep_mask = stream.binomial(n=1, p=1-p_dropout, size=layer.shape)
    return layer*T.cast(keep_mask, theano.config.floatX)
# Driver: load the data, build a conv/conv/dense/softmax network and train it.
mini_batch_size = 10
training_data, validation_data, test_data = load_data_shared()
training_x, training_y = training_data
training_x.get_value(borrow=True).shape
# Notebook output of the expression above: (50000, 784)

net = Network(
    [
        ConvPoolLayer(
            filter_shape=[20, 1, 5, 5],
            image_shape=[mini_batch_size, 1, 28, 28],
            poolsize=(2, 2),
            activation_fn=ReLU,
        ),
        ConvPoolLayer(
            filter_shape=[40, 20, 5, 5],
            image_shape=[mini_batch_size, 20, 12, 12],
            poolsize=(2, 2),
            activation_fn=ReLU,
        ),
        FullyConnectedLayer(
            n_in=40*4*4,
            n_out=100,
            activation_fn=sigmoid,
            p_dropout=0.5,
        ),
        SoftmaxLayer(n_in=100, n_out=10, p_dropout=0.5),
    ],
    mini_batch_size,
)
net.SGD(
    training_data,
    epochs=20,
    mini_batch_size=mini_batch_size,
    eta=0.03,
    validation_data=validation_data,
    test_data=test_data,
    lmbda=0.1,
)
Training mini-batch number 0.0
Training mini-batch number 1000.0
Training mini-batch number 2000.0
Training mini-batch number 3000.0
Training mini-batch number 4000.0
Epoch 0: validation accuracy 91.63%
This is the best validation accuracy to date.
The corresponding test accuracy is 91.46%
Training mini-batch number 5000.0
Training mini-batch number 6000.0
Training mini-batch number 7000.0
Training mini-batch number 8000.0
Training mini-batch number 9000.0
Epoch 1: validation accuracy 95.71%
This is the best validation accuracy to date.
The corresponding test accuracy is 95.53%
Training mini-batch number 10000.0
Training mini-batch number 11000.0
Training mini-batch number 12000.0
Training mini-batch number 13000.0
Training mini-batch number 14000.0
Epoch 2: validation accuracy 96.79%
This is the best validation accuracy to date.
The corresponding test accuracy is 96.55%
Training mini-batch number 15000.0
Training mini-batch number 16000.0
Training mini-batch number 17000.0
Training mini-batch number 18000.0
Training mini-batch number 19000.0
Epoch 3: validation accuracy 97.34%
This is the best validation accuracy to date.
The corresponding test accuracy is 97.09%
Training mini-batch number 20000.0
Training mini-batch number 21000.0
Training mini-batch number 22000.0
Training mini-batch number 23000.0
Training mini-batch number 24000.0
Epoch 4: validation accuracy 97.88%
This is the best validation accuracy to date.
The corresponding test accuracy is 97.77%
Training mini-batch number 25000.0
Training mini-batch number 26000.0
Training mini-batch number 27000.0
Training mini-batch number 28000.0
Training mini-batch number 29000.0
Epoch 5: validation accuracy 98.10%
This is the best validation accuracy to date.
The corresponding test accuracy is 97.99%
Training mini-batch number 30000.0
Training mini-batch number 31000.0
Training mini-batch number 32000.0
Training mini-batch number 33000.0
Training mini-batch number 34000.0
Epoch 6: validation accuracy 98.33%
This is the best validation accuracy to date.
The corresponding test accuracy is 98.25%
Training mini-batch number 35000.0
Training mini-batch number 36000.0
Training mini-batch number 37000.0
Training mini-batch number 38000.0
Training mini-batch number 39000.0
Epoch 7: validation accuracy 98.49%
This is the best validation accuracy to date.
The corresponding test accuracy is 98.49%
Training mini-batch number 40000.0
Training mini-batch number 41000.0
Training mini-batch number 42000.0
Training mini-batch number 43000.0
Training mini-batch number 44000.0
Epoch 8: validation accuracy 98.46%
Training mini-batch number 45000.0
Training mini-batch number 46000.0
Training mini-batch number 47000.0
Training mini-batch number 48000.0
Training mini-batch number 49000.0
Epoch 9: validation accuracy 98.66%
This is the best validation accuracy to date.
The corresponding test accuracy is 98.67%
Training mini-batch number 50000.0
Training mini-batch number 51000.0
Training mini-batch number 52000.0
Training mini-batch number 53000.0
Training mini-batch number 54000.0
Epoch 10: validation accuracy 98.74%
This is the best validation accuracy to date.
The corresponding test accuracy is 98.75%
Training mini-batch number 55000.0
Training mini-batch number 56000.0
Training mini-batch number 57000.0
Training mini-batch number 58000.0
Training mini-batch number 59000.0
Epoch 11: validation accuracy 98.72%
Training mini-batch number 60000.0
Training mini-batch number 61000.0
Training mini-batch number 62000.0
Training mini-batch number 63000.0
Training mini-batch number 64000.0
Epoch 12: validation accuracy 98.85%
This is the best validation accuracy to date.
The corresponding test accuracy is 98.85%
Training mini-batch number 65000.0
Training mini-batch number 66000.0
Training mini-batch number 67000.0
Training mini-batch number 68000.0
Training mini-batch number 69000.0
Epoch 13: validation accuracy 98.83%
Training mini-batch number 70000.0
Training mini-batch number 71000.0
Training mini-batch number 72000.0
Training mini-batch number 73000.0
Training mini-batch number 74000.0
Epoch 14: validation accuracy 98.94%
This is the best validation accuracy to date.
The corresponding test accuracy is 98.96%
Training mini-batch number 75000.0
Training mini-batch number 76000.0
Training mini-batch number 77000.0
Training mini-batch number 78000.0
Training mini-batch number 79000.0
Epoch 15: validation accuracy 98.91%
Training mini-batch number 80000.0
Training mini-batch number 81000.0
Training mini-batch number 82000.0
Training mini-batch number 83000.0
Training mini-batch number 84000.0
Epoch 16: validation accuracy 98.89%
Training mini-batch number 85000.0
Training mini-batch number 86000.0
Training mini-batch number 87000.0
Training mini-batch number 88000.0
Training mini-batch number 89000.0
Epoch 17: validation accuracy 98.85%
Training mini-batch number 90000.0
Training mini-batch number 91000.0
Training mini-batch number 92000.0
Training mini-batch number 93000.0
Training mini-batch number 94000.0
Epoch 18: validation accuracy 99.08%
This is the best validation accuracy to date.
The corresponding test accuracy is 99.17%
Training mini-batch number 95000.0
Training mini-batch number 96000.0
Training mini-batch number 97000.0
Training mini-batch number 98000.0
Training mini-batch number 99000.0
Epoch 19: validation accuracy 99.03%
Finished training network.
Best validation accuracy of 99.08% obtained at iteration 94999.0
Corresponding test accuracy of 99.17%