Saving this here for everyone's reference: logistic regression (softmax) on MNIST with Theano.
import six.moves.cPickle as pickle
import gzip
import os
import sys
import timeit
import numpy
import theano
import theano.tensor as T
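# Dependencies: this script targets the Theano 0.x era API (theano.shared,
# theano.function, T.nnet.softmax) and uses six for Python 2/3 pickle
# compatibility.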
def load_data(dataset):
    # Load the pickled MNIST splits; the encoding argument is needed on
    # Python 3, while Python 2's pickle.load does not accept it.
    with gzip.open(dataset, 'rb') as f:
        try:
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except TypeError:
            train_set, valid_set, test_set = pickle.load(f)

    def shared_dataset(data_xy, borrow=True):
        # Keep the data in Theano shared variables so minibatches can be
        # sliced inside the graph without repeated host-to-device copies.
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
                                 borrow=borrow)
        # The labels are needed as integers, so cast the float shared variable.
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)
    return [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
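# Optional helper (an assumption, not part of the original script): fetch
# the dataset when it is missing. The URL is the classic tutorial mirror
# at deeplearning.net, which may no longer be reachable; adjust as needed.
def maybe_download_mnist(dataset='mnist.pkl.gz'):
    from six.moves import urllib
    if not os.path.isfile(dataset):
        origin = 'http://deeplearning.net/data/mnist/mnist.pkl.gz'
        urllib.request.urlretrieve(origin, dataset)
    return dataset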
class LogisticRegression(object):
    def __init__(self, input, n_in, n_out):
        # Weights and biases start at zero; softmax regression is convex,
        # so zero initialization is fine here.
        self.w = theano.shared(
            value=numpy.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            value=numpy.zeros(n_out, dtype=theano.config.floatX),
            name='b', borrow=True)
        # Class-membership probabilities and the most likely class.
        self.pyx = T.nnet.softmax(T.dot(input, self.w) + self.b)
        self.y_pred = T.argmax(self.pyx, axis=1)
        self.params = [self.w, self.b]
        self.input = input

    def negative_log_likelihood(self, y):
        # Mean over the minibatch of -log P(y_i | x_i): for each row i,
        # pick the log-probability of the true class y[i].
        return -T.mean(T.log(self.pyx)[T.arange(y.shape[0]), y])

    def errors(self, y):
        if y.ndim != self.y_pred.ndim:
            raise TypeError(('y', y.type, 'y_pred', self.y_pred.type))
        if y.dtype.startswith('int'):
            # Zero-one loss: fraction of misclassified examples.
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()
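# Illustrative only (never called): a NumPy rendering of the row-wise
# indexing inside negative_log_likelihood, to show what
# T.log(pyx)[T.arange(y.shape[0]), y] actually selects.
def _nll_numpy_sketch():
    log_p = numpy.log(numpy.array([[0.7, 0.2, 0.1],
                                   [0.1, 0.8, 0.1]]))
    y = numpy.array([0, 1])
    # picks log_p[0, y[0]] and log_p[1, y[1]], i.e. each row's true class
    return -numpy.mean(log_p[numpy.arange(y.shape[0]), y])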
def sgd_optimization_mnist(learning_rate=0.13, n_epoches=1000,
                           dataset='mnist.pkl.gz', batch_size=600):
    data = load_data(dataset)
    train_x, train_y = data[0]
    valid_x, valid_y = data[1]
    test_x, test_y = data[2]
    # number of complete minibatches per split
    n_train_batch = train_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batch = valid_x.get_value(borrow=True).shape[0] // batch_size

    x = T.matrix('x')    # flattened 28*28 pixel images
    y = T.ivector('y')   # integer class labels
    index = T.lscalar()  # minibatch index
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)
    cost = classifier.negative_log_likelihood(y)
    # Compiled functions that report the error on one minibatch of the
    # test/validation set; `givens` slices the shared data by index.
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_x[index * batch_size: (index + 1) * batch_size],
            y: test_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    valid_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_x[index * batch_size: (index + 1) * batch_size],
            y: valid_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # Vanilla SGD: theta <- theta - learning_rate * d(cost)/d(theta).
    g_w = T.grad(cost, classifier.w)
    g_b = T.grad(cost, classifier.b)
    updates = [(classifier.w, classifier.w - learning_rate * g_w),
               (classifier.b, classifier.b - learning_rate * g_b)]
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_x[index * batch_size: (index + 1) * batch_size],
            y: train_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    best_validation_loss = numpy.inf
    improvement_threshold = 0.995
    epoch = 0
    max_epoch = 100
    start_time = timeit.default_timer()
    # Train until the epoch budget runs out; the budget doubles whenever
    # the validation loss improves significantly, with n_epoches as a
    # hard cap so the loop always terminates.
    while epoch < min(max_epoch, n_epoches):
        for minibatch_index in range(n_train_batch):
            train_model(minibatch_index)
        validation_loss = numpy.mean([valid_model(i) for i in range(n_valid_batch)])
        if validation_loss < best_validation_loss:
            if validation_loss < best_validation_loss * improvement_threshold:
                # significant improvement: allow training to run longer
                max_epoch = max(max_epoch, epoch * 2)
            best_validation_loss = validation_loss
            with open('best_model.pkl', 'wb') as f:
                pickle.dump(classifier, f)
            print('epoch %i, validation error %f %%' % (epoch, validation_loss * 100.))
        epoch += 1
    end_time = timeit.default_timer()
    print('The code ran for %d epochs, at %f epochs/sec' % (epoch, 1. * epoch / (end_time - start_time)))
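# A tiny self-contained sketch of the `givens` idiom used above: the
# compiled function takes only a minibatch index, and the shared variable
# is sliced inside the graph, so no data is copied in per call. The names
# here are illustrative and not part of the model.
def _givens_sketch():
    data = theano.shared(numpy.arange(10, dtype=theano.config.floatX))
    i = T.lscalar()
    v = T.vector('v')
    f = theano.function([i], v.sum(), givens={v: data[i * 2:(i + 1) * 2]})
    return f(0)  # sums data[0:2] -> 1.0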
def predict():
    # Reload the pickled model and rebuild the graph around a fresh input
    # variable, copying over the trained parameters.
    with open('best_model.pkl', 'rb') as f:
        trained = pickle.load(f)
    dataset = 'mnist.pkl.gz'
    data = load_data(dataset)
    test_x, test_y = data[2]

    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar()
    classifier = LogisticRegression(x, 28 * 28, 10)
    classifier.w.set_value(trained.w.get_value())
    classifier.b.set_value(trained.b.get_value())

    batch_size = 600
    n_test_batch = test_x.get_value(borrow=True).shape[0] // batch_size
    predict_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_x[index * batch_size: (index + 1) * batch_size],
            y: test_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    test_error = numpy.mean([predict_model(i) for i in range(n_test_batch)])
    print('test error is %f %%' % (test_error * 100.))
if __name__ == '__main__':
    sgd_optimization_mnist()
    predict()
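# Usage note: running this file first trains the model (checkpointing the
# best parameters to best_model.pkl along the way), then reloads it to
# report the test error. On the standard MNIST split, a plain softmax
# classifier like this one typically ends up around 7-8% test error,
# though the exact figure varies from run to run.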