"""Train and evaluate a small Theano CNN on 64x64 grayscale character images.

The script targets Python 2 and an old Theano release (it relies on
``cPickle`` and ``theano.tensor.signal.downsample``).  It expects the
following files relative to the working directory:

* ``./trainLabels.csv``   -- header line, then ``<id>,<class char>`` rows
* ``./trainResized/<id>.Bmp`` and ``./testResized/<id>.Bmp`` -- images that
  ``cv2.imread`` can load as 64x64 grayscale (the code reshapes each image
  to a vector of 64*64 values)

Running the module trains the network and pickles its parameters to
``weightCNN``; if that file already exists, it is loaded and only the error
rate is reported.
"""

import os
from random import randint, uniform
import cPickle
import gzip
import string
import sys
import time
import csv

import cv2
import numpy
from skimage.io import imshow
from skimage import transform, filters, exposure
import theano
import theano.tensor as T
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv


# Side length, in pixels, of the square grayscale input images.
IMAGE_SIDE = 64

# File the trained parameters are pickled to and restored from.
WEIGHTS_PATH = 'weightCNN'

# Test images are named "<id>.Bmp" for id in [TEST_ID_START, TEST_ID_STOP).
TEST_ID_START = 6284
TEST_ID_STOP = 12504

# Class alphabet in index order: '0'-'9', then 'A'-'Z', then 'a'-'z'.
_CLASS_CHARS = string.digits + string.ascii_uppercase + string.ascii_lowercase

# Number of output classes of the softmax layer (62).
N_CLASSES = len(_CLASS_CHARS)

# DIC1 maps a class index (0..61) back to its character.
DIC1 = dict(enumerate(_CLASS_CHARS))

# DIC2 maps a character to its class index (0..61); the inverse of DIC1.
DIC2 = dict((char, index) for index, char in enumerate(_CLASS_CHARS))

# Hand-checked labels of the first 128 test images (ids 6284..6411), used as
# a quick sanity check before a submission file is written.
_KNOWN_TEST_LABELS = (
    'HEIpTtodBH'
    'NWnHesARXS'
    'M1A8ASBUnn'
    'AWeVcaLhoi'
    'WIAmgIuAiV'
    'uEiaWUbSn8'
    '0PE7RfcpWI'
    'o60JrNdWNH'
    'XMZPHIYhNR'
    'AyLAFSTMOE'
    'RRSGnoAeT2'
    'APImSAtStE'
    'NVWmI5NC'
)


def _load_gray_images(paths):
    """Load images as flattened, [0, 1]-scaled rows of a Theano shared matrix.

    :param paths: iterable of image file paths
    :return: shared variable of dtype ``floatX`` with one row per image
    :raises IOError: if OpenCV cannot read one of the files
    """
    rows = []
    for path in paths:
        # Flag 0 asks OpenCV for a single-channel grayscale image.
        img = cv2.imread(path, 0)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError('cannot read image: %s' % path)
        rows.append(img.reshape(IMAGE_SIDE * IMAGE_SIDE) / 255.)
    return theano.shared(
        numpy.asarray(rows, dtype=theano.config.floatX),
        borrow=True
    )


def load_test_data():
    """Load every test image from ``./testResized`` into a shared matrix.

    :return: shared variable with one flattened image per row, ordered by
        image id from TEST_ID_START up to (excluding) TEST_ID_STOP
    """
    paths = ['./testResized/' + str(image_id) + '.Bmp'
             for image_id in range(TEST_ID_START, TEST_ID_STOP)]
    return _load_gray_images(paths)


def load_data_cv(train_path = './trainLabels.csv'):
    """Read the training CSV and return image paths and raw class labels.

    The first line of the file is treated as a header and skipped.  Each
    remaining line is split on commas; field 0 is the image id and field 1
    the class character.

    :param train_path: path of the label CSV
    :return: ``(paths, labels)`` -- two parallel lists of strings
    """
    print('Read training data ....')
    train_data = []    # image file paths
    train_label = []   # class characters, still as strings
    with open(train_path, 'r') as reader:
        next(reader, None)  # skip the header row
        for line in reader:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            fields = line.rstrip().split(',')
            train_data.append('./trainResized/' + fields[0] + '.Bmp')
            train_label.append(fields[1])
    return train_data, train_label


def load_data():
    """Load the training images and their integer class labels.

    Labels are encoded with the fixed ``DIC2`` table so that the indices the
    network is trained on always agree with the ``DIC1`` table used to decode
    predictions, even if some class is absent from the CSV.

    :return: ``(shared_x, shared_y)`` where ``shared_x`` is a shared matrix
        of flattened images and ``shared_y`` a symbolic int32 label vector
    :raises ValueError: if the CSV contains a label that is not in ``DIC2``
    """
    address, labels = load_data_cv()
    try:
        class_ids = [DIC2[label] for label in labels]
    except KeyError as err:
        raise ValueError(
            'unknown class label in training CSV: %r' % (err.args[0],)
        )
    print(class_ids[:20])  # show the first 20 encoded labels

    shared_x = _load_gray_images(address)
    # The labels are stored as floatX and cast back to int32 symbolically,
    # as in the original code (the deeplearning.net tutorials do the same).
    shared_y = theano.shared(
        numpy.asarray(class_ids, dtype=theano.config.floatX),
        borrow=True
    )
    return shared_x, T.cast(shared_y, 'int32')


class LogisticRegression(object):
    """Multi-class logistic regression (softmax) output layer.

    The layer is described by a weight matrix ``W`` and a bias vector ``b``.
    Class-membership probabilities are ``softmax(input . W + b)`` and the
    prediction is the class with the highest probability.
    """

    def __init__(self, input, n_in, n_out):
        """Create the layer parameters and the symbolic outputs.

        :type input: theano.tensor.TensorType
        :param input: symbolic variable holding one minibatch, one example
            per row

        :type n_in: int
        :param n_in: number of input units (dimension of each example)

        :type n_out: int
        :param n_out: number of output units (number of classes)
        """
        # Weights start at zero, shape (n_in, n_out); column k is the
        # separating hyperplane of class k.
        self.W = theano.shared(
            value=numpy.zeros(
                (n_in, n_out),
                dtype=theano.config.floatX
            ),
            name='W',
            borrow=True
        )
        # Biases start at zero, one per class.
        self.b = theano.shared(
            value=numpy.zeros(
                (n_out,),
                dtype=theano.config.floatX
            ),
            name='b',
            borrow=True
        )

        # Matrix of class-membership probabilities, one row per example.
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

        # Predicted class = index of the largest probability in each row.
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

        # Parameters of the model.
        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y):
        r"""Return the mean negative log-likelihood of the labels ``y``.

        .. math::

            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
                \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
            \ell (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: vector giving the correct label of each example

        Note: the mean is used instead of the sum so that the learning rate
        is less dependent on the batch size.
        """
        # T.log(self.p_y_given_x) is a matrix LP of log-probabilities with
        # one row per example and one column per class.  Indexing it with
        # [arange(n), y] picks LP[i, y[i]] for every example i; the mean of
        # that vector is the mean log-likelihood of the minibatch.
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        """Return the fraction of examples in the minibatch that are
        misclassified (zero-one loss averaged over the minibatch).

        :type y: theano.tensor.TensorType
        :param y: vector giving the correct label of each example
        :raises TypeError: if ``y`` does not have the same number of
            dimensions as the prediction vector
        :raises NotImplementedError: if ``y`` is not of an integer dtype
        """
        # y must have the same number of dimensions as y_pred.
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        # Only integer labels can be compared against the predictions.
        if y.dtype.startswith('int'):
            # T.neq yields a vector of 0s and 1s, where 1 marks a mistake.
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()


class HiddenLayer(object):
    """Fully-connected layer: ``activation(input . W + b)``."""

    def __init__(self, rng, input, n_in, n_out, W=None, b=None,
                 activation=T.tanh):
        """Create the layer, initialising ``W`` and ``b`` unless supplied.

        :param rng: ``numpy.random.RandomState`` used to draw initial weights
        :param input: symbolic matrix, one example per row
        :param n_in: number of input units
        :param n_out: number of output units
        :param W: optional existing shared weight matrix to reuse
        :param b: optional existing shared bias vector to reuse
        :param activation: elementwise non-linearity, or ``None`` for a
            purely linear layer
        """
        self.input = input

        if W is None:
            # Uniform initialisation in +/- sqrt(6 / (n_in + n_out)).
            bound = numpy.sqrt(6. / (n_in + n_out))
            W_values = numpy.asarray(
                rng.uniform(low=-bound, high=bound, size=(n_in, n_out)),
                dtype=theano.config.floatX
            )
            if activation == theano.tensor.nnet.sigmoid:
                # The original code uses a 4x wider range for sigmoid units.
                W_values *= 4
            W = theano.shared(value=W_values, name='W', borrow=True)

        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b', borrow=True)

        self.W = W
        self.b = b

        lin_output = T.dot(input, self.W) + self.b
        self.output = (
            lin_output if activation is None
            else activation(lin_output)
        )
        self.params = [self.W, self.b]


class LeNetConvPoolLayer(object):
    """Convolution followed by max-pooling and a tanh non-linearity."""

    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        """Create the layer.

        :param rng: ``numpy.random.RandomState`` used to draw initial weights
        :param input: symbolic 4D image tensor of shape ``image_shape``
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)
        :param poolsize: the (rows, cols) downsampling factor
        """
        assert image_shape[1] == filter_shape[1]
        self.input = input

        # Each hidden unit has "num input feature maps * filter height *
        # filter width" inputs.
        fan_in = numpy.prod(filter_shape[1:])
        # Each unit in the lower layer receives a gradient from
        # "num output feature maps * filter height * filter width" /
        # pooling size.  Floor division keeps the integer result the
        # original Python 2 "/" produced.
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) //
                   numpy.prod(poolsize))

        # Uniform initialisation in +/- sqrt(6 / (fan_in + fan_out)).
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(
            numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )

        # The bias is a 1D tensor -- one bias per output feature map.
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        # Convolve the input feature maps with the filters.
        conv_out = conv.conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            image_shape=image_shape
        )

        # Downsample each feature map individually, using max-pooling.
        pooled_out = downsample.max_pool_2d(
            input=conv_out,
            ds=poolsize,
            ignore_border=True
        )

        # The bias vector is reshaped to (1, n_filters, 1, 1) so that it is
        # broadcast across minibatch examples and across feature-map rows
        # and columns.  See
        # http://deeplearning.net/software/theano/library/tensor/basic.html
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

        # Parameters of this layer.
        self.params = [self.W, self.b]


def _build_network(rng, x, batch_size, nkerns):
    """Build the 4 conv/pool + 2 hidden + softmax network used by this file.

    Every conv layer uses 3x3 filters and 2x2 max-pooling, so the feature-map
    side shrinks 64 -> 31 -> 14 -> 6 -> 2 and the first hidden layer sees
    ``nkerns[3] * 2 * 2`` inputs.

    :param rng: ``numpy.random.RandomState`` used for weight initialisation
    :param x: symbolic matrix of flattened images, one per row
    :param batch_size: number of images per minibatch (fixed in the graph)
    :param nkerns: sequence whose first four items are the number of filters
        of the four conv layers
    :return: ``[layer0, ..., layer6]``; the last item is the
        ``LogisticRegression`` output layer
    """
    layers = []
    signal = x.reshape((batch_size, 1, IMAGE_SIDE, IMAGE_SIDE))
    n_maps = 1
    side = IMAGE_SIDE
    for depth in range(4):
        n_filters = nkerns[depth]
        layer = LeNetConvPoolLayer(
            rng,
            input=signal,
            image_shape=(batch_size, n_maps, side, side),
            filter_shape=(n_filters, n_maps, 3, 3),
            poolsize=(2, 2)
        )
        layers.append(layer)
        signal = layer.output
        n_maps = n_filters
        # A "valid" 3x3 convolution removes 2 pixels; 2x2 pooling with
        # ignore_border=True then halves the side, rounding down.
        side = (side - 3 + 1) // 2

    # Flatten (batch, maps, side, side) to (batch, maps * side * side).
    layer4 = HiddenLayer(
        rng,
        input=signal.flatten(2),
        n_in=n_maps * side * side,
        n_out=2048,
        activation=T.tanh
    )
    layer5 = HiddenLayer(
        rng,
        input=layer4.output,
        n_in=2048,
        n_out=2048,
        activation=T.tanh
    )
    layer6 = LogisticRegression(
        input=layer5.output, n_in=2048, n_out=N_CLASSES
    )
    layers.extend([layer4, layer5, layer6])
    return layers


def _load_weights(layers, path=WEIGHTS_PATH):
    """Restore every layer's ``(W, b)`` from the pickle stream at ``path``.

    NOTE(security): unpickling executes arbitrary code; only load weight
    files that this script wrote itself.
    """
    # The weights are dumped with a binary pickle protocol, so the file has
    # to be read in binary mode.
    with open(path, 'rb') as save_file:
        for layer in layers:
            w, b = layer.params
            w.set_value(cPickle.load(save_file), borrow=True)
            b.set_value(cPickle.load(save_file), borrow=True)


def _save_weights(layers, path=WEIGHTS_PATH):
    """Pickle every layer's ``(W, b)`` to ``path``, in layer order."""
    with open(path, 'wb') as save_file:
        for layer in layers:
            w, b = layer.params
            cPickle.dump(w.get_value(borrow=True), save_file, -1)
            cPickle.dump(b.get_value(borrow=True), save_file, -1)


def submit_74k(nkerns=[128,128,256,256], batch_size=311):
    """Classify the test images with the weights stored in ``weightCNN``.

    Only whole minibatches are evaluated: if ``batch_size`` does not divide
    the number of test images, the remainder is not predicted.  The default
    of 311 divides the 6220 test images exactly.

    :param nkerns: number of filters of the four conv layers (not mutated)
    :param batch_size: number of images per minibatch
    :return: list with one array of predicted class indices per minibatch
    """
    rng = numpy.random.RandomState(23455)
    test_set = load_test_data()
    n_test_batches = test_set.get_value(borrow=True).shape[0] // batch_size

    index = T.lscalar()  # index of a minibatch
    x = T.matrix('x')    # flattened images, one per row

    layers = _build_network(rng, x, batch_size, nkerns)

    test_model = theano.function(
        [index],
        layers[-1].y_pred,
        givens={
            x: test_set[index * batch_size: (index + 1) * batch_size],
        }
    )

    _load_weights(layers)

    return [test_model(i) for i in range(n_test_batches)]


def evaluate_74k(learning_rate=0.1, n_epochs=10,
                 nkerns=[128,128,256,256], batch_size=200):
    """Train the network, or report its error rate if weights already exist.

    If ``weightCNN`` exists, the stored weights are loaded and the error
    rate over all whole minibatches is printed.  Otherwise the network is
    trained with plain minibatch SGD for ``n_epochs`` epochs and the
    resulting weights are written to ``weightCNN``.

    The "test" set is the training set itself, so every error rate printed
    here is a training error.

    :param learning_rate: SGD step size
    :param n_epochs: number of passes over the training set
    :param nkerns: number of filters of the four conv layers (not mutated)
    :param batch_size: number of images per minibatch
    :raises ValueError: if the training set holds fewer than ``batch_size``
        images
    """
    rng = numpy.random.RandomState(23455)

    train_set_x, train_set_y = load_data()
    # No held-out data: evaluation reuses the training set.
    test_set_x, test_set_y = train_set_x, train_set_y

    # Number of whole minibatches; a final partial batch is never used.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size
    if n_train_batches == 0:
        raise ValueError(
            'batch_size %d exceeds the number of training images' % batch_size
        )

    index = T.lscalar()  # index of a minibatch
    x = T.matrix('x')    # flattened images, one per row
    y = T.ivector('y')   # integer class labels

    print('... building the model')
    layers = _build_network(rng, x, batch_size, nkerns)
    classifier = layers[-1]

    # Error rate of the model on minibatch `index`.
    test_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    if os.path.exists(WEIGHTS_PATH):
        _load_weights(layers)
        test_losses = [test_model(i) for i in range(n_test_batches)]
        this_test_loss = numpy.mean(test_losses)
        print('the loss rate is %s %%' % (this_test_loss * 100))
        return

    # The cost minimised during training is the NLL of the model.
    cost = classifier.negative_log_likelihood(y)
    # All parameters, from the output layer back to the first conv layer.
    params = [param for layer in reversed(layers) for param in layer.params]
    grads = T.grad(cost, params)
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]
    # One SGD step on minibatch `index`.  The returned predictions do not
    # influence training; they are only there to be inspected.
    train_model = theano.function(
        [index],
        classifier.y_pred,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    print('... training')
    start_time = time.clock()
    # The per-epoch progress figure is measured on the first 10 minibatches
    # only (fewer when the data set is smaller than that).
    n_probe_batches = min(10, n_test_batches)
    for epoch in range(n_epochs):
        for batch in range(n_train_batches):
            train_model(batch)
        # NOTE: this call is made to obtain predictions to print, but it
        # also applies one more SGD update on minibatch 0; kept as in the
        # original so the training trajectory is unchanged.
        pred = train_model(0)
        print(pred[:20])
        errors = [test_model(p) for p in range(n_probe_batches)]
        print('the  %s  epoch error rate is  %s %%'
              % (epoch, numpy.mean(errors) * 100))
    end_time = time.clock()
    t = (end_time - start_time) / 60.
    print('the code running time is  %s  minites' % t)

    _save_weights(layers)


def _write_submission(csv_path='test.csv'):
    """Predict the test set and write the ``"ID","Class"`` submission CSV.

    This is the submission step that was disabled inside a string literal
    in the original script; nothing calls it by default.  Before writing,
    it prints the accuracy on the 128 hand-labelled test images in
    ``_KNOWN_TEST_LABELS``.

    :param csv_path: path of the CSV file to write
    :raises ValueError: if the number of predictions differs from the
        number of test images
    """
    prediction = submit_74k(nkerns=[128, 128, 256, 256])
    decoded = [DIC1[class_id] for batch in prediction for class_id in batch]

    n_expected = TEST_ID_STOP - TEST_ID_START
    if len(decoded) != n_expected:
        raise ValueError(
            'expected %d predictions, got %d' % (n_expected, len(decoded))
        )

    wrong = sum(
        1. for truth, guess in zip(_KNOWN_TEST_LABELS, decoded)
        if truth != guess
    )
    print('the correct rate is  %s %%'
          % (100 - wrong / len(_KNOWN_TEST_LABELS) * 100.))

    print('Saving...')
    with open(csv_path, 'w') as writer:
        writer.write('"ID","Class"\n')
        for image_id, char in enumerate(decoded, TEST_ID_START):
            writer.write(str(image_id) + ',"' + str(char) + '"\n')


if __name__ == '__main__':
    if os.path.exists('weightCNN'):
        evaluate_74k(batch_size=200,nkerns=[128,128,256,256])
    else:
        evaluate_74k(batch_size=200,nkerns=[128,128,256,256],n_epochs=50)
    # To produce the submission file instead, call _write_submission().

# (Caption from the original write-up: "final recognition rate".)
代码里有关于 Theano 测试权值保存的部分,以后如果要用到项目中去,可以提取出权值来做识别。这里推荐一篇很好的文章:http://blog.csdn.net/qiaofangjie/article/details/18042407 。现在只是看了部分代码,等有时间准备自己实现一下代码,应该还是很有意义的。