MNIST Handwritten Digit Recognition: the DNN Version

A DNN is much simpler than a CNN. Back in the day I even wrote DNN source code from scratch using gradient descent, but there is so much to learn these days that I no longer dig into algorithm source code the way I used to.


# coding=utf-8
# Copyright reserved; infringement will not be pursued
# typhoonbxq
# the University of Hong Kong

from urllib import urlretrieve
import cPickle as pickle
import os
import gzip
import numpy as np
import lasagne
import csv

from lasagne import layers
from lasagne.updates import nesterov_momentum
from nolearn.lasagne import NeuralNet

def load_dataset():
    url = 'http://deeplearning.net/data/mnist/mnist.pkl.gz'
    filename = 'mnist.pkl.gz'
    if not os.path.exists(filename):
        print("Downloading MNIST dataset...")
        urlretrieve(url, filename)
    with gzip.open(filename, 'rb') as f:
        data = pickle.load(f)
    X_train, y_train = data[0]
    X_val, y_val = data[1]
    X_test, y_test = data[2]
    X_train = X_train.reshape((-1, 1, 28, 28))
    X_val = X_val.reshape((-1, 1, 28, 28))
    X_test = X_test.reshape((-1, 1, 28, 28))
    y_train = y_train.astype(np.uint8)
    y_val = y_val.astype(np.uint8)
    y_test = y_test.astype(np.uint8)
    return X_train, y_train, X_val, y_val, X_test, y_test
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
# Flatten the 28x28 images into 784-dim vectors for the fully connected layers
X_train = X_train.reshape(-1, 1, 28 * 28)
X_test  = X_test.reshape(-1, 1, 28 * 28)
# Set the parameters for the DNN
net1 = NeuralNet(
    layers=[('input',  layers.InputLayer),
            ('dense1', layers.DenseLayer),
            ('dense2', layers.DenseLayer),
            ('output', layers.DenseLayer),
            ],
    # input layer
    input_shape=(None, 1, 28*28),
    dense1_num_units=40,
    dense1_nonlinearity = lasagne.nonlinearities.rectify,
    dense2_num_units = 25,
    dense2_nonlinearity = lasagne.nonlinearities.rectify,
    output_nonlinearity=lasagne.nonlinearities.softmax,
    output_num_units=10,
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,
    max_epochs=10,
    verbose=True,
    )
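A quick note on the update rule: nesterov_momentum keeps one velocity v per parameter theta, and lasagne's documented form of the update is roughly v_new = mu * v - eta * g, followed by theta <- theta + mu * v_new - eta * g, where g is the gradient, mu is update_momentum (0.9 here) and eta is update_learning_rate (0.01). This is the standard SGD reformulation of Nesterov's accelerated gradient.
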
# Train the network
nn = net1.fit(X_train, y_train)

preds = net1.predict(X_test)

l = len(preds)
count = 0

f0 = open('F:\\result.csv','wb')
f1 = csv.writer(f0)


Y = y_test.tolist()
for i in range(l):
    f1.writerow([Y[i], preds[i]])
    if preds[i] == y_test[i]:
        count += 1
acc = count * 100.0 / l
print "The accuracy is %.2f%%"%(acc)
f0.close()

Finally, a few words about the activation function used here. In theory, ReLU should be the best activation function. It is simply

y = max(0, x); for details see the TLDR part of the CS231n notes

(http://cs231n.github.io/neural-networks-1/)
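
For intuition, here is a minimal NumPy sketch of the two activations being compared. The helper names relu and sigmoid are mine, and these are just the plain mathematical definitions, not the lasagne implementations:

import numpy as np

def relu(x):
    # ReLU: pass positive values through, zero out the rest
    return np.maximum(0, x)

def sigmoid(x):
    # sigmoid: squash inputs into (0, 1), saturating for large |x|
    return 1.0 / (1.0 + np.exp(-x))

x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
print relu(x)     # [ 0.   0.   0.   0.5  2. ]
print sigmoid(x)  # approx [ 0.12  0.38  0.5   0.62  0.88]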

In practice, though, sigmoid, the activation that gets criticized the most, sometimes makes a successful comeback. For example, when I used a DNN on the dataset from exercise 4 of Ng's open course, ReLU did not work well, which I found very strange. The code is below.

# coding=utf-8
# Copyright reserved; infringement will not be pursued
# typhoonbxq
# the University of Hong Kong


import scipy.io as scio
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from nolearn.lasagne import NeuralNet
from lasagne import layers as layers
import lasagne
from lasagne.updates import nesterov_momentum

data = scio.loadmat('E:\\ML\\machine-learning-ex4\\ex4\\ex4data1.mat')
X_train = np.array(data['X'])
X_train = X_train.reshape(5000,1,400)
X_train = np.float32(X_train)
y_train = np.array(data['y']).ravel()
# In ex4data1.mat the digit 0 is labeled as class 10 (MATLAB-style 1-indexing);
# remap it to 0 so the labels are 0-9, matching the 10-unit softmax output
y_train[y_train == 10] = 0
y_train = y_train.astype(np.uint8)
# temp = X_train[4700]
# pic = temp.reshape(20,20,order = 'F')
# plt.imshow(pic,cmap = cm.binary)
# plt.show()

net1 = NeuralNet(
    layers=[('input', layers.InputLayer),
            ('dense', layers.DenseLayer),
            ('output',layers.DenseLayer),
            ],
    # input layer
    input_shape = (None,1,20*20),
    dense_num_units = 25,
    dense_nonlinearity = lasagne.nonlinearities.rectify,
    output_num_units = 10,
    output_nonlinearity = lasagne.nonlinearities.softmax,
    update=nesterov_momentum,
    update_learning_rate=0.001,
    update_momentum=0.01,
    max_epochs=3000,
    verbose=1,
    )
net1.fit(X_train,y_train)
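
To try the sigmoid version mentioned above, the only change needed (assuming the rest of the net1 configuration stays the same) is the hidden layer's nonlinearity:

# Swap the hidden layer's activation from ReLU to sigmoid
dense_nonlinearity = lasagne.nonlinearities.sigmoid,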

