MNIST handwritten digit recognition, improved version: implementation and code

This improved handwritten-digit recognizer uses the cross-entropy cost (CrossEntropyCost) as its loss function and initializes each weight from N(0, 1/sqrt(n_in)), where n_in is the number of inputs feeding the neuron (biases are drawn from N(0, 1)). It also adds save/load functions for persisting a trained network as JSON, and it reaches higher accuracy than the earlier versions.
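
Why the N(0, 1/sqrt(n_in)) weight initialization helps, shown as a quick standalone sketch (my illustration, separate from the network code below): it keeps the weighted input z = w·x + b of order 1 no matter how many inputs a neuron has, so the sigmoid starts out unsaturated and its gradient stays usable.

import numpy as np

n_in = 784                          # e.g. the 28x28 MNIST input layer
x = np.ones((n_in, 1))              # an all-ones input, worst case for saturation
w_old = np.random.randn(1, n_in)                  # old scheme: N(0, 1)
w_new = np.random.randn(1, n_in) / np.sqrt(n_in)  # new scheme: N(0, 1/n_in)
print(abs((w_old @ x).item()))      # typically ~28 (= sqrt(784)): sigmoid saturates
print(abs((w_new @ x).item()))      # typically ~1: sigmoid stays responsive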

import random
import json
import sys
import numpy as np


def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def sigmoid_prime(z):
    return sigmoid(z) * (1 - sigmoid(z))


class QuadraticCost(object):

    @staticmethod
    def fn(a, y):
        """
        Return the cost associated with an output 'a' and desired output 'y'
        :param a: output
        :param y: desired output
        """
        return 0.5 * np.linalg.norm(a - y) ** 2

    @staticmethod
    def delta(z, a, y):
        """ Return the error delta from the output layer """
        return (a - y) * sigmoid_prime(z)


class CrossEntropyCost(object):

    @staticmethod
    def fn(a, y):
        """
        Return the cost associated with an output 'a' and desired output "y"
        Note that np.nan_to_num is to ensure numerical stability . If both "a"
        and "y" have a 1.0 in the same slot, then the expression(1-y)*np.log
        (1-a) return nan. The nan_to_num ensure that is converted to the correct
        value(0.0).
        :param a: output
        :param y: desired output

        """
        return np.sum(np.nan_to_num(-y * np.log(a) - (1 - y) * np.log(1 - a)))

    @staticmethod
    def delta(z, a, y):
        """
        Return the error delta from the output layer.Note that the parameter
        "z" is not used by the method.It is include in the method's parameters
        in order to make the interface consistent with the delta method for other
        cost classes.

        """
        return a - y
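
A quick aside (an illustrative check, not part of the network file): dividing the quadratic delta (a - y) * sigmoid_prime(z) by sigmoid_prime(z) leaves exactly a - y. That is why the cross-entropy delta carries no sigmoid_prime factor, so the output layer keeps learning even when the neuron is saturated:

z = np.array([[5.0]])                   # a strongly saturated weighted input
a = sigmoid(z)
y = np.array([[0.0]])
quad = QuadraticCost.delta(z, a, y)     # tiny, scaled down by sigmoid_prime(z)
xent = CrossEntropyCost.delta(z, a, y)  # stays close to 1.0, namely a - y
assert np.allclose(quad, xent * sigmoid_prime(z))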


class Network2(object):
    def __init__(self, sizes, cost=CrossEntropyCost):
        self.layer_number = len(sizes)
        self.sizes = sizes
        self.default_weights()
        self.cost = cost

    def default_weights(self):
        # Biases are drawn from N(0, 1); each weight from N(0, 1/n_in), where
        # n_in (x below) is the number of inputs to the neuron, keeping the
        # weighted input z of order 1 so sigmoid neurons start unsaturated.
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        self.weights = [np.random.randn(y, x) / np.sqrt(x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]

    def larger_weights(self):
        # The older initializer (weights ~ N(0, 1)), kept for comparison.
        self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]

    def feedforward(self, a):
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            lamda=0.0,
            evaluation_data=None,
            monitor_evaluation_cost=False,
            monitor_evaluation_accuracy=False,
            monitor_training_cost=False,
            monitor_training_accuracy=False):
        if evaluation_data:
            n_data = len(evaluation_data)
        n = len(training_data)
        evaluation_cost, evaluation_accuracy = [], []
        training_cost, training_accuracy = [], []
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta, lamda, n)
            print("Epoch %s training complete" % j)
            if monitor_training_cost:
                cost = self.total_cost(training_data, lamda)
                training_cost.append(cost)
                print("Cost on training data:{}".format(cost))
            if monitor_training_accuracy:
                accuracy = self.accuracy(training_data, convert=True)
                training_accuracy.append(accuracy)
                print("Accuracy on training data:{}/{}".format(accuracy, n))
            if monitor_evaluation_cost:
                cost = self.total_cost(evaluation_data, lamda, convert=True)
                evaluation_cost.append(cost)
                print("Cost on evaluation data:{}".format(cost))
            if monitor_evaluation_accuracy:
                accuracy = self.accuracy(evaluation_data, convert=True)
                evaluation_accuracy.append(accuracy)
                print("Accuracy on evaluation data:{}/{}".format(
                    accuracy, n_data))
        return evaluation_cost, evaluation_accuracy, training_cost, training_accuracy

    def update_mini_batch(self, mini_batch, eta, lamda, n):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        # L2 regularization enters as the (1 - eta*lamda/n) weight-decay
        # factor; biases are deliberately not regularized.
        self.weights = [(1 - eta * (lamda/n)) * w - (eta/len(mini_batch)) * nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - (eta/len(mini_batch)) * nb
                       for b, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # Forward pass: store every weighted input z and every activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # Backward pass: the output-layer error comes from the cost's delta,
        # then it is propagated back through the hidden layers.
        delta = self.cost.delta(zs[-1], activations[-1], y)
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        for j in range(2, self.layer_number):
            z = zs[-j]
            ps = sigmoid_prime(z)
            delta = np.dot(self.weights[-j+1].transpose(), delta) * ps
            nabla_b[-j] = delta
            nabla_w[-j] = np.dot(delta, activations[-j-1].transpose())
        return nabla_b, nabla_w

    def accuracy(self, data, convert=False):
        # convert=True means the labels y are one-hot (10, 1) vectors, as in
        # this code's data format; convert=False means plain integer digits.
        if convert:
            results = [(np.argmax(self.feedforward(x)), np.argmax(y))
                       for (x, y) in data]
        else:
            results = [(np.argmax(self.feedforward(x)), y)
                       for (x, y) in data]
        return sum(int(x == y) for (x, y) in results)

    def total_cost(self, data, lamda, convert=False):
        cost = 0.0
        for x, y in data:
            a = self.feedforward(x)
            if convert:
                y = vectorized_result(y)
            cost += self.cost.fn(a, y)/len(data)
        # Add the L2 regularization term 0.5*(lamda/n)*sum(||w||^2).
        cost += 0.5 * (lamda/len(data)) * sum(
            np.linalg.norm(w) ** 2 for w in self.weights)
        return cost

    def save(self, filename):
        """Serialize the network's sizes, weights, biases and cost to JSON."""
        data = {"sizes": self.sizes,
                "weights": [w.tolist() for w in self.weights],
                "biases": [b.tolist() for b in self.biases],
                "cost": str(self.cost.__name__)}
        with open(filename, "w") as f:
            json.dump(data, f)


def load(filename):
    """Reconstruct a Network2 from a file written by save()."""
    with open(filename, "r") as f:
        data = json.load(f)
    cost = getattr(sys.modules[__name__], data["cost"])
    net = Network2(data["sizes"], cost=cost)
    net.weights = [np.array(w) for w in data["weights"]]
    net.biases = [np.array(b) for b in data["biases"]]
    return net


def vectorized_result(j):
    # In this code's data format j is already a (10, 1) one-hot vector, so
    # this simply rebuilds a fresh one-hot array from the position of its 1.0.
    e = np.zeros((10, 1))
    e[np.argmax(j)] = 1.0
    return e
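
A minimal usage sketch. It assumes a Nielsen-style mnist_loader module (hypothetical here; substitute your own loader) that yields (784, 1) input vectors and, matching the code above, (10, 1) one-hot labels for both training and evaluation data:

import mnist_loader  # assumed Nielsen-style loader, not part of this file

training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
training_data = list(training_data)      # SGD needs len() and random.shuffle()
validation_data = list(validation_data)

net = Network2([784, 30, 10], cost=CrossEntropyCost)
net.SGD(training_data, epochs=30, mini_batch_size=10, eta=0.5, lamda=5.0,
        evaluation_data=validation_data,
        monitor_training_cost=True, monitor_training_accuracy=True,
        monitor_evaluation_cost=True, monitor_evaluation_accuracy=True)

net.save("network2.json")         # persist the trained network
restored = load("network2.json")  # rebuild it later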

Results

Epoch 0 training complete
Cost on training data:0.4977334309790729
Accuracy on training data:46630/50000
Cost on evaluation data:0.6867195908091326
Accuracy on evaluation data:9356/10000
Epoch 1 training complete
Cost on training data:0.45054158054369753
Accuracy on training data:47161/50000
Cost on evaluation data:0.7211902341535779
Accuracy on evaluation data:9437/10000
Epoch 2 training complete
Cost on training data:0.40793106496662634
Accuracy on training data:47668/50000
Cost on evaluation data:0.7370525705430018
Accuracy on evaluation data:9518/10000
Epoch 3 training complete
Cost on training data:0.3794341465472041
Accuracy on training data:47885/50000
Cost on evaluation data:0.7512035246564754
Accuracy on evaluation data:9549/10000
Epoch 4 training complete
Cost on training data:0.3894269437669111
Accuracy on training data:47946/50000
Cost on evaluation data:0.7908004354802458
Accuracy on evaluation data:9538/10000
Epoch 5 training complete
Cost on training data:0.36946213849936765
Accuracy on training data:48127/50000
Cost on evaluation data:0.7914577770789031
Accuracy on evaluation data:9569/10000
Epoch 6 training complete
Cost on training data:0.365501613461399
Accuracy on training data:48204/50000
Cost on evaluation data:0.807920084174321
Accuracy on evaluation data:9570/10000
Epoch 7 training complete
Cost on training data:0.38281618123708905
