Saito Koki - Deep Learning from Scratch, Study Notes 4

ch04 Neural Network Learning

  1. Loss Functions
    1.1 Mean Squared Error
import numpy as np


def mean_squared_error(y, t):
    return 0.5 * np.sum((y - t) ** 2)


if __name__ == '__main__':
    y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
    t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
    print(mean_squared_error(np.array(y), np.array(t)))
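
For reference, this implements the mean squared error

E = \frac{1}{2} \sum_k (y_k - t_k)^2

where y_k is the network output and t_k is the one-hot label. With the sample y and t above (the correct class, index 2, is predicted with probability 0.6), the printed value is about 0.0975.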

    1.2 Cross-Entropy Error

import numpy as np


def cross_entropy_error(y, t):
    # a single sample is reshaped into a mini-batch of size 1
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    batch_size = y.shape[0]
    delta = 1e-7  # avoids log(0)
    return -np.sum(t * np.log(y + delta)) / batch_size


if __name__ == '__main__':
    y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
    t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
    print(cross_entropy_error(np.array(y), np.array(t)))
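
This is the mini-batch form of the cross-entropy error

E = -\frac{1}{N} \sum_n \sum_k t_{nk} \log y_{nk}

where N is the batch size. With the sample above the printed value is about 0.51, i.e. -log(0.6).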
  2. Numerical Differentiation
# central difference
def numerical_diff(f, x):
    h = 1e-5
    return (f(x + h) - f(x - h)) / (2 * h)


def function(x):
    # f(x) = 0.01x^2 + 0.1x
    return 0.01 * (x ** 2) + 0.1 * x


def function_2(x):
    # f(x0, x1) = x0^2 + x1^2
    return x[0] ** 2 + x[1] ** 2


def function_tmp1(x0):
    # function_2 with x1 fixed at 4.0, for the partial derivative with respect to x0
    return x0 * x0 + 4.0 ** 2.0


def function_tmp2(x1):
    # function_2 with x0 fixed at 3.0, for the partial derivative with respect to x1
    return 3.0 ** 2.0 + x1 * x1


if __name__ == '__main__':
    print(numerical_diff(function, 10))        # analytic value: 0.02 * 10 + 0.1 = 0.3
    # partial derivatives of function_2 at (3.0, 4.0)
    print(numerical_diff(function_tmp1, 3.0))  # analytic value: 2 * 3.0 = 6.0
    print(numerical_diff(function_tmp2, 4.0))  # analytic value: 2 * 4.0 = 8.0
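
The central difference used by numerical_diff approximates the derivative as

\frac{df}{dx} \approx \frac{f(x + h) - f(x - h)}{2h}

whose error is O(h^2), compared with O(h) for the forward difference (f(x + h) - f(x)) / h.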
  3. Gradient
import numpy as np


def numerical_diff(f, x):
    h = 1e-5
    return (f(x + h) - f(x - h)) / (2 * h)


def function_2(x):
    return x[0] ** 2 + x[1] ** 2

# compute the gradient (the vector of partial derivatives) by central differences
def _numerical_gradient(f, x):
    h = 1e-4
    grad = np.zeros_like(x)

    for idx in range(x.size):
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)

        x[idx] = tmp_val - h
        fxh2 = f(x)

        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val
    return grad


# find a minimum of f by gradient descent
# f: function to minimize, init_x: starting point, lr: learning rate, step_num: number of update steps
def gradient_descent(f, init_x, lr=0.1, step_num=100):
    x = init_x

    for i in range(step_num):
        grad = _numerical_gradient(f, x)
        x -= lr * grad
    return x


if __name__ == '__main__':
    print(_numerical_gradient(function_2, np.array([3.0, 4.0])))  # approximately [6. 8.]
    # find the minimum of function_2 with gradient descent; the result is very close to [0, 0]
    print(gradient_descent(function_2, init_x=np.array([3.0, -4.0]), lr=0.1, step_num=100))
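
Note that two_layer_net.py below does "from gradient import numerical_gradient" and applies it to the 2-D weight matrices W1 and W2, whereas _numerical_gradient above only loops over a 1-D array. A version that also handles multi-dimensional arrays, similar to the one shipped with the book's sample code, might look like this (a sketch; it is assumed to live in the gradient.py module that the later import refers to):

import numpy as np


def numerical_gradient(f, x):
    # central-difference gradient for an array of any shape,
    # visiting every element via np.nditer
    h = 1e-4
    grad = np.zeros_like(x)

    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]

        x[idx] = tmp_val + h
        fxh1 = f(x)  # f(x + h)

        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x - h)

        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val  # restore the original value
        it.iternext()

    return grad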
  4. Example of the Learning Algorithm

two_layer_net.py

import numpy as np
import sys, os

sys.path.append(os.pardir)
from ch03.functions.all import *
from loss_function.cross_entropy_error import *
from gradient import numerical_gradient


class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        self.params = {}
        self.params["W1"] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params["b1"] = np.zeros(hidden_size)
        self.params["W2"] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params["b2"] = np.zeros(output_size)

    def predict(self, x):
        W1, W2 = self.params["W1"], self.params["W2"]
        b1, b2 = self.params["b1"], self.params["b2"]

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        return y

    def loss(self, x, t):
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        # W is a dummy argument: numerical_gradient perturbs self.params in place,
        # so loss_W re-evaluates the loss with the perturbed parameters
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads["W1"] = numerical_gradient(loss_W, self.params["W1"])
        grads["b1"] = numerical_gradient(loss_W, self.params["b1"])
        grads["W2"] = numerical_gradient(loss_W, self.params["W2"])
        grads["b2"] = numerical_gradient(loss_W, self.params["b2"])
        return grads
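
As a quick sanity check (a minimal sketch; the 784/50/10 sizes match the MNIST configuration used in train_neuralnet.py below), the parameter shapes and the output shape can be inspected right after constructing the network:

import numpy as np
from two_layer_net import TwoLayerNet

net = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
print(net.params["W1"].shape)  # (784, 50)
print(net.params["b1"].shape)  # (50,)
print(net.params["W2"].shape)  # (50, 10)
print(net.params["b2"].shape)  # (10,)

x = np.random.rand(100, 784)   # a dummy mini-batch of 100 "images"
print(net.predict(x).shape)    # (100, 10)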

train_neuralnet.py

import numpy as np
import sys, os
import matplotlib.pyplot as plt

sys.path.append(os.pardir)
from ch03.functions.all import *
from loss_function.cross_entropy_error import *
from gradient import numerical_gradient
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

train_loss_list = []

# hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    print(i)  # progress indicator; the numerical gradient makes each iteration slow
    # sample a random mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # compute the gradients and update every parameter
    grad = network.numerical_gradient(x_batch, t_batch)
    for key in ("W1", "b1", "W2", "b2"):
        network.params[key] -= learning_rate * grad[key]

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

print(train_loss_list)
x = range(len(train_loss_list))
plt.plot(x, train_loss_list)
plt.show()

Summary:

  • The data used in machine learning is split into training data and test data
  • A neural network learns from the training data, and the test data is used to evaluate the generalization ability of the trained model
  • Learning in a neural network uses a loss function as its indicator; the weight parameters are updated so that the value of the loss function decreases
  • Computing a derivative from the difference of function values at two points separated by a tiny amount is called numerical differentiation
  • With numerical differentiation, the gradients of the weight parameters can be computed

