Backpropagation Based on Numerical Differentiation - Python Implementation

  • Preface
  • 1. Backpropagation Based on Numerical Differentiation
    • 1.1 Defining the Activation Functions
    • 1.2 Computing the Mini-Batch Loss
    • 1.3 Gradient Descent
    • 1.4 Two-Layer Neural Network Class
    • 1.5 Downloading and Loading the Data with PyTorch
    • 1.6 Converting Labels to One-Hot Encoding
    • 1.7 Training and Evaluating on the Test Set
  • Summary

Preface

In this post I will lay out every step of the implementation.

1. Backpropagation Based on Numerical Differentiation

1.1 Defining the Activation Functions

import numpy as np

# ReLU activation
def relu(x):
    return np.maximum(0,x)

# numerically stable softmax
def softmax_1(z):
    if z.ndim==2:
        c=np.max(z,axis=1)
        z=z.T-c  # subtract the per-row max to guard against overflow
        y=np.exp(z)/np.sum(np.exp(z),axis=0)
        return y.T
    c=np.max(z)
    exp_z = np.exp(z - c)
    return exp_z/np.sum(exp_z)
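
As a quick sanity check (my own toy example, not from the original post), every row of the 2D softmax output should be a probability distribution that sums to 1:

# hypothetical sanity check for softmax_1
z = np.array([[1.0, 2.0, 3.0],
              [10.0, 10.0, 10.0]])
print(softmax_1(z))               # each row is a probability distribution
print(softmax_1(z).sum(axis=1))   # -> [1. 1.]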

1.2 Computing the Mini-Batch Loss

# cross-entropy error
def cross_entropy_error(p,y):
    '''
    :param p: predicted probabilities
    :param y: ground-truth labels (one-hot)
    :return: loss value
    '''
    delta=1e-7  # guard against log(0)
    batch_size=p.shape[0]
    return -np.sum(y*np.log(p+delta))/batch_size
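
For instance (a small made-up batch, not from the original post), with one-hot targets the loss is the average negative log-probability assigned to the true class:

# hypothetical example: two samples, three classes
p = np.array([[0.7, 0.2, 0.1],
              [0.1, 0.8, 0.1]])
y = np.array([[1, 0, 0],
              [0, 1, 0]])
print(cross_entropy_error(p, y))  # (-log 0.7 - log 0.8) / 2, roughly 0.29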

1.3 Gradient Descent

# central-difference numerical gradient of f with respect to x
def numerical_gradient(f, x):
    h = 1e-4  # 0.0001
    grad = np.zeros_like(x)

    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + h
        fxh1 = f(x)  # f(x+h)

        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2 * h)

        x[idx] = tmp_val  # restore the original value
        it.iternext()

    return grad
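
As a quick check (my own example, not in the original post), the numerical gradient of f(x) = x0^2 + x1^2 at (3, 4) should come out close to the analytic gradient (6, 8):

# hypothetical gradient check against a known analytic gradient
def f(x):
    return np.sum(x ** 2)

x = np.array([3.0, 4.0])
print(numerical_gradient(f, x))  # approximately [6. 8.]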

1.4 Two-Layer Neural Network Class

class two_layer:
    # define the network
    def __init__(self,input_size,hidden_size,output_size,weight_init_std=0.01):
        # initialize the weights
        # weight_init_std keeps the initial weights from being too large
        self.network={}  # stores the weights and biases
        self.network['W1']=weight_init_std*np.random.randn(input_size,hidden_size)
        self.network['b1']=np.zeros(hidden_size)
        self.network['W2']=weight_init_std*np.random.randn(hidden_size,output_size)
        self.network['b2']=np.zeros(output_size)

    #-----------------------------------------------------------------------------------------------------------------------
    # prediction
    # forward propagation
    def predict(self,x):
        W1,W2=self.network['W1'],self.network['W2']
        b1,b2=self.network['b1'],self.network['b2']
        a1=np.dot(x,W1)+b1
        z1=relu(a1)  # hidden-layer activation
        a2=np.dot(z1,W2)+b2
        predict=softmax_1(a2)
        return predict
    # loss function
    def loss(self,x,y):
        p=self.predict(x)
        return cross_entropy_error(p,y)
    # numerical gradient of the loss with respect to every parameter
    def numerical_gradient(self,x,y):
        loss_W=lambda W:self.loss(x,y)

        grad={}
        grad['W1']=numerical_gradient(loss_W,self.network['W1'])
        grad['b1']=numerical_gradient(loss_W,self.network['b1'])
        grad['W2']=numerical_gradient(loss_W,self.network['W2'])
        grad['b2']=numerical_gradient(loss_W,self.network['b2'])
        return grad
    #-----------------------------------------------------------------------------------------------------------------------

    # classification accuracy on a batch
    def acc(self,x,t):
        p=self.predict(x)
        y = np.argmax(t, axis=1)
        p = np.argmax(p, axis=1)

        acc=np.sum(p==y)/float(x.shape[0])
        return acc
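
A minimal smoke test of the class (my own example with made-up inputs) just confirms that a forward pass returns one probability row per sample:

# hypothetical smoke test on random data
net = two_layer(input_size=784, hidden_size=50, output_size=10)
x_dummy = np.random.rand(3, 784)         # 3 fake flattened images
print(net.predict(x_dummy).shape)        # (3, 10)
print(net.predict(x_dummy).sum(axis=1))  # each row sums to 1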

1.5 Downloading and Loading the Data with PyTorch

# download and load the data with the PyTorch framework
import torch
from torch.utils.data import DataLoader
import torchvision.datasets as da
batch_size = 100
# MNIST dataset
train_dataset = da.MNIST(root='/pymnist',  # root directory for the data
                         train=True,  # training split
                         transform=None,  # no preprocessing
                         download=True)  # download from the internet
test_dataset = da.MNIST(root='/pymnist',  # root directory for the data
                        train=False,  # test split
                        transform=None,  # no preprocessing
                        download=True)  # download from the internet
# load the data
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)  # shuffle the data
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=True)
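
Note that with transform=None these two DataLoaders are never actually iterated below; the next step pulls the raw tensors straight from the dataset objects. A quick look at what was downloaded (my own check, using the same train_data/train_labels attributes the rest of the post relies on):

# size check (my own addition)
print(len(train_dataset), len(test_dataset))  # 60000 10000
print(train_dataset.train_data.shape)         # torch.Size([60000, 28, 28])
print(train_dataset.train_labels.shape)       # torch.Size([60000])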

1.6 Converting Labels to One-Hot Encoding

# flatten the images into 784-dimensional vectors and one-hot encode the labels
x_train=train_dataset.train_data.numpy().reshape(-1,28*28)
y_train_tmp=train_dataset.train_labels.reshape(train_dataset.train_labels.shape[0],1)
y_train=torch.zeros(y_train_tmp.shape[0],10).scatter_(1,y_train_tmp,1).numpy()
x_test=test_dataset.test_data.numpy().reshape(-1,28*28)
y_test_tmp = test_dataset.test_labels.reshape(test_dataset.test_labels.shape[0], 1)
y_test = torch.zeros(y_test_tmp.shape[0], 10).scatter_(1, y_test_tmp, 1).numpy()
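
A quick sanity check on the resulting arrays (my own addition): each label row should contain exactly one 1.

# hypothetical check of shapes and the one-hot encoding
print(x_train.shape, y_train.shape)  # (60000, 784) (60000, 10)
print(y_train[0])                    # a single 1 in the column of the true digit
print(y_train.sum(axis=1).min(), y_train.sum(axis=1).max())  # 1.0 1.0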

1.7 Training and Evaluating on the Test Set

train_size = x_train.shape[0]
iters_num = 600  # iterations per epoch
learning_rate = 0.001
epoch = 5
batch_size = 100

network = two_layer(input_size=784, hidden_size=50, output_size=10)

for i in range(epoch):
    print('current epoch is :', i)
    for num in range(iters_num):
        # sample a random mini-batch
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        y_batch = y_train[batch_mask]

        # numerical gradient of the loss on this mini-batch
        grad = network.numerical_gradient(x_batch, y_batch)

        # gradient-descent update for every parameter
        for key in ('W1', 'b1', 'W2', 'b2'):
            network.network[key] -= learning_rate * grad[key]

        loss = network.loss(x_batch, y_batch)
        if num % 100 == 0:
            print(loss)
print(network.acc(x_test, y_test))
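
A word of caution (my own observation): numerical_gradient evaluates the full mini-batch loss twice for every single parameter, so one update step costs tens of thousands of forward passes. This loop is meant to illustrate the mechanics of gradient descent, not to be fast.

# rough cost of one update step (back-of-the-envelope, my own addition)
n_params = 784*50 + 50 + 50*10 + 10   # 39760 parameters in the two-layer network
print(2 * n_params)                   # about 79520 loss evaluations per mini-batch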

Summary

I look forward to hearing from you; feel free to leave a comment or send me a private message so we can learn and improve together!
