1.代码(python)
初始化变量--->前向计算----->后向运算
此代码是初步实现:本篇博文中的网络既未加偏置b,也未加激活函数。
第二篇博文是加上偏置b、但未加激活函数的代码,链接(https://blog.csdn.net/huanhuan59/article/details/104483851)
第三篇博文是加上偏置b、并加上激活函数的代码,有详细介绍,链接(https://blog.csdn.net/huanhuan59/article/details/104484095)
# -*- coding: UTF-8 -*-
import numpy as np
import scipy
import pandas
import matplotlib as plt
import sklearn
import sklearn.datasets
import math
import random
"""
该数据集有3个类virginica,versicolor和setosa,每类50个样本;
每个样本是一个4维的特征向量,萼片长,萼片宽,花瓣长,花瓣宽;
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
"""
# Load the Iris dataset and pull out the pieces used below.
iris = sklearn.datasets.load_iris()
iris_key = iris.keys()
print(iris_key)
iris_data = iris['data']
iris_target = iris['target']
iris_target_names = iris['target_names']
iris_feature_names = iris['feature_names']

# Debug output, disabled by default:
# print(iris_target)
# print(iris_target_names)
# print(iris_feature_names)
# print("!finish!")

# Weights of the three linear layers, all initialised to 1.
# The builtin ``float`` is used as dtype because the ``np.float`` alias was
# removed from NumPy (AttributeError on NumPy >= 1.24).
W0 = np.ones((5, 4), dtype=float)  # layer 0: 4 inputs  -> 5 units
W1 = np.ones((4, 5), dtype=float)  # layer 1: 5 units   -> 4 units
W2 = np.ones((3, 4), dtype=float)  # layer 2: 4 units   -> 3 class scores

# Buffers for the softmax output and its derivative w.r.t. the class scores.
S = np.zeros((3, 1), dtype=float)
dS_dZ3 = np.zeros((3, 1), dtype=float)

data_shape = np.shape(iris_data)
data_len = data_shape[0]  # 150
feature_len = data_shape[1]  # 4
# forward
# Forward pass through three bias-free linear layers followed by a softmax
# and a cross-entropy loss. Only the first sample is processed.
for i in range(1):
    # Layer 0: the sample must be a (4, 1) column vector; a (1, 4) row
    # vector is not conformable with W0, which is (5, 4).
    feature_vector = iris_data[i, :]
    Z_1 = np.dot(W0, feature_vector.reshape((4, 1)))  # (5, 1)

    # Layer 1
    Z_2 = np.dot(W1, Z_1).reshape((4, 1))

    # Layer 2
    Z_3 = np.dot(W2, Z_2).reshape((3, 1))

    # Layer 3: softmax. Subtracting the maximum score leaves the result
    # unchanged mathematically but keeps exp() from overflowing. Note that
    # S_sum is therefore the normaliser of the *shifted* exponentials.
    exp_Z3 = np.exp(Z_3 - np.max(Z_3))
    S_sum = float(np.sum(exp_Z3))
    S[:] = exp_Z3 / S_sum  # write in place so S stays the same (3, 1) array

    # One-hot encoding of the true class (iris_target holds 0, 1 or 2).
    label = np.zeros(3, dtype=int)
    label[iris_target[i]] = 1

    # Cross-entropy loss. With a one-hot label only the true-class term is
    # non-zero, so it is read directly instead of summing label * log(S).
    Loss = -float(np.log(S[iris_target[i], 0]))
    # print("Loss:", Loss)
# backward
# Backward pass for the single sample handled by the forward pass. It reads
# S, label, Z_1, Z_2, W1, W2 and feature_vector left behind by that pass.
#
# The previous version wrapped this in ``for i in range(3)`` and looked up
# ``iris_target[i]``: that index counts classes, not samples, so the loop is
# dropped and the true class is taken from ``label`` instead.
target = int(np.argmax(label))  # index of the true class
S_t = S[target, 0]              # predicted probability of the true class

# Layer 0: dL/dS for L = -sum_k y_k * log(S_k). Only the true-class entry is
# non-zero and it equals -1 / S_t (the sign and the label were missing).
dL_dS = np.zeros((3, 1), dtype=float)
dL_dS[target, 0] = -1.0 / S_t
print("dL_dS",dL_dS)

# Layer 1: derivative of the true-class probability S_t w.r.t. every score:
#   dS_t/dZ3_k = S_t * (1 - S_t)   if k == target
#              = -S_t * S_k        otherwise
dS_dZ3 = -S_t * S
dS_dZ3[target, 0] = S_t * (1 - S_t)
dS_dZ3 = dS_dZ3.reshape((3, 1))
print("dS_dZ3",dS_dZ3)

# Chain rule through the softmax: dL/dZ3 = dL/dS_t * dS_t/dZ3, which
# simplifies to (S - one_hot(label)).
dL_dZ3 = dL_dS[target, 0] * dS_dZ3  # (3, 1)

# Layer 2: every row of dZ3/dW2 is Z_2 transposed.
dZ3_dW2 = np.tile(Z_2.reshape(1,4), (3,1))  # (3, 4)
print("dZ3_dW2", dZ3_dW2)
# dL_dW2 = dL_dS * dS_dZ3 * dZ3_dW2
#        = dL_dZ3 * Z2_T
dL_dW2 = dL_dZ3 * dZ3_dW2
print("dL_dW2: ", dL_dW2)  # (3, 4)

# Layer 3: push the gradient back through W2. This needs the transpose;
# reshape(4, 3) would reorder the elements differently from W2.T.
dL_dZ2 = np.dot(W2.T, dL_dZ3)  # (4, 1)
dL_dW1 = np.dot(dL_dZ2, Z_1.reshape(1,5))  # (4, 5)

# Layer 4: same again through W1.
dL_dZ1 = np.dot(W1.T, dL_dZ2)  # (5, 1)
dL_dW0 = np.dot(dL_dZ1, feature_vector.reshape((1,4)))  # (5, 4)
print("feature_vector.reshape((1,4))", feature_vector.reshape((1,4)))
print("dL_dW1", dL_dW1)