1. 代码:带偏置、ReLU 激活函数和 softmax 输出层的三层全连接网络
以 iris(鸢尾花)数据集的三个品种为例:x1、x2、x3、x4 分别是花萼长、花萼宽、花瓣长、花瓣宽。数据集共 150 条样本,其中前 50 条属于第一个品种,中间 50 条属于第二个品种,最后 50 条属于第三个品种。每个品种取前 35 条样本(共 105 条),混合打乱后作为训练集(train),输入网络以更新 w 和 b;每个品种剩下的最后 15 条样本(共 45 条)用来测试正确率。此程序正确率偏低,有待进一步优化,仅供参考。
代码:
# -*- coding: UTF-8 -*-
from math import exp
import numpy as np
import scipy
import pandas
import matplotlib as plt
import sklearn
import sklearn.datasets
import math
import random
from math import exp
"""
该数据集有3个类virginica,versicolor和setosa,每类50个样本;
每个样本是一个4维的特征向量,萼片长,萼片宽,花瓣长,花瓣宽;
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
"""
# Load the Iris bunch and print the names of the fields it provides.
iris = sklearn.datasets.load_iris()
iris_key = iris.keys()
print(iris_key)

# Glue the class label on as a fifth column so that each row keeps its
# label when the rows are shuffled below.
iris_data = iris.data
iris_target = iris.target.reshape(150, 1)
iris_data_target = np.hstack((iris_data, iris_target))

# train: the first 35 rows of each 50-row class block (rows 0-34, 50-84
# and 100-134); the remaining rows of each block are not touched here.
iris_data_target_0, iris_data_target_1, iris_data_target_2 = (
    iris_data_target[first:first + 35] for first in (0, 50, 100)
)
iris_data_target_sum = np.vstack(
    (iris_data_target_0, iris_data_target_1, iris_data_target_2)
)

# Shuffle the rows, then split back into a feature matrix (columns 0-3)
# and a 1-D label vector (column 4).
iris_data_target_shuffle = np.random.permutation(iris_data_target_sum)
iris_data, iris_target = (
    iris_data_target_shuffle[:, :4],
    iris_data_target_shuffle[:, 4],
)

iris_target_names = iris.target_names
iris_feature_names = iris.feature_names
"""
print(iris_target)
print(iris_target_names)
print(iris_feature_names)
print("!finish!")
"""
# Step size applied to every gradient in the parameter updates.
alpha = 0.001

# Weights of the three dense layers (4 -> 5 -> 4 -> 3), all starting at 1.
# The `np.float` alias was removed in NumPy 1.24; the builtin `float`
# selects the same float64 dtype on every NumPy version.
# NOTE(review): with all-ones initialisation every unit of a layer starts
# out identical; random initialisation is the usual way to break that
# symmetry -- consider it if accuracy stays low.
W0 = np.ones((5, 4), dtype=float)
W1 = np.ones((4, 5), dtype=float)
W2 = np.ones((3, 4), dtype=float)

# Scratch buffers for the softmax output and its derivative (column vectors).
S = np.zeros((3, 1), dtype=float)
dS_dZ3 = np.zeros((3, 1), dtype=float)

# Biases, one column vector per layer, also starting at 1.
b0 = np.ones((5, 1), dtype=float)
b1 = np.ones((4, 1), dtype=float)
b2 = np.ones((3, 1), dtype=float)
# Dimensions of the training feature matrix.
data_shape = np.shape(iris_data)
data_len = data_shape[0]  # number of training rows in iris_data
feature_len = data_shape[1]  # number of features per row (W0 expects 4)

# Per-sample gradient descent: 100 passes over the training rows, updating
# all weights and biases after every single sample.
for k in range(100):
    # Iterate over every training row; a fixed count of 100 would skip
    # rows whenever the training set holds more than 100 samples.
    for i in range(data_len):
        # ---------------- forward ----------------
        feature_vector = iris_data[i, :].reshape((feature_len, 1))

        # Layer 0: (5, 4) @ (4, 1) + (5, 1) -> (5, 1), then ReLU.
        Z_1 = np.dot(W0, feature_vector) + b0
        a_1 = np.maximum(Z_1, 0)

        # Layer 1: (4, 5) @ (5, 1) + (4, 1) -> (4, 1), then ReLU.
        Z_2 = np.dot(W1, a_1) + b1
        a_2 = np.maximum(Z_2, 0)

        # Layer 2: (3, 4) @ (4, 1) + (3, 1) -> (3, 1) logits.
        Z_3 = np.dot(W2, a_2) + b2

        # Softmax. Subtracting the largest logit leaves the probabilities
        # unchanged but keeps exp() from overflowing on large logits.
        shifted = Z_3 - np.max(Z_3)
        exp_shifted = np.exp(shifted)
        S_sum = exp_shifted.sum()
        S = exp_shifted / S_sum

        # One-hot column vector for the true class (labels are 0, 1 or 2).
        target = int(iris_target[i])
        label = np.zeros((3, 1))
        label[target] = 1.0

        # Cross-entropy -log(S[target]), evaluated from the shifted logits
        # so that a probability that underflows to 0 cannot break log().
        Loss = float(np.log(S_sum) - shifted[target, 0])

        # ---------------- backward ----------------
        # For softmax followed by cross-entropy, dL/dZ3 = S - label. This
        # equals the product (-1 / S[target]) * dS[target]/dZ3 without the
        # division by S[target].
        dL_dZ3 = S - label  # (3, 1)

        # Output layer.
        dL_dW2 = np.dot(dL_dZ3, a_2.T)  # (3, 1) @ (1, 4) -> (3, 4)
        dL_db2 = dL_dZ3

        # Second hidden layer. Back-propagation needs the transpose of W2;
        # reshape(4, 3) only re-reads the same buffer in a different
        # layout and does not produce the transpose.
        dL_da2 = np.dot(W2.T, dL_dZ3)  # (4, 3) @ (3, 1) -> (4, 1)
        dL_dZ2 = dL_da2 * (Z_2 > 0)  # ReLU derivative: 1 where Z_2 > 0, else 0
        dL_dW1 = np.dot(dL_dZ2, a_1.T)  # (4, 1) @ (1, 5) -> (4, 5)
        dL_db1 = dL_dZ2

        # First hidden layer, again through the transpose of the weights.
        dL_da1 = np.dot(W1.T, dL_dZ2)  # (5, 4) @ (4, 1) -> (5, 1)
        dL_dZ1 = dL_da1 * (Z_1 > 0)  # ReLU derivative
        dL_dW0 = np.dot(dL_dZ1, feature_vector.T)  # (5, 1) @ (1, 4) -> (5, 4)
        dL_db0 = dL_dZ1

        # ---------------- update ----------------
        # All gradients above were computed from the pre-update parameters.
        W0 = W0 - alpha * dL_dW0
        W1 = W1 - alpha * dL_dW1
        W2 = W2 - alpha * dL_dW2
        b0 = b0 - alpha * dL_db0
        b1 = b1 - alpha * dL_db1
        b2 = b2 - alpha * dL_db2
#############################################
# test: evaluate the trained parameters on the last 15 rows of each
# 50-row class block (rows 35-49, 85-99 and 135-149 of iris_data_target).
iris_data_target_test0 = iris_data_target[35:50]
iris_data_target_test1 = iris_data_target[85:100]
iris_data_target_test2 = iris_data_target[135:150]
iris_data_target_test_sum = np.vstack(
    (iris_data_target_test0, iris_data_target_test1, iris_data_target_test2)
)

n = 0  # correctly classified samples
false_nums = 0  # misclassified samples
sample_test1 = iris_data_target_test_sum[:, 0:4]  # features, (45, 4)
sample_test1_target = iris_data_target_test_sum[:, 4]  # labels, (45,)
test_len = sample_test1.shape[0]

for i in range(test_len):
    # Layer 0: (5, 4) @ (4, 1) + (5, 1) -> (5, 1), then ReLU.
    Z_1 = np.dot(W0, sample_test1[i].reshape((4, 1))) + b0
    a_1 = np.maximum(Z_1, 0)

    # Layer 1: (4, 5) @ (5, 1) + (4, 1) -> (4, 1), then ReLU.
    Z_2 = np.dot(W1, a_1) + b1
    a_2 = np.maximum(Z_2, 0)

    # Layer 2: (3, 4) @ (4, 1) + (3, 1) -> (3, 1) logits.
    Z_3 = np.dot(W2, a_2) + b2

    # Softmax with the largest logit subtracted first, so exp() cannot
    # overflow; the resulting probabilities are mathematically unchanged.
    exp_shifted = np.exp(Z_3 - np.max(Z_3))
    S = exp_shifted / exp_shifted.sum()

    # Predict the class with the strictly largest probability. A tie for
    # the top value falls through to class 2, as in the comparison chain
    # this loop has always used.
    p0, p1, p2 = S.ravel()
    if p0 > p1 and p0 > p2:
        iris_target_test = 0
    elif p1 > p0 and p1 > p2:
        iris_target_test = 1
    else:
        iris_target_test = 2

    if sample_test1_target[i] == iris_target_test:
        n = n + 1
        print("i=", i, "n=", n, "iris_target_test=", iris_target_test, "sample_test1_target=", sample_test1_target[i])
    else:
        false_nums = false_nums + 1
        print("i=", i, "false_nums=", false_nums)

# Fraction of test samples classified correctly.
right = n / test_len
print("right", right)