import numpy as np
import matplotlib.pyplot as plt
import time
def load_planar_dataset():
    np.random.seed(1)
    m = 400  # number of examples
    N = int(m / 2)  # number of points per class
    D = 2  # dimensionality
    X = np.zeros((m, D))  # data matrix where each row is a single example
    Y = np.zeros((m, 1), dtype='uint8')  # labels vector (0 for red, 1 for blue)
    a = 4  # maximum ray of the flower
    for j in range(2):
        ix = range(N * j, N * (j + 1))
        t = np.linspace(j * 3.12, (j + 1) * 3.12, N) + np.random.randn(N) * 0.2  # theta
        r = a * np.sin(4 * t) + np.random.randn(N) * 0.2  # radius
        X[ix] = np.c_[r * np.sin(t), r * np.cos(t)]
        Y[ix] = j
    X = X.T
    Y = Y.T
    return X, Y
def init_parameter(num_of_unit, X):
    # W[0] and B[0] are placeholders so that layer l maps to index l
    W = [np.empty(1)]
    B = [np.empty(1)]
    for i in range(1, len(num_of_unit)):
        np.random.seed(1)  # re-seed before each layer so every run is reproducible
        # scale by 1/sqrt(fan-in) of the previous layer
        W.append(np.random.randn(num_of_unit[i], num_of_unit[i - 1]) / np.sqrt(num_of_unit[i - 1]))
        B.append(np.zeros((num_of_unit[i], 1)))
    M = X.shape[1]  # number of training examples
    L = len(num_of_unit) - 1  # number of layers, excluding the input layer
    return W, B, M, L
def derivative_of_activation(A, tag):
    # derivatives are expressed in terms of the activation output A rather than Z
    if tag == "sigmoid":
        return A * (1 - A)
    elif tag == "tanh":
        return 1 - A * A
    elif tag == "relu":
        return np.clip((A > 0), 0, 1)     # 1 where A > 0, else 0
    else:                                 # leaky_relu
        return np.clip((A > 0), 0.01, 1)  # 1 where A > 0, else 0.01
def sigmoid(Z):
    return 1 / (1 + np.exp(-Z))

def relu(Z):
    return np.maximum(np.zeros(Z.shape), Z)

def leaky_relu(Z):
    return np.maximum(0.01 * Z, Z)

def activation_function(Z, tag):
    if tag == "sigmoid":
        return sigmoid(Z)
    elif tag == 'tanh':
        return np.tanh(Z)
    elif tag == 'relu':
        return relu(Z)
    else:
        return leaky_relu(Z)

def predict(A, Y):
    # threshold the output layer at 0.5 and report accuracy as a percentage
    return (1 - np.sum(np.abs(1 * (A > 0.5) - Y)) / Y.shape[1]) * 100
def gradient_descent(X, Y, learning_rate, num_of_unit, iteration_times, tag):
    W, B, M, L = init_parameter(num_of_unit, X)
    costs = []
    start = time.time()
    for iteration in range(iteration_times):
        '''Forward Propagation'''
        A = [X]
        for i in range(1, L + 1):
            Z = W[i] @ A[i - 1] + B[i]
            assert Z.shape == (num_of_unit[i], M)
            if i == L:
                A.append(sigmoid(Z))  # output layer is always sigmoid
            else:
                A.append(activation_function(Z, tag))
            assert A[i].shape == (num_of_unit[i], M)
        '''******************'''
        if iteration % 100 == 0:
            # cross-entropy cost averaged over the M examples
            cost = (Y @ np.log(A[L].T) + (1 - Y) @ np.log(1 - A[L].T)).squeeze() / -M
            costs.append(cost)
        dW = {}
        dB = {}
        '''Backward Propagation'''
        for i in range(L, 0, -1):
            if i == L:
                dZ = A[L] - Y
            else:
                dA = W[i + 1].T @ dZ
                dZ = dA * derivative_of_activation(A[i], tag)
            dW[str(i)] = dZ @ A[i - 1].T / M
            dB[str(i)] = dZ.sum(axis=1, keepdims=True) / M
        '''********************'''
        '''*******Update*******'''
        for i in range(1, L + 1):
            W[i] -= learning_rate * dW[str(i)]
            B[i] -= learning_rate * dB[str(i)]
        '''********************'''
    print("Activation Function: " + tag.title() +
          " | Learning Rate: " + str(learning_rate) +
          "\nAccuracy: " + str(round(predict(A[L], Y), 2)) + "%" +
          " | Running Time: " + str(round(time.time() - start, 2)) + " seconds" +
          "\n---------------------------------------------------------------")
    return costs
X, Y = load_planar_dataset()
structures = [[2, 4, 1], [2, 8, 1], [2, 4, 2, 1], [2, 8, 4, 1]]
learning_rates = [[5.3, 4.6, 0.0075, 0.0075], [4.95, 4.3, 0.0075, 0.0075],
                  [5.6, 1.91, 0.0075, 0.0075], [3.88, 0.74, 0.0075, 0.0075]]
activations = ["sigmoid", "tanh", "relu", "leaky_relu"]
iteration_times = 10000
Costs = []
for i in range(4):
    cost_temp = []
    print("#Neural Network Structure: " + str(structures[i]) + "\n")
    for j in range(4):
        cost_temp.append(gradient_descent(X, Y, learning_rates[i][j], structures[i],
                                          iteration_times, activations[j]))
    Costs.append(cost_temp)
fig = plt.figure(figsize=(16, 9))
colors = ['r', 'g', 'b', 'c']
for i in range(4):
    ax = fig.add_subplot(2, 2, i + 1)
    ax.set_title("Structure:" + str(structures[i]))
    ax.set_xlabel("Iterations")
    ax.set_ylabel("Cost")
    ax.axis([0, 10000, 0, 0.8])
    for j in range(4):
        ax.plot(np.arange(0, 10000, 100), Costs[i][j], color=colors[j], linewidth=3, label=activations[j])
fig.tight_layout()
plt.legend(bbox_to_anchor=(1.05, 0), loc=3, borderaxespad=0)
plt.show()
Output:
#Neural Network Structure: [2, 4, 1]
Activation Function: Sigmoid | Learning Rate: 5.3
Accuracy: 91.25% | Running Time: 1.0 seconds
---------------------------------------------------------------
Activation Function: Tanh | Learning Rate: 4.6
Accuracy: 91.0% | Running Time: 1.0 seconds
---------------------------------------------------------------
Activation Function: Relu | Learning Rate: 0.0075
Accuracy: 59.0% | Running Time: 0.93 seconds
---------------------------------------------------------------
Activation Function: Leaky_Relu | Learning Rate: 0.0075
Accuracy: 58.0% | Running Time: 0.81 seconds
---------------------------------------------------------------
#Neural Network Structure: [2, 8, 1]
Activation Function: Sigmoid | Learning Rate: 4.95
Accuracy: 90.75% | Running Time: 1.33 seconds
---------------------------------------------------------------
Activation Function: Tanh | Learning Rate: 4.3
Accuracy: 91.25% | Running Time: 1.43 seconds
---------------------------------------------------------------
Activation Function: Relu | Learning Rate: 0.0075
Accuracy: 73.25% | Running Time: 1.14 seconds
---------------------------------------------------------------
Activation Function: Leaky_Relu | Learning Rate: 0.0075
Accuracy: 73.0% | Running Time: 1.14 seconds
---------------------------------------------------------------
#Neural Network Structure: [2, 4, 2, 1]
Activation Function: Sigmoid | Learning Rate: 5.6
Accuracy: 91.5% | Running Time: 1.49 seconds
---------------------------------------------------------------
Activation Function: Tanh | Learning Rate: 1.91
Accuracy: 92.0% | Running Time: 1.54 seconds
---------------------------------------------------------------
Activation Function: Relu | Learning Rate: 0.0075
Accuracy: 76.5% | Running Time: 1.23 seconds
---------------------------------------------------------------
Activation Function: Leaky_Relu | Learning Rate: 0.0075
Accuracy: 76.25% | Running Time: 1.31 seconds
---------------------------------------------------------------
#Neural Network Structure: [2, 8, 4, 1]
Activation Function: Sigmoid | Learning Rate: 3.88
Accuracy: 91.0% | Running Time: 2.08 seconds
---------------------------------------------------------------
Activation Function: Tanh | Learning Rate: 0.74
Accuracy: 91.5% | Running Time: 2.28 seconds
---------------------------------------------------------------
Activation Function: Relu | Learning Rate: 0.0075
Accuracy: 88.5% | Running Time: 1.5 seconds
---------------------------------------------------------------
Activation Function: Leaky_Relu | Learning Rate: 0.0075
Accuracy: 87.75% | Running Time: 1.61 seconds
---------------------------------------------------------------
Analysis:
1. With few layers, sigmoid and tanh perform similarly (see the numeric check after this list: tanh is just a rescaled, recentered sigmoid).
2. For fitting this non-linear dataset, the rectified-linear activations perform worse; at the same learning rate, relu and leaky relu behave almost identically here.
3. Moderately increasing the number of units per layer raises prediction accuracy, but convergence becomes slower.
4. The learning rate is a key hyperparameter: if the cost curve oscillates strongly, reduce it; if convergence is slow, increase it (a rough sweep is sketched after this list).
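A plausible explanation for point 1 is the identity tanh(z) = 2*sigmoid(2z) - 1: tanh only rescales and recenters the sigmoid, so once the learning rate is tuned for each, both units fit essentially the same family of decision boundaries. A minimal numeric check of that identity, reusing the sigmoid() defined above:

# Sanity check: tanh(z) == 2 * sigmoid(2z) - 1 for a range of inputs
z = np.linspace(-5, 5, 11)
print(np.allclose(np.tanh(z), 2 * sigmoid(2 * z) - 1))  # prints True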
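For point 4, a rough sweep like the sketch below makes the trade-off visible: a rate that is too large gives an oscillating cost curve, while one that is too small barely reduces the cost within the iteration budget. This reuses gradient_descent from above; the candidate rates and the 2000-iteration budget are illustrative choices, not tuned values.

# Hypothetical learning-rate sweep for the [2, 4, 1] network with tanh;
# costs are recorded every 100 iterations inside gradient_descent.
for lr in [0.05, 0.5, 1.0, 4.6]:
    costs_lr = gradient_descent(X, Y, lr, [2, 4, 1], 2000, "tanh")
    plt.plot(np.arange(0, 2000, 100), costs_lr, label="lr=" + str(lr))
plt.xlabel("Iterations")
plt.ylabel("Cost")
plt.legend()
plt.show()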