Goal: build a shallow neural network with a single hidden layer
【Preparation】
1. Import the required packages
import numpy as np
import matplotlib.pyplot as plt
# load_planar_dataset and plot_decision_boundary are course-provided helpers;
# the module name planar_utils is an assumption here
from planar_utils import load_planar_dataset, plot_decision_boundary
2. Load the dataset
X, Y = load_planar_dataset()
3. Visualize the data
plt.scatter(X[0, :], X[1, :], c=Y.ravel(), s=40, cmap=plt.cm.Spectral)  # ravel Y so matplotlib accepts it as a 1-D color array
[Figure: scatter plot of the planar dataset, points colored by class label]
4. Check the dataset dimensions
shape_X = X.shape
shape_Y = Y.shape
m = X.shape[1] # training set size
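A quick sanity print of these values (a minimal sketch; for this dataset the features sit in rows and the examples in columns):

print("shape of X: " + str(shape_X))  # (n_x, m)
print("shape of Y: " + str(shape_Y))  # (1, m)
print("number of training examples: " + str(m))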
【Build the Model】
1. Define the layer sizes
def layer_sizes(X, Y):
    n_x = X.shape[0]  # input layer size
    n_h = 4           # hidden layer size
    n_y = Y.shape[0]  # output layer size
    return (n_x, n_h, n_y)
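With these sizes, the whole model can be written in one line (tanh hidden layer, sigmoid output):

\[
\hat{y} = a^{[2]} = \sigma\bigl(W^{[2]}\tanh(W^{[1]}x + b^{[1]}) + b^{[2]}\bigr),
\qquad W^{[1]} \in \mathbb{R}^{n_h \times n_x},\; W^{[2]} \in \mathbb{R}^{n_y \times n_h}
\]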
2. Initialize the parameters
def initialize_parameters(n_x, n_h, n_y):
    W1 = np.random.randn(n_h, n_x) * 0.01  # small random values break symmetry
    b1 = np.zeros((n_h, 1))                # biases can safely start at zero
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))
    parameters = {"W1": W1, "b1": b1, "W2": W2, "b2": b2}
    return parameters
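A quick check of the returned shapes (a minimal sketch; the sizes 2, 4, 1 match this dataset):

params = initialize_parameters(2, 4, 1)
print(params["W1"].shape)  # (4, 2)
print(params["b1"].shape)  # (4, 1)
print(params["W2"].shape)  # (1, 4)
print(params["b2"].shape)  # (1, 1)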
3. Forward propagation
def forward_propagation(X, parameters):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)            # hidden layer: tanh activation
    Z2 = np.dot(W2, A1) + b2
    A2 = 1 / (1 + np.exp(-Z2))  # output layer: sigmoid activation
    cache = {"Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}
    return A2, cache
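The same computation in equation form, vectorized over all m training examples (the columns of X):

\[
Z^{[1]} = W^{[1]}X + b^{[1]}, \qquad A^{[1]} = \tanh(Z^{[1]})
\]
\[
Z^{[2]} = W^{[2]}A^{[1]} + b^{[2]}, \qquad A^{[2]} = \sigma(Z^{[2]}) = \hat{Y}
\]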
4. Compute the cost
def compute_cost(A2, Y, parameters):
    m = Y.shape[1]  # number of examples
    logprobs = np.multiply(np.log(A2), Y) + np.multiply(np.log(1 - A2), 1 - Y)
    cost = -np.sum(logprobs) / m  # average cross-entropy over the batch
    return cost
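This implements the cross-entropy cost, averaged over the m examples:

\[
J = -\frac{1}{m}\sum_{i=1}^{m}\Bigl[\,y^{(i)}\log a^{[2](i)} + \bigl(1-y^{(i)}\bigr)\log\bigl(1-a^{[2](i)}\bigr)\Bigr]
\]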
5. Backward propagation
def backward_propagation(parameters, cache, X, Y):
    m = X.shape[1]
    W2 = parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]
    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m
    dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.power(A1, 2))  # tanh'(Z1) = 1 - A1**2
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m
    grads = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}
    return grads
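The code follows these gradient formulas (∘ is element-wise multiplication; the 1 - (A^{[1]})^2 factor is the derivative of tanh):

\[
dZ^{[2]} = A^{[2]} - Y,\qquad dW^{[2]} = \frac{1}{m}\,dZ^{[2]}A^{[1]\top},\qquad db^{[2]} = \frac{1}{m}\sum_{i=1}^{m} dZ^{[2](i)}
\]
\[
dZ^{[1]} = \bigl(W^{[2]\top}dZ^{[2]}\bigr)\circ\bigl(1 - (A^{[1]})^{2}\bigr),\qquad dW^{[1]} = \frac{1}{m}\,dZ^{[1]}X^{\top},\qquad db^{[1]} = \frac{1}{m}\sum_{i=1}^{m} dZ^{[1](i)}
\]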
6. Update the parameters
def update_parameters(parameters, grads, learning_rate=1.2):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]
    # one gradient-descent step on every parameter
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    parameters = {"W1": W1, "b1": b1, "W2": W2, "b2": b2}
    return parameters
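Each parameter takes one gradient-descent step, with learning rate \(\alpha\) = learning_rate:

\[
\theta \leftarrow \theta - \alpha\,\frac{\partial J}{\partial \theta},
\qquad \theta \in \{W^{[1]}, b^{[1]}, W^{[2]}, b^{[2]}\}
\]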
7. Put everything together
def nn_model(X, Y, n_h, num_iterations=10000, print_cost=False):
    n_x, _, n_y = layer_sizes(X, Y)  # n_h is taken from the function argument
    parameters = initialize_parameters(n_x, n_h, n_y)
    for i in range(0, num_iterations):
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        A2, cache = forward_propagation(X, parameters)
        # Cost function. Inputs: "A2, Y, parameters". Outputs: "cost".
        cost = compute_cost(A2, Y, parameters)
        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters, cache, X, Y)
        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update_parameters(parameters, grads, learning_rate=1.2)
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
    return parameters
8. Make predictions
def predict(parameters, X):
    A2, cache = forward_propagation(X, parameters)
    predictions = (A2 > 0.5)  # threshold the sigmoid output at 0.5
    return predictions
【Test】
# Build a model with an n_h-dimensional hidden layer
parameters = nn_model(X, Y, n_h = 4, num_iterations = 10000, print_cost=True)
# Plot the decision boundary
plot_decision_boundary(lambda x: predict(parameters, x.T), X, Y)
plt.title("Decision Boundary for hidden layer size " + str(4))
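As a follow-up check, training accuracy can be computed from predict (a minimal sketch, assuming Y holds 0/1 labels):

predictions = predict(parameters, X)
accuracy = np.mean(predictions == Y) * 100  # boolean predictions compare directly against 0/1 labels
print("Accuracy: %.1f%%" % accuracy)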