1.Code
# -*- coding: utf-8 -*-
"""
date: 2018/12/3
cost: a day
"""
"""
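log
Today the cost was decreasing very slowly and did not match the reference answer. Cause:
the update step was written incorrectly.
(Another point to note: initialize parameters -- divide by np.sqrt().)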
"""
import matplotlib.pyplot as plt
import numpy as np
import lr_utils
import testCases
from dnn_utils import relu, sigmoid
def get_data_set(layer_dims):
    """Load the dataset and flatten the inputs into one column per example.

    ``layer_dims`` is accepted for call compatibility but is not used here.

    Returns:
        ``(train_x, train_y, test_x, test_y, classes)``. Each ``*_x`` is the
        matching array from ``lr_utils.load_dataset()`` reshaped so that its
        first axis becomes the column axis, then divided by 255. The label
        arrays and ``classes`` are returned exactly as loaded.
    """
    raw_train_x, raw_train_y, raw_test_x, raw_test_y, classes = lr_utils.load_dataset()

    def _flatten_and_scale(samples):
        # One row per example first, then transpose so examples are columns.
        example_count = samples.shape[0]
        return samples.reshape(example_count, -1).T / 255

    return (
        _flatten_and_scale(raw_train_x),
        raw_train_y,
        _flatten_and_scale(raw_test_x),
        raw_test_y,
        classes,
    )
def initialize_parameters(layer_dims):
    """Build the initial weight matrices and bias vectors for every layer.

    NumPy's global random generator is re-seeded with 3 on every call, so the
    result is reproducible. Each weight matrix is drawn from a standard normal
    distribution and divided by the square root of the previous layer's size;
    each bias vector starts at zero.

    Args:
        layer_dims: Layer sizes, input layer first.

    Returns:
        dict with ``"W<l>"`` of shape ``(layer_dims[l], layer_dims[l - 1])``
        and ``"b<l>"`` of shape ``(layer_dims[l], 1)`` for
        ``l = 1 .. len(layer_dims) - 1``.
    """
    np.random.seed(3)
    parameters = {}
    # Pair each layer size with the size of the layer feeding it.
    size_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for index, (fan_in, fan_out) in enumerate(size_pairs, start=1):
        weights = np.random.randn(fan_out, fan_in) / np.sqrt(fan_in)
        biases = np.zeros((fan_out, 1))
        assert weights.shape == (fan_out, fan_in)
        assert biases.shape == (fan_out, 1)
        parameters["W" + str(index)] = weights
        parameters["b" + str(index)] = biases
    return parameters
def sigmoid(Z):
    """Return the logistic function 1 / (1 + e^(-Z)), applied element-wise."""
    exp_of_negated = np.exp(-Z)
    return 1 / (1 + exp_of_negated)
def relu(Z):
    """Return the element-wise maximum of 0 and Z (rectified linear unit)."""
    lower_bound = 0
    return np.maximum(lower_bound, Z)
def forward(X, parameters):
    """Run the forward pass: ReLU on every layer but the last, sigmoid on the last.

    Args:
        X: Network input; it feeds layer 1 and is stored as the first cache
            entry.
        parameters: dict holding ``"W<l>"`` and ``"b<l>"`` for ``l = 1 .. L``,
            where ``L`` is taken to be half the number of entries.

    Returns:
        ``(A, A_caches)``: the last layer's activation, and the list
        ``[X, A1, ..., AL]`` of every layer's activation in order.
    """
    layer_count = len(parameters) // 2
    A_caches = [X]
    for layer in range(1, layer_count + 1):
        weights = parameters["W" + str(layer)]
        bias = parameters["b" + str(layer)]
        # The newest cache entry is always the previous layer's activation.
        Z = np.dot(weights, A_caches[-1]) + bias
        activation = sigmoid if layer == layer_count else relu
        A = activation(Z)
        A_caches.append(A)
    return A, A_caches
def relu_backward(dAL, A):
    """Push a gradient back through a ReLU, given the ReLU's output.

    Args:
        dAL: Gradient with respect to the ReLU output.
        A: The ReLU output; entries equal to 0 mark where the gradient is cut.

    Returns:
        A fresh copy of ``dAL`` with zeros wherever ``A == 0``. ``dAL`` itself
        is left unmodified.
    """
    inactive = A == 0
    dZ = np.array(dAL, copy=True)
    dZ[inactive] = 0
    return dZ
def backward(AL, Y, parameters, A_caches):
    """Compute cross-entropy gradients for a ReLU ... ReLU -> sigmoid network.

    Args:
        AL: Sigmoid output of the last layer (the first value returned by
            ``forward``).
        Y: Labels, same shape as ``AL``; ``Y.shape[1]`` is used as the number
            of examples when averaging.
        parameters: dict holding ``"W<l>"`` and ``"b<l>"`` for ``l = 1 .. L``.
        A_caches: ``[X, A1, ..., AL]`` as returned by ``forward``.

    Returns:
        dict with ``"dW<l>"`` and ``"db<l>"`` for every layer, plus
        ``"dA<l>"`` for ``l = 1 .. L``.
    """
    grads = {}
    L = len(parameters) // 2
    m = Y.shape[1]

    # dL/dAL is kept in the result for callers. It is infinite or NaN wherever
    # the sigmoid has saturated to exactly 0 or 1, so silence those warnings
    # and, below, avoid using it to derive dZ.
    with np.errstate(divide="ignore", invalid="ignore"):
        dAL = -np.divide(Y, AL) + np.divide(1 - Y, 1 - AL)
    grads["dA" + str(L)] = dAL

    dA = dAL
    for l in reversed(range(1, L + 1)):
        if l == L:
            # For sigmoid + cross-entropy, dAL * AL * (1 - AL) simplifies to
            # AL - Y. The closed form stays finite when AL is exactly 0 or 1,
            # where the product would be inf * 0 = NaN.
            dZ = AL - Y
        else:
            dZ = relu_backward(dA, A_caches[l])
        # A_caches[l - 1] is the input that fed layer l.
        grads["dW" + str(l)] = np.dot(dZ, A_caches[l - 1].T) / m
        grads["db" + str(l)] = np.sum(dZ, axis=1, keepdims=True) / m
        if l != 1:
            # Gradient with respect to the previous layer's activation.
            dA = np.dot(parameters["W" + str(l)].T, dZ)
            grads["dA" + str(l - 1)] = dA
    return grads
def update(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every layer's weights and biases.

    The arrays stored in ``parameters`` are modified in place, and the same
    dict object is returned.

    Args:
        parameters: dict holding ``"W<l>"`` and ``"b<l>"`` for ``l = 1 .. L``.
        grads: dict holding the matching ``"dW<l>"`` and ``"db<l>"`` entries.
        learning_rate: Step size multiplied into each gradient.
    """
    layer_count = len(parameters) // 2
    for layer in range(1, layer_count + 1):
        for prefix in ("W", "b"):
            key = prefix + str(layer)
            parameters[key] -= learning_rate * grads["d" + key]
    return parameters
def dnn_model(train_x, train_y, parameters, num_iterations=3000, learning_rate=0.0075, print_cost=False, isPlot=False):
    """Train the network with plain gradient descent and return the parameters.

    Each iteration runs ``forward``, ``backward`` and ``update`` once. On
    iterations 0, 100, 200, ... the cross-entropy cost of that iteration's
    prediction is recorded, and printed when ``print_cost`` is true. When
    ``isPlot`` is true the recorded costs are plotted after training.

    Args:
        train_x: Training inputs passed to ``forward``.
        train_y: Training labels.
        parameters: Initial parameters dict; the value returned by ``update``
            replaces it after every iteration.
        num_iterations: Number of gradient-descent steps.
        learning_rate: Step size handed to ``update``.
        print_cost: Print each recorded cost.
        isPlot: Show a plot of the recorded costs at the end.

    Returns:
        The parameters after the final update.
    """
    costs = []
    for i in range(num_iterations):
        AL, A_caches = forward(train_x, parameters)
        grads = backward(AL, train_y, parameters, A_caches)
        parameters = update(parameters, grads, learning_rate)

        if i % 100 != 0:
            continue

        # AL was produced before this iteration's update was applied.
        log_likelihood = train_y * np.log(AL) + (1 - train_y) * np.log(1 - AL)
        cost = -np.mean(log_likelihood)
        costs.append(cost)
        if print_cost:
            print("第" + str(i + 1) + "次迭代 cost = " + str(cost))

    if isPlot:
        plt.plot(costs)
        plt.xlabel("迭代次数(每百次)")
        plt.ylabel("cost")
        plt.show()
    return parameters
def predict(X, parameters):
    """Classify every example (column of ``X``) as 0 or 1.

    Runs ``forward`` and labels an example 1 when the network output is
    strictly greater than 0.5, otherwise 0.

    Args:
        X: Inputs passed to ``forward``.
        parameters: Trained parameters dict passed to ``forward``.

    Returns:
        Integer array of 0/1 labels with the same shape as the network output
        (``(1, m)`` for a single output unit and ``m`` examples).
    """
    probabilities, _ = forward(X, parameters)
    # Vectorised threshold. Iterating over ``squeeze()`` raised TypeError for a
    # single example, because squeezing a (1, 1) output gives a 0-d array.
    return (probabilities > 0.5).astype(int)
if __name__ == "__main__":
    # Layer sizes, input layer first.
    layer_dims = [12288, 20, 7, 5, 1]
    train_x, train_y, test_x, test_y, classes = get_data_set(layer_dims)

    # Train starting from freshly initialised parameters.
    parameters = dnn_model(
        train_x,
        train_y,
        initialize_parameters(layer_dims),
        num_iterations=3000,
        learning_rate=0.0075,
        print_cost=True,
        isPlot=False,
    )

    predict_train_y = predict(train_x, parameters)
    predict_test_y = predict(test_x, parameters)

    # Accuracy = matching labels divided by the number of examples (columns).
    train_hits = np.sum(train_y == predict_train_y)
    test_hits = np.sum(test_y == predict_test_y)
    train_correct_rate = float(train_hits / train_y.shape[1])
    test_correct_rate = float(test_hits / test_y.shape[1])

    print("correct rate of train: " + str(train_correct_rate * 100) + "%")
    print("correct rate of test: " + str(test_correct_rate * 100) + "%")
3.Learning Reflection
3.1 Pay close attention to unit testing
3.2 make it easy