You will learn to build the general architecture of a learning algorithm, including:
1. Initializing parameters
2. Calculating the cost function and its gradient
3. Using an optimization algorithm (gradient descent)
1. Import packages
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage
from lr_utils import load_dataset
%matplotlib inline
2. Load the dataset
# Loading the data (cat/non-cat)
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()
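The helper module lr_utils ships with the exercise and is not reproduced here. As a rough sketch of what load_dataset does, assuming the standard train_catvnoncat.h5 and test_catvnoncat.h5 HDF5 files (file paths and dataset keys are assumptions based on the usual layout):

def load_dataset():
    # File paths and dataset keys are assumptions; adjust to your copy of the data.
    with h5py.File("datasets/train_catvnoncat.h5", "r") as train_f:
        train_set_x_orig = np.array(train_f["train_set_x"])   # training images
        train_set_y = np.array(train_f["train_set_y"])        # training labels
    with h5py.File("datasets/test_catvnoncat.h5", "r") as test_f:
        test_set_x_orig = np.array(test_f["test_set_x"])      # test images
        test_set_y = np.array(test_f["test_set_y"])           # test labels
        classes = np.array(test_f["list_classes"])            # class names
    # Reshape the labels into row vectors of shape (1, m)
    train_set_y = train_set_y.reshape((1, train_set_y.shape[0]))
    test_set_y = test_set_y.reshape((1, test_set_y.shape[0]))
    return train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes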
3. Find the dataset dimensions
- m_train (number of training examples)
- m_test (number of test examples)
- num_px (= height = width of a training image)
Remember that train_set_x_orig is a NumPy array of shape (m_train, num_px, num_px, 3). For instance, you can access m_train by writing train_set_x_orig.shape[0].
m_train = train_set_x_orig.shape[0]
m_test = test_set_x_orig.shape[0]
num_px = train_set_x_orig.shape[1]
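A quick sanity check; with the standard cat/non-cat dataset these come out to 209 training examples, 50 test examples, and 64x64 images (your copy may differ):

print("Number of training examples: m_train = " + str(m_train))
print("Number of test examples: m_test = " + str(m_test))
print("Each image is of size: (" + str(num_px) + ", " + str(num_px) + ", 3)")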
4. Reshape the training and test examples
A trick for flattening a matrix X of shape (a, b, c, d) into a matrix X_flatten of shape (b*c*d, a) is to use:
X_flatten = X.reshape(X.shape[0], -1).T # X.T is the transpose of X
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
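Each column of the flattened matrix is now one image; it is worth confirming the shapes before moving on:

# Expect (num_px*num_px*3, m_train) and (num_px*num_px*3, m_test)
print("train_set_x_flatten shape: " + str(train_set_x_flatten.shape))
print("test_set_x_flatten shape: " + str(test_set_x_flatten.shape))
assert train_set_x_flatten.shape == (num_px * num_px * 3, m_train)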
5. Standardize the dataset
Each pixel value is an integer between 0 and 255, so dividing every entry by 255 scales the features into [0, 1].
train_set_x = train_set_x_flatten/255
test_set_x = test_set_x_flatten/255
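It is also worth eyeballing one example to make sure the pipeline is sane; the index below is arbitrary, and the .decode call assumes classes holds byte strings as in the standard dataset:

index = 25  # any valid example index works
plt.imshow(train_set_x_orig[index])
print("y = " + str(train_set_y[0, index]) + ", it's a '"
      + classes[np.squeeze(train_set_y[:, index])].decode("utf-8") + "' picture.")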
6. Building the parts of our algorithm
The main steps for building a Neural Network are:
1. Define the model structure (such as the number of input features)
2. Initialize the model's parameters
3. Loop:
   - Calculate current loss (forward propagation)
   - Calculate current gradient (backward propagation)
   - Update parameters (gradient descent)
6.1. Helper function: sigmoid()
The model's predictions use sigmoid(z) = 1 / (1 + e^(-z)), so implement it first:
def sigmoid(z):
    s = 1 / (1 + np.exp(-z))
    return s
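A quick check of the implementation (sigmoid(0) = 0.5 and sigmoid(2) ≈ 0.8808):

print(sigmoid(np.array([0, 2])))  # [0.5        0.88079708]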
6.2. Initializing parameters
def initialize_with_zeros(dim):
    w = np.zeros((dim, 1))  # column vector of zeros for the weights
    b = 0                   # scalar bias
    assert(w.shape == (dim, 1))
    assert(isinstance(b, float) or isinstance(b, int))
    return w, b
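For example:

w, b = initialize_with_zeros(2)
print("w = " + str(w))  # [[0.] [0.]]
print("b = " + str(b))  # 0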
6.3. Forward and backward propagation
Forward propagation computes the activations A = sigmoid(w.T X + b) and the cross-entropy cost J = -(1/m) * sum(Y*log(A) + (1-Y)*log(1-A)); backward propagation computes the gradients dw = (1/m) * np.dot(X, (A-Y).T) and db = (1/m) * sum(A-Y).
def propagate(w, b, X, Y):
    m = X.shape[1]
    # Forward propagation: activations and cross-entropy cost
    A = sigmoid(np.dot(w.T, X) + b)
    cost = (-1 / m) * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))
    # Backward propagation: gradients of the cost with respect to w and b
    dw = (1 / m) * np.dot(X, (A - Y).T)
    db = (1 / m) * np.sum(A - Y)
    assert(dw.shape == w.shape)
    assert(db.dtype == float)
    cost = np.squeeze(cost)
    assert(cost.shape == ())
    grads = {"dw": dw, "db": db}
    return grads, cost
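A toy example to exercise propagate (the inputs are arbitrary; for these values the results come out to roughly dw ≈ [[0.998], [2.395]], db ≈ 0.00146, cost ≈ 5.80):

w, b = np.array([[1.], [2.]]), 2.
X = np.array([[1., 2., -1.], [3., 4., -3.2]])
Y = np.array([[1, 0, 1]])
grads, cost = propagate(w, b, X, Y)
print("dw = " + str(grads["dw"]))
print("db = " + str(grads["db"]))
print("cost = " + str(cost))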
6.4. Optimization
Update the parameters with gradient descent: w := w - learning_rate * dw and b := b - learning_rate * db.
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
    costs = []
    for i in range(num_iterations):
        # Cost and gradients for the current parameters
        grads, cost = propagate(w, b, X, Y)
        dw = grads["dw"]
        db = grads["db"]
        # Gradient-descent update
        w = w - learning_rate * dw
        b = b - learning_rate * db
        # Record (and optionally print) the cost every 100 iterations
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration %i: %f" % (i, cost))
    params = {"w": w, "b": b}
    grads = {"dw": dw, "db": db}
    return params, grads, costs
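Continuing the toy example above (the iteration count and learning rate here are arbitrary):

params, grads, costs = optimize(w, b, X, Y, num_iterations=100,
                                learning_rate=0.009, print_cost=False)
print("w = " + str(params["w"]))
print("b = " + str(params["b"]))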
6.5. Predict function
Use the learned w and b to label each example as a cat (1) when the predicted probability exceeds 0.5 and non-cat (0) otherwise:
def predict(w, b, X):
    m = X.shape[1]
    Y_prediction = np.zeros((1, m))
    w = w.reshape(X.shape[0], 1)
    # Probability of a cat being present in each picture
    A = sigmoid(np.dot(w.T, X) + b)
    for i in range(A.shape[1]):
        # Threshold the probability at 0.5 to get a hard 0/1 prediction
        Y_prediction[0, i] = 1 if A[0, i] > 0.5 else 0
    assert(Y_prediction.shape == (1, m))
    return Y_prediction
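A small check with hand-picked weights (for these values the thresholding yields [[1. 1. 0.]]):

w = np.array([[0.1124579], [0.23106775]])
b = -0.3
X = np.array([[1., -1.1, -3.2], [1.2, 2., 0.1]])
print("predictions = " + str(predict(w, b, X)))  # [[1. 1. 0.]]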
7. Merge all functions into a model
def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):
    # Initialize parameters with zeros
    w, b = initialize_with_zeros(X_train.shape[0])
    # Gradient descent (pass print_cost through instead of hardcoding False)
    parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)
    # Retrieve parameters w and b from the dictionary "parameters"
    w = parameters["w"]
    b = parameters["b"]
    # Predict on the test and training sets
    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)
    # Print train/test accuracy
    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))
    d = {"costs": costs,
         "Y_prediction_test": Y_prediction_test,
         "Y_prediction_train": Y_prediction_train,
         "w": w,
         "b": b,
         "learning_rate": learning_rate,
         "num_iterations": num_iterations}
    return d
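Finally, train on the cat/non-cat data and plot the learning curve; learning_rate = 0.005 is the value used in the original exercise, and the accuracies you see will depend on the hyperparameters:

d = model(train_set_x, train_set_y, test_set_x, test_set_y,
          num_iterations=2000, learning_rate=0.005, print_cost=True)

# Plot the cost recorded every 100 iterations
costs = np.squeeze(d["costs"])
plt.plot(costs)
plt.ylabel("cost")
plt.xlabel("iterations (per hundreds)")
plt.title("Learning rate = " + str(d["learning_rate"]))
plt.show()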