Learning objectives:
Classify with a single-hidden-layer neural network, use a nonlinear activation function, compute the cross-entropy cost, and perform forward and backward propagation
Libraries used:
numpy, matplotlib.pyplot, sklearn, sklearn.linear_model
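As import statements (a minimal set matching the list above):
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import sklearn.linear_model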
Set a random seed so results are reproducible:
np.random.seed(1)
Simple logistic regression with sklearn:
clf = sklearn.linear_model.LogisticRegressionCV()
clf.fit(X.T, Y.T)  # in our data each column is one sample; transpose so each column is one feature, as sklearn expects
LR_predictions = clf.predict(X.T)
accuracy = float((np.dot(Y, LR_predictions) + np.dot(1 - Y, 1 - LR_predictions)) / float(Y.size))  # fraction of correctly labelled points
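Why this works: np.dot(Y, LR_predictions) counts true positives and np.dot(1 - Y, 1 - LR_predictions) counts true negatives, so their sum divided by Y.size is the fraction of correct labels. A toy check with illustrative values (not from the dataset):
Y_toy = np.array([[1, 0, 1]])
p_toy = np.array([1, 1, 1])  # predicted labels, shape (3,) like clf.predict output
correct = np.dot(Y_toy, p_toy) + np.dot(1 - Y_toy, 1 - p_toy)  # array([2]): 2 of 3 correct
print(correct / Y_toy.size)  # [0.6667]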
Functions to define:
def layer_sizes(X, Y):
    n_x = X.shape[0]  # size of the input layer (number of features)
    n_h = 4  # size of the hidden layer (hard-coded to 4 here)
    n_y = Y.shape[0]  # size of the output layer
return (n_x, n_h, n_y)
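For example, with X of shape (2, m) and Y of shape (1, m), as in the planar dataset, this returns (2, 4, 1); a quick check with stand-in data:
X_chk = np.random.randn(2, 400)  # stand-in array: 2 features, 400 samples
Y_chk = np.random.randn(1, 400)  # stand-in labels
print(layer_sizes(X_chk, Y_chk))  # (2, 4, 1)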
------------------------------------
def initialize_parameters(n_x, n_h, n_y):
    W1 = np.random.randn(n_h, n_x) * 0.01  # note: W has shape (n_l, n_{l-1}); scale by a very small number, but never exactly zero
b1 = np.zeros((n_h, 1))
W2 = np.random.randn(n_y, n_h) * 0.01
b2 = np.zeros((n_y, 1))
parameters = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
return parameters
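Why random (rather than zero or otherwise identical) initialization matters: if all rows of W1 start equal, every hidden unit computes the same value and receives the same gradient, so the units never differentiate. A tiny illustration with hypothetical values, not part of the original notes:
W1_sym = np.full((4, 2), 0.5)  # hypothetical: all hidden units initialized identically
x = np.random.randn(2, 1)
print(np.dot(W1_sym, x).ravel())  # every hidden unit computes the same pre-activation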
------------------------------------
def forward_propagation(X, parameters):
W1 = parameters['W1']
b1 = parameters['b1']
W2 = parameters['W2']
b2 = parameters['b2']
    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)  # hidden layer: tanh activation
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)  # output layer: sigmoid for binary classification
    cache = {'Z1': Z1, 'A1': A1, 'Z2': Z2, 'A2': A2}  # needed during backward propagation
return A2, cache
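Note that forward_propagation calls sigmoid, which these notes never define (in the course assignment it comes from a helper module). A minimal equivalent:
def sigmoid(z):
    # logistic function: maps any real value into (0, 1)
    return 1 / (1 + np.exp(-z))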
--------------------------------------
def compute_cost(A2, Y, parameters):  # parameters is unused here; kept for a consistent interface
    m = Y.shape[1]  # number of examples
    logprobs = np.multiply(Y, np.log(A2)) + np.multiply(1 - Y, np.log(1 - A2))
    cost = -np.sum(logprobs) / m  # cross-entropy cost averaged over the batch
    cost = np.squeeze(cost)  # make sure cost is a scalar, not an array
return cost
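One practical caveat: if A2 saturates to exactly 0 or 1, np.log returns -inf and the cost becomes nan. A common guard (an addition here, not in the original notes) is to clip the probabilities first:
eps = 1e-8
A2_safe = np.clip(A2, eps, 1 - eps)  # keep probabilities strictly inside (0, 1)
logprobs = np.multiply(Y, np.log(A2_safe)) + np.multiply(1 - Y, np.log(1 - A2_safe))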
----------------------------------------
def backward_propagation(parameters, cache, X, Y):
m = X.shape[1]
W1 = parameters['W1']
W2 = parameters['W2']
A1 = cache['A1']
A2 = cache['A2']
    dZ2 = A2 - Y  # cross-entropy loss through a sigmoid output simplifies to this
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m
    dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))  # tanh'(Z1) = 1 - tanh(Z1)^2 = 1 - A1^2
dW1 = np.dot(dZ1, X.T) / m
db1 = np.sum(dZ1, axis=1, keepdims=True) / m
grads = {'dW1': dW1, 'db1': db1, 'dW2': dW2, 'db2': db2}
return grads
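These formulas are easy to get wrong, and a central-difference gradient check is a quick way to validate them. A sketch for dW2 only, reusing the functions above (not part of the original notebook, and far too slow for anything but debugging):
def numerical_dW2(parameters, X, Y, eps=1e-7):
    # numerical gradient of the cost w.r.t. W2; compare with backward_propagation(...)['dW2']
    num = np.zeros_like(parameters['W2'])
    for i in range(num.shape[0]):
        for j in range(num.shape[1]):
            plus = {k: v.copy() for k, v in parameters.items()}
            minus = {k: v.copy() for k, v in parameters.items()}
            plus['W2'][i, j] += eps
            minus['W2'][i, j] -= eps
            cost_plus = compute_cost(forward_propagation(X, plus)[0], Y, plus)
            cost_minus = compute_cost(forward_propagation(X, minus)[0], Y, minus)
            num[i, j] = (cost_plus - cost_minus) / (2 * eps)
    return num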
--------------------------------------------
def update_parameters(parameters, grads, learning_rate = 1.2):
W1 = parameters['W1']
    b1 = parameters['b1']
W2 = parameters['W2']
b2 = parameters['b2']
dW1 = grads['dW1']
db1 = grads['db1']
dW2 = grads['dW2']
db2 = grads['db2']
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
parameters = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
return parameters
-----------------------------------------------
def predict(parameters, X):
A2, cache = forward_propagation(X, parameters)
    predictions = (A2 > 0.5)  # threshold the sigmoid output at 0.5 to get 0/1 labels
    return predictions
-----------------------------------------------
Building the model:
def nn_model(X, Y, n_h, num_iterations = 10000, print_cost=False):
np.random.seed(1)
n_x = layer_sizes(X, Y)[0]
n_y = layer_sizes(X, Y)[2]
parameters = initialize_parameters(n_x, n_h, n_y)
    for i in range(0, num_iterations):
A2, cache = forward_propagation(X, parameters)
cost = compute_cost(A2, Y, parameters)
grads = backward_propagation(parameters, cache, X, Y)
parameters = update_parameters(parameters, grads)
if print_cost and i % 100 == 0:
            print('Cost after iteration %i: %f' % (i, cost))
return parameters
parameters = nn_model(X, Y, n_h = 4, num_iterations = 1000, print_cost=True)
predictions = predict(parameters, X)
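The trained network's accuracy can be reported with the same counting trick used for the logistic-regression baseline above:
accuracy = float((np.dot(Y, predictions.T) + np.dot(1 - Y, 1 - predictions.T)) / float(Y.size) * 100)
print('Accuracy: %d %%' % accuracy)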
Fine-tuning the hidden layer size:
#plt.figure(figsize = (16, 32))
hidden_layer_sizes = [1, 2, 3, 4, 5, 20, 50]
for i, n_h in enumerate(hidden_layer_sizes):
#plt.subplot(5, 2, i+1)
#plt.title('hidden layer of size %d' % n_h)
    parameters = nn_model(X, Y, n_h, num_iterations = 5000)
#plot_decision_boundary(lambda x: predict(parameters, x.T), X, Y)
    predictions = predict(parameters, X)
    accuracy = float((np.dot(Y, predictions.T) + np.dot(1 - Y, 1 - predictions.T)) / float(Y.size) * 100)
print('Accuracy for {} hidden units: {} %'.format(n_h, accuracy))