Here is the download link for the exercise and its solutions (it costs one CSDN credit):
https://download.csdn.net/download/wukongakk/10602657
And here are my notes on the neural network lectures from the course, free of charge:
https://blog.csdn.net/WukongAKK/article/details/81746916
This neural network exercise is more comprehensive than work3part2: it covers backpropagation, visualizing the hidden layer, and verifying the gradients (gradient checking), among other things.
The main script is too long to include here.
See the explanation in work3part1.
Skipped.
First compute the cost J once without regularization, i.e. with lambda = 0.
%% unregularized Feedforward cost function lambda=0
% % Feedforward: add a column of ones (bias) to the X data matrix
% X = [ones(m, 1) X];
% a2 = sigmoid(X * Theta1'); % layer-2 (hidden layer) activations
% a2 = [ones(m, 1) a2]; % add the bias unit to layer 2
% a3 = sigmoid(a2 * Theta2');
%
% cost = Y .* log(a3) + (1 - Y) .* log(1 - a3); % cost is an m*K (5000*10) matrix; sum(cost(:)) sums every entry
% J= -1 / m * sum(cost(:));
%% regularized Feedforward cost function lambda=1
% Feedforward: add a column of ones (bias) to the X data matrix
X = [ones(m, 1) X];
a2 = sigmoid(X * Theta1'); % layer-2 (hidden layer) activations
a2 = [ones(m, 1) a2]; % add the bias unit to layer 2
a3 = sigmoid(a2 * Theta2');
temp1 = [zeros(size(Theta1,1),1) Theta1(:,2:end)]; % zero out the first column (bias terms) so they are not regularized
temp2 = [zeros(size(Theta2,1),1) Theta2(:,2:end)];
temp1 = sum(temp1 .^2); % square each parameter, then sum; sum(A) sums the columns of A, sum(A,2) sums the rows, sum(A(:)) sums all elements
temp2 = sum(temp2 .^2);
cost = Y .* log(a3) + (1 - Y) .* log(1 - a3); % cost is an m*K (5000*10) matrix; sum(cost(:)) sums every entry
J= -1 / m * sum(cost(:)) + lambda/(2*m) * ( sum(temp1(:))+ sum(temp2(:)) );
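Note that the snippets above use Y, which is not defined in the excerpt: it is the m x num_labels one-hot recoding of the label vector y (the label y(i) becomes a 1 in column y(i) of row i). A minimal sketch of how it can be built, since that part of the code is not shown in this post:
Y = zeros(m, num_labels);
for i = 1:m
    Y(i, y(i)) = 1;   % set the column corresponding to the label of example i
end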
function g = sigmoidGradient(z)
%SIGMOIDGRADIENT returns the gradient of the sigmoid function
%evaluated at z
% g = SIGMOIDGRADIENT(z) computes the gradient of the sigmoid function
% evaluated at z. This should work regardless if z is a matrix or a
% vector. In particular, if z is a vector or matrix, you should return
% the gradient for each element.
g = zeros(size(z));
% ====================== YOUR CODE HERE ======================
% Instructions: Compute the gradient of the sigmoid function evaluated at
% each value of z (z can be a matrix, vector or scalar).
g = sigmoid(z) .* (1 - sigmoid(z));
% =========================================================================
end
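A quick sanity check (not part of the submitted code): the sigmoid gradient peaks at z = 0, where it equals exactly 0.25, and falls off symmetrically on both sides.
g = sigmoidGradient([-1 -0.5 0 0.5 1]);
disp(g);   % the middle entry should be 0.25, with symmetric, decreasing values around it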
function W = randInitializeWeights(L_in, L_out)
%RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer with L_in
%incoming connections and L_out outgoing connections
% W = RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights
% of a layer with L_in incoming connections and L_out outgoing
% connections.
%
% Note that W should be set to a matrix of size(L_out, 1 + L_in) as
% the first column of W handles the "bias" terms
%
% You need to return the following variables correctly
W = zeros(L_out, 1 + L_in);
% ====================== YOUR CODE HERE ======================
% Instructions: Initialize W randomly so that we break the symmetry while
% training the neural network.
%
% Note: The first column of W corresponds to the parameters for the bias units
%
epsilon_init = 0.12;
W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init;
% =========================================================================
end
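The fixed value epsilon_init = 0.12 works well for this architecture; the exercise notes also suggest scaling it with the layer sizes. A sketch of that variant:
% heuristic from the exercise notes: epsilon_init = sqrt(6) / sqrt(L_in + L_out)
epsilon_init = sqrt(6) / sqrt(L_in + L_out);
W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init;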
This part covers two things: backpropagation and gradient checking.
Backpropagation
%% Compute the gradients (backpropagation)
delta_1 = zeros(size(Theta1));
delta_2 = zeros(size(Theta2));
for t = 1:m
% step 1
a_1 = X(t,:)'; % take row t and transpose it into a column vector (X already contains the bias column added above)
% a_1 = [1 ; a_1];
z_2 = Theta1 * a_1;
a_2 = sigmoid(z_2);
a_2 = [1 ; a_2]; % prepend the bias unit (1)
z_3 = Theta2 * a_2;
a_3 = sigmoid(z_3);
% step 2
err_3 = zeros(num_labels,1); % num_labels*1 (10*1) zero vector
for k = 1:num_labels
err_3(k) = a_3(k) - (y(t) == k); % error at the output layer
end
% step 3
err_2 = Theta2' * err_3; % err_2 has 26 rows (it still includes the bias unit)
err_2 = err_2(2:end) .* sigmoidGradient(z_2); % drop the bias error term so 25 rows remain; sigmoidGradient(z_2) has only 25 rows
% step 4
delta_2 = delta_2 + err_3 * a_2';
delta_1 = delta_1 + err_2 * a_1';
end
% step 5
Theta1_temp = [zeros(size(Theta1,1),1) Theta1(:,2:end)]; % bias column is not regularized
Theta2_temp = [zeros(size(Theta2,1),1) Theta2(:,2:end)];
Theta1_grad = 1 / m * delta_1 + lambda/m * Theta1_temp;
Theta2_grad = 1 / m * delta_2 + lambda/m * Theta2_temp ;
% -------------------------------------------------------------
% =========================================================================
% Unroll gradients
grad = [Theta1_grad(:) ; Theta2_grad(:)];
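For reference, the per-example loop above can also be written in a fully vectorized form. A minimal sketch, assuming X already carries the bias column and Y is the m x num_labels one-hot matrix from the cost section (this is an equivalent formulation, not the code used in this post):
A1 = X;                                               % m x (input_layer_size + 1)
Z2 = A1 * Theta1';                                    % m x hidden_layer_size
A2 = [ones(m, 1) sigmoid(Z2)];                        % m x (hidden_layer_size + 1)
A3 = sigmoid(A2 * Theta2');                           % m x num_labels
D3 = A3 - Y;                                          % output-layer errors
D2 = (D3 * Theta2(:, 2:end)) .* sigmoidGradient(Z2);  % hidden-layer errors, bias column dropped
delta_1 = D2' * A1;                                   % same size as Theta1
delta_2 = D3' * A2;                                   % same size as Theta2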
Gradient checking
a. First build a small neural network whose parameters come from debugInitializeWeights; that function uses the sin function, so the small network's parameters are identical on every run.
b. Use computeNumericalGradient to approximate each derivative from its definition (finite differences), then compare the result with the gradient computed by backpropagation.
function checkNNGradients(lambda)
%CHECKNNGRADIENTS Creates a small neural network to check the
%backpropagation gradients
% CHECKNNGRADIENTS(lambda) Creates a small neural network to check the
%backpropagation gradients; it will output the analytical gradients
% produced by your backprop code and the numerical gradients (computed
% using computeNumericalGradient). These two gradient computations should
% result in very similar values.
%
if ~exist('lambda', 'var') || isempty(lambda)
lambda = 0;
end
input_layer_size = 3;
hidden_layer_size = 5;
num_labels = 3;
m = 5;
% We generate some 'random' test data
Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size);
Theta2 = debugInitializeWeights(num_labels, hidden_layer_size);
% Reusing debugInitializeWeights to generate X
X = debugInitializeWeights(m, input_layer_size - 1);
y = 1 + mod(1:m, num_labels)'; % labels cycle through 1..num_labels
% Unroll parameters
nn_params = [Theta1(:) ; Theta2(:)];
% Short hand for cost function
costFunc = @(p) nnCostFunction(p, input_layer_size, hidden_layer_size, ...
num_labels, X, y, lambda);
[cost, grad] = costFunc(nn_params);
numgrad = computeNumericalGradient(costFunc, nn_params);
% Visually examine the two gradient computations. The two columns
% you get should be very similar.
disp([numgrad grad]);
fprintf(['The above two columns you get should be very similar.\n' ...
'(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n']);
% Evaluate the norm of the difference between two solutions.
% If you have a correct implementation, and assuming you used EPSILON = 0.0001
% in computeNumericalGradient.m, then diff below should be less than 1e-9
diff = norm(numgrad-grad)/norm(numgrad+grad); % norm computes the Euclidean (2-)norm
fprintf(['If your backpropagation implementation is correct, then \n' ...
'the relative difference will be small (less than 1e-9). \n' ...
'\nRelative Difference: %g\n'], diff);
end
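In the main exercise script this check is run twice, first without and then with regularization; the calls look roughly like this (lambda = 3 is the value the course script uses):
checkNNGradients;          % gradients without regularization (lambda defaults to 0)
lambda = 3;
checkNNGradients(lambda);  % gradients with regularization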
function W = debugInitializeWeights(fan_out, fan_in)
%DEBUGINITIALIZEWEIGHTS Initialize the weights of a layer with fan_in
%incoming connections and fan_out outgoing connections using a fixed
%strategy, this will help you later in debugging
% W = DEBUGINITIALIZEWEIGHTS(fan_out, fan_in) initializes the weights
% of a layer with fan_in incoming connections and fan_out outgoing
% connections using a fix set of values
%
% Note that W should be set to a matrix of size(fan_out, 1 + fan_in) as
% the first column of W handles the "bias" terms
%
% Set W to zeros
W = zeros(fan_out, 1 + fan_in);
% Initialize W using "sin", this ensures that W is always of the same
% values and will be useful for debugging
W = reshape(sin(1:numel(W)), size(W)) / 10;
% =========================================================================
end
function numgrad = computeNumericalGradient(J, theta)
%COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences"
%and gives us a numerical estimate of the gradient.
% numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical
% gradient of the function J around theta. Calling y = J(theta) should
% return the function value at theta.
% Notes: The following code implements numerical gradient checking, and
% returns the numerical gradient. It sets numgrad(i) to (a numerical
% approximation of) the partial derivative of J with respect to the
% i-th input argument, evaluated at theta. (i.e., numgrad(i) should
% be (approximately) the partial derivative of J with respect
% to theta(i).)
%
numgrad = zeros(size(theta));
perturb = zeros(size(theta));
e = 1e-4;
for p = 1:numel(theta) % numel(A) returns the number of elements in A
% Set perturbation vector
perturb(p) = e;
loss1 = J(theta - perturb);
loss2 = J(theta + perturb);
% Compute Numerical Gradient
numgrad(p) = (loss2 - loss1) / (2*e);
perturb(p) = 0;
end
end
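As a standalone illustration (not part of the assignment), the finite-difference routine can be tested on a function whose gradient is known in closed form, e.g. J(theta) = theta' * theta, whose gradient is 2 * theta:
quadJ  = @(t) t' * t;                        % simple quadratic cost
theta0 = [1; -2; 0.5];
numgrad = computeNumericalGradient(quadJ, theta0);
disp([numgrad 2 * theta0]);                  % the two columns should agree closely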
Add regularization to the cost function (this is the same regularized cost shown above).
%% regularized Feedforward cost function lambda=1
% Feedforward: add a column of ones (bias) to the X data matrix
X = [ones(m, 1) X];
a2 = sigmoid(X * Theta1'); % layer-2 (hidden layer) activations
a2 = [ones(m, 1) a2]; % add the bias unit to layer 2
a3 = sigmoid(a2 * Theta2');
temp1 = [zeros(size(Theta1,1),1) Theta1(:,2:end)]; % zero out the first column (bias terms) so they are not regularized
temp2 = [zeros(size(Theta2,1),1) Theta2(:,2:end)];
temp1 = sum(temp1 .^2); % square each parameter, then sum; sum(A) sums the columns of A, sum(A,2) sums the rows, sum(A(:)) sums all elements
temp2 = sum(temp2 .^2);
cost = Y .* log(a3) + (1 - Y) .* log(1 - a3); % cost is an m*K (5000*10) matrix; sum(cost(:)) sums every entry
J= -1 / m * sum(cost(:)) + lambda/(2*m) * ( sum(temp1(:))+ sum(temp2(:)) );
Train the neural network.
% You have now implemented all the code necessary to train a neural
% network. To train your neural network, we will now use "fmincg", which
% is a function which works similarly to "fminunc". Recall that these
% advanced optimizers are able to train our cost functions efficiently as
% long as we provide them with the gradient computations.
%
fprintf('\nTraining Neural Network... \n')
% After you have completed the assignment, change the MaxIter to a larger
% value to see how more training helps.
options = optimset('MaxIter', 400);
% You should also try different values of lambda
lambda = 1;
% Create "short hand" for the cost function to be minimized
costFunction = @(p) nnCostFunction(p, ...
input_layer_size, ...
hidden_layer_size, ...
num_labels, X, y, lambda);
% Now, costFunction is a function that takes in only one argument (the
% neural network parameters)
[nn_params, cost] = fmincg(costFunction, initial_nn_params, options);
% Obtain Theta1 and Theta2 back from nn_params
Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ...
hidden_layer_size, (input_layer_size + 1));
Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ...
num_labels, (hidden_layer_size + 1));
fprintf('Program paused. Press enter to continue.\n');
pause;
% You can now "visualize" what the neural network is learning by
% displaying the hidden units to see what features they are capturing in
% the data.
fprintf('\nVisualizing Neural Network... \n')
displayData(Theta1(:, 2:end));
fprintf('\nProgram paused. Press enter to continue.\n');
pause;
Finally, run the predictions.
With 50 iterations, accuracy is 95.60%.
With 100 iterations, accuracy is 97.74%.
With 400 iterations, accuracy is 99.54%.
function p = predict(Theta1, Theta2, X)
%PREDICT Predict the label of an input given a trained neural network
% p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the
% trained weights of a neural network (Theta1, Theta2)
% Useful values
m = size(X, 1);
num_labels = size(Theta2, 1);
% You need to return the following variables correctly
p = zeros(size(X, 1), 1);
h1 = sigmoid([ones(m, 1) X] * Theta1'); % hidden-layer activations
h2 = sigmoid([ones(m, 1) h1] * Theta2'); % output-layer activations
[dummy, p] = max(h2, [], 2); % predicted label = index of the largest output in each row
% =========================================================================
end
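The accuracies listed above come from comparing the predictions with the training labels, as in the course's main script:
pred = predict(Theta1, Theta2, X);
fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100);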