Andrew Ng Machine Learning, Week 5 Programming Assignment ex4 Solutions

nnCostFunction.m

a1 = [ones(m, 1) X];   % 5000 x 401, add the bias column to the input layer
z2 = a1 * Theta1';     % 5000 x 25,  Theta1 is 25 x 401
a2 = sigmoid(z2);      % 5000 x 25
a2 = [ones(m, 1) a2];  % 5000 x 26,  add the bias column to the hidden layer
z3 = a2 * Theta2';     % 5000 x 10,  Theta2 is 10 x 26
a3 = sigmoid(z3);      % 5000 x 10
h = a3;                % 5000 x 10, hypothesis: one row of class probabilities per example

u = eye(num_labels);   % identity matrix with one row per label
y = u(y,:);            % 5000 x 10: use each label in y as a row index into u, so every
                       % label is replaced by the corresponding one-hot row vector
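% For example, with num_labels = 3 and y = [2; 1; 3], u(y,:) picks rows 2, 1 and 3
% of eye(3), giving [0 1 0; 1 0 0; 0 0 1] -- one one-hot row per training example.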
J = 1/m * (sum(sum(-y .* log(h) - (1 - y) .* log(1 - h)))); % the element-wise .* is essential here;
                                                            % sum(sum(...)) first sums down each column (over the 5000 examples),
                                                            % then across the 10 classes


% regularize all weights except the bias column (first column) of each Theta
regularization = lambda/(2 * m) * (sum(sum(Theta1(:,2:end) .^ 2)) + sum(sum(Theta2(:,2:end) .^ 2)));
J = J + regularization;   % (the += shorthand works in Octave but not in MATLAB)

delta3 = a3 - y;                        % 5000 x 10, output-layer error
delta2 = delta3 * Theta2;               % 5000 x 26, propagate the error back through Theta2
delta2 = delta2(:,2:end);               % 5000 x 25, drop the bias column
delta2 = delta2 .* sigmoidGradient(z2); % 5000 x 25, scale by the sigmoid derivative at z2
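% Equivalently, delta2 = (delta3 * Theta2(:,2:end)) .* sigmoidGradient(z2)
% removes the bias column before the multiplication instead of after it.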

Delta1 = zeros(size(Theta1));           % 25 * 401 
Delta2 = zeros(size(Theta2));           % 10 * 26
Delta1 = Delta1 + delta2' * a1;         % 25 * 401 = (5000x25)' * (5000x401)
Delta2 = Delta2 + delta3' * a2;         % 10 * 26  = (5000x10)' * (5000x26)
% We use Delta for this accumulated error matrix: entry (i, j) is the error attributed
% to activation unit i of the next layer as influenced by parameter j of this layer.
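% Note: because delta2 and a1 contain all 5000 examples, the single product
% delta2' * a1 already sums the per-example outer products delta2(t,:)' * a1(t,:)
% over t = 1..m, i.e. it is the vectorized form of the accumulation loop from the lectures.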
Theta1_grad = 1 / m * Delta1 + lambda / m * Theta1;
Theta2_grad = 1 / m * Delta2 + lambda / m * Theta2;
Theta1_grad(:,1) = 1 / m * Delta1(:,1);   % overwrite the first column so the bias weights are not regularized
Theta2_grad(:,1) = 1 / m * Delta2(:,1);
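
For reference, the cost and gradients computed above are the regularized quantities from the ex4 exercise:

J(\Theta) = \frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}\Big[-y_k^{(i)}\log\big(h_\Theta(x^{(i)})\big)_k - (1-y_k^{(i)})\log\big(1-(h_\Theta(x^{(i)}))_k\big)\Big] + \frac{\lambda}{2m}\Big[\sum_{j,k}\big(\Theta^{(1)}_{j,k}\big)^2 + \sum_{j,k}\big(\Theta^{(2)}_{j,k}\big)^2\Big]

where the regularization sums skip the first (bias) column of each Theta, and

\frac{\partial J}{\partial \Theta^{(l)}_{ij}} = \frac{1}{m}\Delta^{(l)}_{ij} + \frac{\lambda}{m}\Theta^{(l)}_{ij} \ (j \ge 1), \qquad \frac{\partial J}{\partial \Theta^{(l)}_{ij}} = \frac{1}{m}\Delta^{(l)}_{ij} \ (j = 0)

which is why the first column of each gradient is overwritten at the end. If your copy of the assignment includes the standard checkNNGradients helper, running it with and without lambda compares these analytic gradients against numerical estimates; the relative difference should be on the order of 1e-9.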

sigmoidGradient.m

g = sigmoid(z) .* (1 - sigmoid(z));
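
This is the derivative of the sigmoid: for g(z) = \frac{1}{1+e^{-z}},

g'(z) = \frac{e^{-z}}{(1+e^{-z})^2} = g(z)\,\big(1 - g(z)\big)

which is why sigmoid(z) appears twice. A quick sanity check: sigmoidGradient(0) should return 0.25, and the value approaches 0 for large positive or negative z.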

randInitializeWeights.m

epsilon_init = 0.12;
W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init;
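
The constant 0.12 follows the heuristic mentioned in the exercise handout, \epsilon_{init} = \sqrt{6}/\sqrt{L_{in} + L_{out}}, which keeps the initial weights small while breaking symmetry between hidden units. A minimal sketch of the per-layer variant (an optional alternative, not required by the assignment):

epsilon_init = sqrt(6) / sqrt(L_in + L_out);                   % about 0.12 for L_in = 400, L_out = 25
W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init;   % uniform in [-epsilon_init, epsilon_init]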
