I have finished Machine Learning on Coursera, so I am collecting my answers to the programming exercises here for future students to reference.
Only the parts you have to code yourself are listed; notes are added where something needs explaining.
computeCost.m
p = X * theta;              % hypothesis h(x) for every training example
J = (p - y)' * (p - y);     % sum of squared errors
J = J / (2 * m);
gradientDescent.m
p = theta(1) - alpha * sum((X * theta - y) .* X(:, 1)) / m;
q = theta(2) - alpha * sum((X * theta - y) .* X(:, 2)) / m;
% assign simultaneously: both new values are computed before either is stored
theta(1) = p;
theta(2) = q;
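The temporaries work for two parameters, but the same update also vectorizes into one line that handles any number of features (a sketch with the same variable names, untested against the grader):

theta = theta - (alpha / m) * X' * (X * theta - y);   % simultaneous update of every theta(j)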
sigmoid.m
[rows, cols] = size(z);
for i = 1:rows
    for j = 1:cols
        g(i, j) = 1 / (1 + exp(-z(i, j)));   % sigmoid, element by element
    end
end
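The loops only spell out the element-wise operation; since exp() already works on whole matrices, a vectorized sketch is a single line:

g = 1 ./ (1 + exp(-z));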
costFunction.m
for i = 1:m
    % accumulate the logistic (cross-entropy) loss over all m examples
    J = J + (-y(i) * log(sigmoid(X(i,:) * theta)) - (1 - y(i)) * log(1 - sigmoid(X(i,:) * theta)));
end
J = J / m;
for j = 1:length(theta)
    for i = 1:m
        % partial derivative of J with respect to theta(j)
        grad(j) = grad(j) + (sigmoid(X(i,:) * theta) - y(i)) * X(i,j);
    end
end
grad = grad / m;
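Both loops collapse into matrix operations; a vectorized sketch using the same variables:

h = sigmoid(X * theta);
J = (-y' * log(h) - (1 - y)' * log(1 - h)) / m;
grad = X' * (h - y) / m;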
costFunctionReg.m
for i = 1:m
    J = J + (-y(i) * log(sigmoid(X(i,:) * theta)) - (1 - y(i)) * log(1 - sigmoid(X(i,:) * theta)));
end
J = J / m;
% regularization term; theta(1), the bias term, is not regularized
J = J + sum(theta(2:end).^2) * lambda / (2 * m);
for j = 1:length(theta)
    for i = 1:m
        grad(j) = grad(j) + (sigmoid(X(i,:) * theta) - y(i)) * X(i,j);
    end
    if j > 1
        % regularize every parameter except the bias
        grad(j) = grad(j) + lambda * theta(j);
    end
end
grad = grad / m;
lrCostFunction.m
J = sum((-y .* log(sigmoid(X * theta)) - (1 - y) .* log(1 - sigmoid(X * theta))) / m);
J = J + (lambda / (2 * m)) * sum(theta(2:end).^2);
p = transpose(X) * (sigmoid(X * theta) - y);   % unregularized gradient, times m
grad(1) = p(1) / m;                            % the bias term gets no regularization
grad(2:end) = (lambda * theta(2:end) + p(2:end)) / m;
% Octave also allows indexing the expression directly:
% grad(2:end) = (lambda * theta + transpose(X) * (sigmoid(X * theta) - y))(2:end)/m;
oneVsAll.m
initial_theta = zeros(n + 1, 1);
options = optimset('GradObj', 'on', 'MaxIter', 50);
for i = 1:num_labels
    % train one binary classifier per label, with (y == i) as the positive class
    all_theta(i, :) = fmincg(@(t)(lrCostFunction(t, X, (y == i), lambda)), initial_theta, options);
end
predictOneVsAll.m
[~, p] = max(X * all_theta.', [], 2);   % pick the label whose classifier scores highest
predict.m
X = [ones(m, 1), X];                  % add the bias unit to the input layer
X1 = sigmoid(X * Theta1.');           % hidden-layer activations
X1 = [ones(size(X1, 1), 1), X1];      % add the bias unit to the hidden layer
[~, p] = max(X1 * Theta2.', [], 2);   % output layer: take the most probable label
nnCostFunction.m
a1 = [ones(m, 1), X];
z2 = a1 * Theta1.';
a2 = sigmoid(z2);
a2 = [ones(m, 1), a2];
z3 = a2 * Theta2.';
a3 = sigmoid(z3);
% each row of a3 holds h(x_i) for one example, so a3 is 5000 x 10
% convert y into a matrix of one-hot rows
Y = zeros(m, num_labels);
for i = 1:m
    Y(i, y(i)) = 1;
end
% per-example alternative:
% for i = 1:m
%     J = J + sum(-Y(i, :) * log(a3(i, :)') - (1 - Y(i, :)) * log(1 - a3(i, :)'));
% end
% J = J/m;
% vectorized over examples, one output unit at a time
Jk = zeros(num_labels, 1);
for k = 1:num_labels
    Jk(k) = (-Y(:, k).' * log(a3(:, k))) - ((1 - Y(:, k)).' * log(1 - a3(:, k)));
end
J = sum(Jk) / m;
% regularization: skip the first column of each Theta (the bias weights)
J = J + lambda * (sum(sum(Theta1(:, 2:end).^2)) + sum(sum(Theta2(:, 2:end).^2))) / (2 * m);
% Unroll gradients
delta3 = a3 - Y;
delta2 = delta3 * Theta2.*(a2.*(1 - a2));
delta2 = delta2(:, 2: end);
Delta2 = zeros(size(delta3, 2), size(a2, 2));
Delta1 = zeros(size(delta2, 2), size(a1, 2));
for i=1:m
Delta2 = Delta2 + delta3(i, :).' * a2(i, :); Delta1 = Delta1 + delta2(i, :).' * a1(i, :);
end
Theta1_grad = Delta1/m;
Theta1_grad(:, 2:end) = Theta1_grad(:, 2:end) + Theta1(:, 2:end)*(lambda/m);
Theta2_grad = Delta2/m;
Theta2_grad(:, 2:end) = Theta2_grad(:, 2:end) + Theta2(:, 2:end)*(lambda/m);
% grad = [Theta1_grad(:) ; Theta2_grad(:)];
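The per-example accumulation loop is equivalent to two matrix products, since summing the outer products over all examples is just a matrix multiply (a sketch with the same variables):

Delta2 = delta3.' * a2;
Delta1 = delta2.' * a1;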
sigmoidGradient.m
gz = 1.0 ./ (1.0 + exp(-z));   % sigmoid of z (note the minus sign)
g = gz .* (1 - gz);            % its derivative: g'(z) = g(z) * (1 - g(z))
linearRegCostFunction.m
J = (sum((X * theta - y).^2) + sum(theta(2:end).^2) * lambda) / (2 * m);
theta1 = [0; theta(2:end)];   % zero out the bias term so it is not regularized
grad = (X' * (X * theta - y) + lambda * theta1) / m;
learningCurve.m
n = size(Xval, 1);
for i = 1:m
    % train on the first i examples, then measure both errors without regularization
    theta = trainLinearReg(X(1:i, :), y(1:i), lambda);
    error_train(i) = sum((X(1:i, :) * theta - y(1:i)).^2) / (2 * i);
    error_val(i) = sum((Xval * theta - yval).^2) / (2 * n);
end
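Inside the loop, the two error lines could equally call linearRegCostFunction with lambda set to 0, since it computes exactly this cost (a sketch; the errors are always measured unregularized, even when training used lambda > 0):

error_train(i) = linearRegCostFunction(X(1:i, :), y(1:i), theta, 0);
error_val(i) = linearRegCostFunction(Xval, yval, theta, 0);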
polyFeatures.m
for i = 1:p
    X_poly(:, i) = X.^i;   % the i-th column holds the i-th power of X
end
gaussianKernel.m
sim = exp(-sum((x1 - x2).^2) / (2 * sigma^2));   % Gaussian (RBF) kernel
processEmail.m
for i = 1:length(vocabList)
    % record the vocabulary index of every word that matches str exactly
    if strcmp(str, vocabList{i})
        word_indices = [word_indices; i];
    end
end
findClosestCentroids.m
for i = 1:size(X, 1)
    idx(i) = 1;
    minDist = sum((centroids(1, :) - X(i, :)).^2);   % avoid shadowing the builtin min()
    for j = 2:K
        d = sum((centroids(j, :) - X(i, :)).^2);
        if minDist > d
            minDist = d;
            idx(i) = j;
        end
    end
end
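A loop-free sketch of the same assignment: compute all K squared distances for an example at once and let min() pick the index. This relies on broadcasting (Octave, or MATLAB R2016b and later):

for i = 1:size(X, 1)
    [~, idx(i)] = min(sum((centroids - X(i, :)).^2, 2));
end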
computeCentroids.m
This one uses arrayfun(), which can pick out every position at which an element occurs in a collection. For example:
a = {'A', 'B', 'A', 'C', 'D'};
b = {'A'};
n = 1:length(a);
c = arrayfun(@(x) ismember(b, a(x)), n);
c = n(c);
gives
c = [1 3]
p = 1:m;
for i = 1:K
    Ck = i;
    % indices of all examples currently assigned to centroid Ck
    isCk = arrayfun(@(x) ismember(Ck, idx(x)), p);
    isCk = p(isCk);
    centroids(Ck, :) = sum(X(isCk, :)) / length(isCk);
end
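The arrayfun() detour is not strictly needed here; logical indexing on idx does the same job (a sketch):

for i = 1:K
    centroids(i, :) = mean(X(idx == i, :), 1);
end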
pca.m
% compute the covariance matrix (X is assumed to be feature-normalized already)
sigma = X' * X / m;
% its eigenvectors (the columns of U) are the principal components
[U, S, V] = svd(sigma);
projectData.m
U_reduce = U(:, 1:K);
Z = X*U_reduce;
recoverData.m
U_reduce = U(:, 1:K);
X_rec = Z*U_reduce';
estimateGaussian.m
mu = sum(X, 1) ./ m;                  % per-feature mean
sigma = zeros(m, n);
for i = 1:m
    sigma(i, :) = (X(i, :) - mu).^2;  % squared deviation of each example
end
sigma2 = sum(sigma, 1) / m;           % per-feature variance
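The loop only builds a matrix of squared deviations, so both statistics vectorize directly (a sketch; bsxfun keeps it compatible with older MATLAB):

mu = mean(X, 1);
sigma2 = mean(bsxfun(@minus, X, mu).^2, 1);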
selectThreshold.m
tp = 0; fp = 0; fn = 0;
for i = 1:length(yval)
    if (pval(i) < epsilon) && yval(i) == 1
        tp = tp + 1;   % flagged as an anomaly and really is one
    end
    if (pval(i) < epsilon) && yval(i) == 0
        fp = fp + 1;   % flagged but actually normal
    end
    if (pval(i) >= epsilon) && yval(i) == 1
        fn = fn + 1;   % missed anomaly
    end
end
prec = tp / (tp + fp);
rec = tp / (tp + fn);
F1 = 2 * prec * rec / (prec + rec);
if F1 > bestF1
    bestF1 = F1;
    bestEpsilon = epsilon;
end
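The three counters reduce to sums over logical vectors (a sketch):

predictions = (pval < epsilon);
tp = sum(predictions & (yval == 1));
fp = sum(predictions & (yval == 0));
fn = sum(~predictions & (yval == 1));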
cofiCostFunction.m
arrayfun() shows up again here; if it is unfamiliar, see the note under computeCentroids.m in ex7.
y = reshape(Y(1:end), num_movies, num_users);
r = reshape(R(1:end), num_movies, num_users);
for j = 1:num_users
    a = r(:, j);
    n = 1:num_movies;
    % indices of the movies user j has rated
    flagged = arrayfun(@(x) ismember(1, a(x)), n);
    flagged = n(flagged);
    for i = 1:length(flagged)
        J = J + (Theta(j, :) * X(flagged(i), :)' - y(flagged(i), j))^2;
    end
end
J = J / 2;
J = J + (lambda / 2) * (sum(sum(Theta.^2)) + sum(sum(X.^2)));
for i = 1:num_movies
    a = r(i, :);
    n = 1:num_users;
    % indices of the users who have rated movie i
    flagged = arrayfun(@(x) ismember(1, a(x)), n);
    flagged = n(flagged);
    for j = 1:length(flagged)
        X_grad(i, :) = X_grad(i, :) + (Theta(flagged(j), :) * X(i, :)' - y(i, flagged(j))) * Theta(flagged(j), :);
    end
end
X_grad = X_grad + X .* lambda;
for j = 1:num_users
    a = r(:, j);
    n = 1:num_movies;
    flagged = arrayfun(@(x) ismember(1, a(x)), n);
    flagged = n(flagged);
    for i = 1:length(flagged)
        Theta_grad(j, :) = Theta_grad(j, :) + (Theta(j, :) * X(flagged(i), :)' - y(flagged(i), j)) * X(flagged(i), :);
    end
end
Theta_grad = Theta_grad + Theta .* lambda;
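All of the loops collapse once R is used as a mask over the error matrix (a sketch; it computes the same cost and gradients):

err = (X * Theta' - Y) .* R;   % prediction error, zeroed for unrated entries
J = sum(sum(err.^2)) / 2 + (lambda / 2) * (sum(sum(Theta.^2)) + sum(sum(X.^2)));
X_grad = err * Theta + lambda * X;
Theta_grad = err' * X + lambda * Theta;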