logistic regression 逻辑回归 matlab实现

Load Data

数据格式依旧是每行一个样例,最后一列为标签。

% Load the dataset: one example per row, label in the last column.
fileName = 'xxx.txt';
data = load(fileName);

% Record the dataset dimensions (m rows, n columns including the label).
m = size(data, 1);
n = size(data, 2);

% Split into the label vector and the feature matrix.
y = data(:, end);
X = data(:, 1 : end - 1);

Plot

这里画出的图只适用于特征为二维的情况。

% Scatter-plot the two feature columns, one marker style per class.
plotData(X, y);

% Annotate the figure that plotData created.
hold on;
xlabel('Exam 1 score');
ylabel('Exam 2 score');
legend({'Admitted', 'Not admitted'});
hold off;

plotData函数,用到了一点matlab矩阵操作的小技巧。

function plotData(X, y)
%PLOTDATA Scatter-plot two-feature examples in a new figure.
%   PLOTDATA(X, y) opens a new figure and plots the first two columns of X
%   against each other: rows with y == 1 as black '+' markers and rows with
%   y == 0 as black circles filled in yellow.
    isPositive = (y == 1);
    isNegative = (y == 0);

    figure;
    hold on;
    plot(X(isPositive, 1), X(isPositive, 2), 'k+', 'LineWidth', 2, 'MarkerSize', 7);
    plot(X(isNegative, 1), X(isNegative, 2), 'ko', 'MarkerFaceColor', 'y', 'MarkerSize', 7);
    hold off;
end

Compute Cost and Gradient

为之后的fminunc函数提供costFunction的计算方法。
当分类边界是线性的(即低阶情况)时,只需在X前面加上一列1作为截距项即可。
而在分类边界明显不为直线时,可以使用mapFeature函数,将原来低次幂的特征映射为高次组合。
而在计算代价函数的时候,为了防止由于映射的次数过高产生过拟合,加入了正则化参数来限制theta。

% --- Low-order (linear) boundary: prepend the intercept column of ones --- %
X = [ones(m, 1), X];

% --- High-order (non-linear) boundary: map to polynomial features instead --- %
% X = mapFeature(X(:, 1), X(:, 2));

% Start from the all-zero parameter vector, one entry per column of X.
initial_theta = zeros(size(X, 2), 1);

% Regularization strength. costFunction leaves theta(1) (the intercept)
% unpenalized; a larger lambda shrinks the remaining parameters, which
% counters over-fitting.
lambda = 1;

% Evaluate the cost and the gradient at the starting point.
[cost, grad] = costFunction(initial_theta, X, y, lambda);

fprintf('Cost at initial theta (zeros): %f\n', cost);

mapFeature函数

function out = mapFeature(X1, X2, degree)
%MAPFEATURE Map two features to all polynomial terms up to a given degree.
%   OUT = MAPFEATURE(X1, X2) returns a matrix whose columns are
%   1, X1, X2, X1.^2, X1.*X2, X2.^2, ... up to total degree 6.
%   OUT = MAPFEATURE(X1, X2, DEGREE) uses DEGREE as the highest total
%   degree instead of the default 6.
%
%   X1 and X2 are column vectors (or scalars) of the same size. OUT has
%   one row per row of X1 and (DEGREE+1)*(DEGREE+2)/2 columns, the first
%   of which is all ones.
    if nargin < 3 || isempty(degree)
        degree = 6;  % default keeps the original two-argument behaviour
    end

    % Allocate the result once instead of growing it column by column.
    numTerms = (degree + 1) * (degree + 2) / 2;
    out = ones(size(X1, 1), numTerms);

    col = 1;  % column 1 is the constant term and stays all ones
    for i = 1 : degree
        for j = 0 : i
            col = col + 1;
            out(:, col) = (X1 .^ (i - j)) .* (X2 .^ j);
        end
    end
end

costFunction函数

function [J, grad] = costFunction(theta, X, y, lambda)
%COSTFUNCTION Regularized logistic-regression cost and gradient.
%   [J, GRAD] = COSTFUNCTION(THETA, X, Y, LAMBDA) returns the cost J and its
%   gradient GRAD (same size as THETA) at the parameter column vector
%   THETA, for the design matrix X (one example per row), the 0/1 label
%   column vector Y and the regularization strength LAMBDA. THETA(1) is
%   excluded from the penalty.
    m = length(y);

    % Evaluate the hypothesis once; both the cost and the gradient reuse it.
    h = sigmoid(X * theta);

    % Keep the arguments of log away from 0 so that a saturated prediction
    % cannot produce 0 * log(0) = NaN in the cost. Comparisons leave NaN
    % entries of h untouched, so genuinely invalid inputs still surface.
    hSafe = h;
    hSafe(h < eps) = eps;
    hSafe(h > 1 - eps) = 1 - eps;

    penalty = lambda / (2 * m) * sum(theta(2 : end) .^ 2);
    J = 1 / m * sum(-y .* log(hSafe) - (1 - y) .* log(1 - hSafe)) + penalty;

    % Vectorized gradient: X' * (h - y) yields every partial derivative at
    % once; the penalty term is then added to all entries but the first.
    grad = (X' * (h - y)) / m;
    grad(2 : end) = grad(2 : end) + lambda / m * theta(2 : end);
end

sigmoid函数

function g = sigmoid(z)
%SIGMOID Element-wise logistic function.
%   G = SIGMOID(Z) returns 1 ./ (1 + exp(-Z)), evaluated element by
%   element, so G has the same size as Z.
    expNegZ = exp(-z);
    g = 1 ./ (1 + expNegZ);
end

Regularization and Accuracies

预测类别时以0.5为阈值即可:sigmoid输出大于等于0.5判为1,否则判为0。

% 'GradObj' = 'on' tells fminunc that costFunction returns the gradient as
% its second output in addition to the cost.
options = optimset('GradObj', 'on', 'MaxIter', 400);

% Anonymous function handle: f takes the parameter vector t and evaluates
% costFunction with X, y and lambda captured from the workspace.
f = @(t) costFunction(t, X, y, lambda);
[theta, J, exit_flag] = fminunc(f, initial_theta, options);

% Plot the decision boundary for the fitted theta, then annotate the figure.
plotDecisionBoundary(theta, X, y);
hold on;
title(sprintf('lambda = %g', lambda));

% Labels and legend
xlabel('Microchip Test 1');
ylabel('Microchip Test 2');
legend('y = 1', 'y = 0', 'Decision boundary');
hold off;

% Compute the accuracy on the training set as a percentage.
p = predict(theta, X);
trainAccuracy = mean(double(p == y)) * 100;
fprintf('Train Accuracy: %f\n', trainAccuracy);

plotDecisionBoundary函数

function plotDecisionBoundary(theta, X, y)
%PLOTDECISIONBOUNDARY Plot the data and the decision boundary given by theta.
%   PLOTDECISIONBOUNDARY(theta, X, y) draws the examples in a new figure
%   ('+' for y == 1, 'o' for y == 0, via plotData) and overlays the
%   boundary defined by theta. X is expected to be either
%     1) an Mx3 matrix whose first column is the all-ones intercept
%        column, in which case the boundary is drawn as a straight line, or
%     2) an MxN matrix with N > 3 whose first column is all ones, in which
%        case the boundary is the zero contour of mapFeature(u, v) * theta.

    % Columns 2 and 3 hold the two plotted features (column 1 is the intercept).
    plotData(X(:, 2:3), y);
    hold on

    isLinear = size(X, 2) <= 3;
    if ~isLinear
        % Grid on which the non-linear boundary is evaluated.
        u = linspace(-1, 1.5, 50);
        v = linspace(-1, 1.5, 50);

        % contour needs rows to follow v and columns to follow u, so the
        % matrix is filled in that layout directly.
        z = zeros(length(v), length(u));
        for col = 1:length(u)
            for row = 1:length(v)
                z(row, col) = mapFeature(u(col), v(row)) * theta;
            end
        end

        % Draw only the level z = 0; the level must be given as [0, 0].
        contour(u, v, z, [0, 0], 'LineWidth', 2)
    else
        % Two endpoints are enough to define a line.
        plot_x = [min(X(:, 2)) - 2, max(X(:, 2)) + 2];

        % Solve theta(1) + theta(2)*x1 + theta(3)*x2 = 0 for x2.
        plot_y = (-1 ./ theta(3)) .* (theta(2) .* plot_x + theta(1));

        plot(plot_x, plot_y)

        % Legend and axis limits specific to the exercise data.
        legend('Admitted', 'Not admitted', 'Decision Boundary')
        axis([30, 100, 30, 100])
    end
    hold off

end

predict函数

function p = predict(theta, X)
%PREDICT Predict 0/1 labels with learned logistic-regression parameters.
%   P = PREDICT(THETA, X) returns a logical array with one entry per row
%   of X: true where sigmoid(X * THETA) is at least 0.5, false otherwise.
    probability = sigmoid(X * theta);
    p = (probability >= 0.5);
end

你可能感兴趣的:(matlab)