% Read the training set: columns 1-2 hold the features, column 3 the target
data = load('ex1data2.txt');
y = data(:, 3);
X = data(:, 1:2);
m = numel(y);   % number of training examples

% Preview the first 10 examples, one "x = [.. ..], y = .." line per row.
% fprintf consumes its argument column by column, hence the transpose.
fprintf(' x = [%.0f %.0f], y = %.0f \n', data(1:10, 1:3)');

吴恩达写的代码,data = load('ex1data2.txt');将数据先读进来,然后用X = data(:, 1:2);y = data(:, 3);将数据中的一二列取出来放到X作为特征值,把第三列取出来放到y作为解,顺便用m = length(y);把样本数求出来。


function [X_norm, mu, sigma] = featureNormalize(X)
%FEATURENORMALIZE Normalizes the features in X 
%   [X_norm, mu, sigma] = FEATURENORMALIZE(X) returns a normalized version
%   of X where the mean value of each feature is 0 and the standard
%   deviation is 1. This is often a good preprocessing step to do when
%   working with learning algorithms.
%
%   Input:
%     X      - matrix in which each row is an example and each column is
%              a feature.
%   Outputs:
%     X_norm - matrix of the same size as X, normalized column by column.
%     mu     - row vector holding the mean of each column of X.
%     sigma  - row vector holding the (sample, N-1) standard deviation
%              used to scale each column of X.
%
%   A column with zero standard deviation (a constant feature) is scaled
%   by 1 instead of 0, so its normalized values are all 0 rather than
%   NaN/Inf; sigma reports 1 for such a column.

% Column-wise statistics. The explicit dimension argument keeps a
% single-row X from being reduced along the wrong axis.
mu = mean(X, 1);
sigma = std(X, 0, 1);

% Avoid dividing by zero for constant features.
sigma(sigma == 0) = 1;

% bsxfun expands the 1-by-n row vectors across all rows of X and works in
% both Octave and MATLAB releases without implicit expansion.
X_norm = bsxfun(@rdivide, bsxfun(@minus, X, mu), sigma);





% Scale features and set them to zero mean.
% X is overwritten with the output of featureNormalize; mu and sigma are
% kept because the price prediction further down reuses them to scale its
% query point the same way.
[X, mu, sigma] = featureNormalize(X);

添加 $x_0$

$$h_\theta=\theta_0+\theta_1x_1+\theta_2x_2+\cdots+\theta_nx_n$$

为了让这个方程更整齐,补一位 $x_0=1$,就有

$$h_\theta=\theta_0x_0+\theta_1x_1+\theta_2x_2+\cdots+\theta_nx_n=
\left[\begin{matrix} x_0&x_1&\cdots&x_n \end{matrix}\right]
\left[\begin{matrix} \theta_0\\ \theta_1\\ \vdots\\ \theta_n \end{matrix}\right]$$

$$X=\left[\begin{matrix}
x_0^{(1)}&x_1^{(1)}&\cdots&x_n^{(1)}\\
x_0^{(2)}&x_1^{(2)}&\cdots&x_n^{(2)}\\
\vdots&\vdots&\ddots&\vdots\\
x_0^{(m)}&x_1^{(m)}&\cdots&x_n^{(m)}
\end{matrix}\right],\quad
\Theta=\left[\begin{matrix} \theta_0\\ \theta_1\\ \vdots\\ \theta_n \end{matrix}\right],\quad
Y=\left[\begin{matrix} y_1\\ y_2\\ \vdots\\ y_m \end{matrix}\right]$$

$$H_\theta=X\Theta$$

% Prepend a column of ones so theta(1) acts as the intercept term
X = horzcat(ones(m, 1), X);


$$\theta_j:=\theta_j-\alpha\frac{\partial J(\theta)}{\partial\theta_j}$$

$$J(\theta)=\frac{1}{2m}\sum_{i=1}^m\left(\theta_0x_0^{(i)}+\theta_1x_1^{(i)}+\cdots+\theta_nx_n^{(i)}-y^{(i)}\right)^2$$

$$\frac{\partial J(\theta)}{\partial\theta_j}=\frac{1}{m}\sum_{i=1}^m x_j^{(i)}\left(\theta_0x_0^{(i)}+\theta_1x_1^{(i)}+\cdots+\theta_nx_n^{(i)}-y^{(i)}\right)$$

$$\left[\begin{matrix} x_j^{(1)}&x_j^{(2)}&\cdots&x_j^{(m)} \end{matrix}\right]
\left(
\left[\begin{matrix}
x_0^{(1)}&x_1^{(1)}&\cdots&x_n^{(1)}\\
x_0^{(2)}&x_1^{(2)}&\cdots&x_n^{(2)}\\
\vdots&\vdots&\ddots&\vdots\\
x_0^{(m)}&x_1^{(m)}&\cdots&x_n^{(m)}
\end{matrix}\right]
\left[\begin{matrix} \theta_0\\ \theta_1\\ \vdots\\ \theta_n \end{matrix}\right]
-\left[\begin{matrix} y_1\\ y_2\\ \vdots\\ y_m \end{matrix}\right]
\right)$$

$$\Theta:=\Theta-\frac{\alpha}{m}X^T(X\Theta-Y)$$

function [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters)
%GRADIENTDESCENTMULTI Performs gradient descent to learn theta
%   [theta, J_history] = GRADIENTDESCENTMULTI(X, y, theta, alpha, num_iters)
%   updates theta by taking num_iters gradient steps with learning rate
%   alpha.
%
%   Inputs:
%     X         - design matrix, one row per training example; X * theta
%                 must be conformable with y.
%     y         - column vector of target values, one per row of X.
%     theta     - initial parameter column vector.
%     alpha     - learning rate (step size).
%     num_iters - number of gradient steps to take.
%   Outputs:
%     theta     - parameter vector after num_iters updates.
%     J_history - num_iters-by-1 vector; entry k is the cost returned by
%                 computeCostMulti after the k-th update.

% Initialize some useful values
m = length(y); % number of training examples
J_history = zeros(num_iters, 1);

for iter = 1:num_iters

    % Vectorized simultaneous update of every parameter:
    %   theta := theta - (alpha / m) * X' * (X * theta - y)
    residual = X * theta - y;
    theta = theta - (alpha / m) * (X' * residual);

    % Save the cost J in every iteration    
    J_history(iter) = computeCostMulti(X, y, theta);

end




% Gradient-descent settings: step size and number of steps
num_iters = 400;
alpha = 0.1;

% Start from the all-zero 3-by-1 parameter vector and keep only the fitted
% parameters; the cost history returned as second output is not needed here
theta = gradientDescentMulti(X, y, zeros(3, 1), alpha, num_iters);

% Display gradient descent's result, one parameter per line
fprintf('Theta computed from gradient descent:\n%f\n%f\n%f', theta(1:3))


要预测一个特征值为 (1650, 3) 的数据,只需要把它归一化后带进方程即可:

% Predict the price of a 1650 sq-ft, 3 br house.
% The query is scaled with the same mu/sigma that were applied to the
% training features; the intercept term theta(1) is used as-is.
area_offset = 1650 - mu(1);
rooms_offset = 3 - mu(2);
price = theta(1) + theta(2) * area_offset / sigma(1) + theta(3) * rooms_offset / sigma(2);

fprintf('Predicted price of a 1650 sq-ft, 3 br house (using gradient descent):\n $%f', price);
