[Andrew Ng's Machine Learning] Linear Regression Exercises

1. Single-Variable Linear Regression

In this part we implement linear regression with one variable to predict the profits of a food truck.

Suppose you are the owner of a restaurant franchise and are considering opening new outlets in different cities. The chain already has branches in various cities, and you have profit and population data for those cities. You want to use this data to help choose the next city to expand into.

The file ex1data1.txt contains the dataset for our linear regression problem. The first column is a city's population and the second column is the profit of a food truck in that city; a negative profit indicates a loss.

Python implementation:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

path = 'ex1data1.txt'
data = pd.read_csv(path, header=None, names=['Population', 'Profit'])
# print(data.head())      # preview the data
# print(data.describe())  # summary statistics
# data.plot(kind='scatter', x='Population', y='Profit', figsize=(12, 8))
# plt.show()
data.insert(0, 'Ones', 1)  # column of ones for the intercept term
# print(data.shape)  # (97, 3)
cols = data.shape[1]
X = data.iloc[:, 0:cols-1]   # half-open slice: every column except the last
Y = data.iloc[:, cols-1:cols]

# np.matrix is kept to match the course notebooks; modern NumPy prefers np.array
X = np.matrix(X.values)  # 97 x 2
y = np.matrix(Y.values)  # 97 x 1
theta = np.matrix(np.array([0, 0]))  # 1 x 2

# X = np.array(X)
# Y = np.array(Y)
# # theta = np.matrix(np.array([0,0]))
# theta = np.array([0,0]).reshape(1,2)

# print(X.shape)
# print(Y.shape)
# print(theta.shape)

# Cost function: J(theta) = 1/(2m) * sum((X*theta^T - y)^2)
def computeCost(X, y, theta):
    inner = np.power((X * theta.T - y), 2)
    return np.sum(inner) / (2 * len(X))

# j = computeCost(X, y, theta)
# print(j)
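# Expected value from the original exercise (an assumption if your copy of
# ex1data1.txt differs): with theta = [0, 0] the initial cost is about 32.07.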

# Batch gradient descent
def gradientDescent(X, y, theta, alpha, iters):
    temp = np.matrix(np.zeros(theta.shape))   # scratch matrix for the simultaneous update
    parameters = int(theta.ravel().shape[1])  # ravel() flattens theta; this counts its entries
    cost = np.zeros(iters)                    # cost history, one entry per iteration

    for i in range(iters):
        error = (X * theta.T) - y
        for j in range(parameters):
            term = np.multiply(error, X[:, j])  # element-wise product
            temp[0, j] = theta[0, j] - ((alpha / len(X)) * np.sum(term))

        theta = temp
        cost[i] = computeCost(X, y, theta)

    return theta, cost
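
# A fully vectorized alternative (a sketch, not part of the original exercise):
# the gradient of J(theta) is (1/m) * X^T * (X*theta^T - y), so every
# parameter can be updated at once and the inner loop disappears.
def gradientDescentVectorized(X, y, theta, alpha, iters):
    cost = np.zeros(iters)
    for i in range(iters):
        error = (X * theta.T) - y                         # (m, 1) residuals
        theta = theta - (alpha / len(X)) * (error.T * X)  # (1, n) update
        cost[i] = computeCost(X, y, theta)
    return theta, cost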

alpha = 0.01  # learning rate
iters = 1000  # number of iterations

# theta, cost = gradientDescent(X, y, theta, alpha, iters)
# print(theta)
# print(cost)

# j = computeCost(X, y, theta)
# print(j)

# Plot the fitted line against the training data
g, cost = gradientDescent(X, y, theta, alpha, iters)
# x = np.linspace(data.Population.min(), data.Population.max(), 100)
# f = g[0, 0] + (g[0, 1] * x)  # fitted hypothesis h(x) = theta0 + theta1 * x
# fig, ax = plt.subplots(figsize=(12, 8))
# ax.plot(x, f, 'r', label='Prediction')
# ax.scatter(data.Population, data.Profit, label='Training Data')
# ax.legend(loc=4)
# ax.set_xlabel('Population')
# ax.set_ylabel('Profit')
# plt.show()

fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(np.arange(iters), cost, 'r')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Error vs. Training Epoch')

plt.show()
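As a cross-check, the parameters found by gradient descent can be compared with an off-the-shelf least-squares fit. A minimal sketch, assuming scikit-learn is installed (it is not used in the original exercise):

from sklearn.linear_model import LinearRegression

model = LinearRegression()  # fits its own intercept, so the 'Ones' column is not needed
model.fit(data[['Population']], data.Profit)
print(model.intercept_, model.coef_)

The result should be in the same ballpark as g[0, 0] and g[0, 1]; gradient descent with alpha = 0.01 and only 1000 iterations will not have fully converged to the exact least-squares solution.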

2. Multivariate Linear Regression

In this part we implement linear regression with multiple variables to predict housing prices.

The file ex1data2.txt contains a training set of housing prices for a certain area. The first column is the size of the house (in square feet), the second column is the number of bedrooms, and the third column is the price of the house.

Python implementation:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

path = 'ex1data2.txt'
data2 = pd.read_csv(path, header=None, names=['Size', 'Bedrooms', 'Price'])

# Feature normalization (z-score): rescale each column to zero mean, unit variance
data2 = (data2 - data2.mean()) / data2.std()
data2.insert(0, 'Ones', 1)  # intercept column, added after normalizing
# print(data2.shape)
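# Aside (beyond the original exercise): pandas' std() is the sample standard
# deviation (ddof=1), while scikit-learn's StandardScaler divides by the
# population version (ddof=0), so the two scalings differ slightly.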

cols = data2.shape[1]
X = data2.iloc[:, 0:cols-1]   # half-open slice: every column except the last
Y = data2.iloc[:, cols-1:cols]

X = np.matrix(X.values)  # 47 x 3
y = np.matrix(Y.values)  # 47 x 1
theta = np.matrix(np.array([0, 0, 0]))  # 1 x 3

# Cost function: J(theta) = 1/(2m) * sum((X*theta^T - y)^2)
def computeCost(X, y, theta):
    inner = np.power((X * theta.T - y), 2)
    return np.sum(inner) / (2 * len(X))

# Batch gradient descent (identical to the single-variable version)
def gradientDescent(X, y, theta, alpha, iters):
    temp = np.matrix(np.zeros(theta.shape))   # scratch matrix for the simultaneous update
    parameters = int(theta.ravel().shape[1])  # number of parameters in theta
    cost = np.zeros(iters)                    # cost history, one entry per iteration

    for i in range(iters):
        error = (X * theta.T) - y
        for j in range(parameters):
            term = np.multiply(error, X[:, j])  # element-wise product
            temp[0, j] = theta[0, j] - ((alpha / len(X)) * np.sum(term))

        theta = temp
        cost[i] = computeCost(X, y, theta)

    return theta, cost

alpha = 0.01  # learning rate
iters = 1000  # number of iterations

theta, cost = gradientDescent(X, y, theta, alpha, iters)
j = computeCost(X, y, theta)  # final cost
# print(j)

# Plot the cost history
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(np.arange(iters), cost, 'r')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Error vs. Training Epoch')
plt.show()
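Because both the features and the target were normalized, predicting the price of a new house means pushing its features through the same transformation and then un-scaling the result. A minimal sketch (the 1650 sq-ft, 3-bedroom house is the query from the original exercise; the statistics are recomputed from the raw file because data2 was overwritten in place):

raw = pd.read_csv(path, header=None, names=['Size', 'Bedrooms', 'Price'])
mu, sigma = raw.mean(), raw.std()

house = np.matrix([[1.0,
                    (1650 - mu['Size']) / sigma['Size'],
                    (3 - mu['Bedrooms']) / sigma['Bedrooms']]])
price_normalized = (house * theta.T)[0, 0]               # prediction in normalized units
price = price_normalized * sigma['Price'] + mu['Price']  # map back to dollars
print(price)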

3. Normal Equation

The normal equation gives the least-squares solution in closed form, with no iteration and no learning rate: theta = (X^T X)^(-1) X^T y.

np.linalg.inv() computes a matrix inverse.

.dot() can be called through the numpy module or as a method on an array instance: a.dot(b) and np.dot(a, b) give the same result.
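A quick check of these two claims (a throwaway example, not part of the exercise):

import numpy as np
a = np.array([[1., 2.], [3., 4.]])
b = np.array([[5.], [6.]])
print(np.allclose(a.dot(b), np.dot(a, b)))              # True: both call styles agree
print(np.allclose(a.dot(np.linalg.inv(a)), np.eye(2)))  # True: inv(a) inverts a

Applied to the ex1data1 dataset: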

import numpy as np
import pandas as pd

path = 'ex1data1.txt'
data = pd.read_csv(path, header=None, names=['Population', 'Profit'])
data.insert(0, 'Ones', 1)  # intercept column
cols = data.shape[1]
X = data.iloc[:, 0:cols-1]   # half-open slice: every column except the last
Y = data.iloc[:, cols-1:cols]

X = np.matrix(X.values)  # 97 x 2
Y = np.matrix(Y.values)  # 97 x 1

def normalequ(X, Y):
    # closed-form solution: theta = (X^T X)^(-1) X^T Y
    theta = np.linalg.inv((X.T).dot(X)).dot(X.T).dot(Y)
    return theta

theta = normalequ(X,Y)
print(theta)
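Explicitly inverting X^T X works here, but it can be numerically fragile when features are strongly correlated. Two more stable alternatives (a sketch; all three agree up to floating-point error):

def normalEquSolve(X, Y):
    # solve the linear system (X^T X) theta = X^T Y instead of forming the inverse
    return np.linalg.solve(X.T.dot(X), X.T.dot(Y))

# np.linalg.lstsq solves the least-squares problem directly and also
# copes with a rank-deficient X:
theta_lstsq, *_ = np.linalg.lstsq(X, Y, rcond=None)
print(normalEquSolve(X, Y))
print(theta_lstsq)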
