Python code:
import pandas as pd
import matplotlib.pyplot as plt

# Load the training data and label the two columns
path = 'ex1data1.txt'
data = pd.read_csv(path, header=None, names=['Population', 'Profit'])
print(data)

# Scatter plot of Profit against Population
data.plot(x='Population', y='Profit', kind='scatter', figsize=(10, 8))
plt.show()
The result is the printed data and a scatter plot of Profit against Population.
Cost function definition:
import numpy as np

def computeCost(x, y, theta):
    # Squared error summed over all m examples, divided by 2m
    total = np.power(((x * theta.T) - y), 2)
    return np.sum(total) / (2 * len(x))
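This implements the usual squared-error cost for linear regression with hypothesis $h_\theta(x) = \theta^T x$:

$$J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right)^2$$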
Implementation code:
# Prepend a column of ones so theta[0] serves as the intercept term
data.insert(0, 'Ones', 1)

# Split into features X (all but the last column) and target Y (last column)
cols = data.shape[1]
X = data.iloc[:, 0:cols-1]
Y = data.iloc[:, cols-1:cols]

X = np.matrix(X.values)
Y = np.matrix(Y.values)
theta = np.matrix(np.array([0, 0]))  # start from all-zero parameters
print(computeCost(X, Y, theta))
The result is: 32.072733877455676
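As a quick sanity check (my addition, not in the original): with theta fixed at zero every prediction is zero, so the cost reduces to the mean of the squared targets divided by two, which reproduces the number above:

# J(0) = sum(y^2) / (2m) when every prediction is zero
print(np.sum(np.power(Y, 2)) / (2 * len(X)))  # 32.072733877455676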
Gradient descent definition:
def gradientDescent(x, y, theta, alpha, iters):
    temp = np.matrix(np.zeros(theta.shape))   # staging area for the update
    parameters = int(theta.ravel().shape[1])  # number of parameters
    cost = np.zeros(iters)                    # cost history, one entry per iteration
    for i in range(iters):
        error = (x * theta.T) - y
        # Compute every parameter's update from the same error vector,
        # then apply them all simultaneously
        for j in range(parameters):
            term = np.multiply(error, x[:, j])
            temp[0, j] = theta[0, j] - ((alpha / len(x)) * np.sum(term))
        theta = temp
        cost[i] = computeCost(x, y, theta)
    return theta, cost
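Each iteration applies the standard batch update for every parameter $j$ simultaneously:

$$\theta_j := \theta_j - \frac{\alpha}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)}$$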
Main code:
alpha = 0.01
iters = 1000
g, cost = gradientDescent(X, Y, theta, alpha, iters)
print(g)
print(computeCost(X, Y, g))

# Evaluate the fitted line over the range of the data for plotting
x = np.linspace(data.Population.min(), data.Population.max(), 100)
f = g[0, 0] + g[0, 1] * x

# Predictions at population values 3.5 and 7
predict1 = [1, 3.5] * g.T
print('predict1 : ', predict1)
predict2 = [1, 7] * g.T
print('predict2 : ', predict2)
The results are as follows:
[[-3.63029144 1.16636235]]
4.483388256587726
predict1 : [[0.45197679]]
predict2 : [[4.53424501]]
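A small helper (my sketch, not in the original post) wraps the fitted parameters so predictions do not have to be written out by hand:

def predict(population, theta):
    # theta is the 1x2 parameter matrix returned by gradientDescent
    return theta[0, 0] + theta[0, 1] * population

print(predict(3.5, g))  # matches predict1 above
print(predict(7, g))    # matches predict2 above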
Linear regression plot:
Code:
fig, ax = plt.subplots(figsize=(10, 8))
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(data.Population, data.Profit, label='Training Data')
ax.legend(loc=2)  # without this call the labels above never appear
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predict Profit vs. Population Size')
plt.show()
The 3D cost-surface code is as follows:
from mpl_toolkits.mplot3d import Axes3D
# Evaluate the cost over a grid of (theta0, theta1) values
theta0_vals = np.linspace(-10, 20, 20)
theta1_vals = np.linspace(-1, 4, 20)
J_vals = np.zeros((theta0_vals.shape[0], theta1_vals.shape[0]))
for i in range(theta0_vals.shape[0]):
    for j in range(theta1_vals.shape[0]):
        # computeCost expects a 1x2 matrix, not a 1-D array
        t = np.matrix([theta0_vals[i], theta1_vals[j]])
        J_vals[i, j] = computeCost(X, Y, t)

fig = plt.figure()
ax = fig.add_subplot(projection='3d')  # Axes3D(fig) is deprecated in recent Matplotlib
theta0_vals, theta1_vals = np.meshgrid(theta0_vals, theta1_vals)
plt.title('Visualizing Graph')
# J_vals is indexed [theta0, theta1], while meshgrid uses 'xy' indexing,
# so transpose it to line up with the two grids
ax.plot_surface(theta0_vals, theta1_vals, J_vals.T, cmap='rainbow')
ax.set_xlabel('theta0')
ax.set_ylabel('theta1')
ax.set_zlabel('J')
plt.show()
The result is as follows:
Contour plot:
# Filled contours plus labeled contour lines of the cost surface
plt.contourf(theta0_vals, theta1_vals, J_vals.T, 10, cmap='rainbow')
C = plt.contour(theta0_vals, theta1_vals, J_vals.T, 10, colors='black')
plt.clabel(C, inline=True, fontsize=10)
# Mark the parameters found by gradient descent
plt.plot(g[0, 0], g[0, 1], c='r', marker='x')
plt.show()
The result is as follows:
For the multivariate case, the code is as follows:
import pandas as pd

path = 'ex1data2.txt'
data = pd.read_csv(path, header=None, names=['Size', 'Bedroom', 'Price'])

# Feature normalization: rescale every column to zero mean and unit standard deviation
data = (data - data.mean()) / data.std()
print(data.head())
The result is as follows:
|   | Size      | Bedroom   | Price     |
|---|-----------|-----------|-----------|
| 0 | 0.130010  | -0.223675 | 0.475747  |
| 1 | -0.504190 | -0.223675 | -0.084074 |
| 2 | 0.502476  | -0.223675 | 0.228626  |
| 3 | -0.735723 | -1.537767 | -0.867025 |
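The transformation applied above is the usual mean normalization, where $\mu$ and $\sigma$ are the per-column mean and standard deviation:

$$x_{\text{norm}} = \frac{x - \mu}{\sigma}$$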
# Add the intercept column and split into features and target as before
data.insert(0, 'x0', 1)
cols = data.shape[1]
print(cols)
X = data.iloc[:, 0:cols-1]
Y = data.iloc[:, cols-1:cols]
X = np.matrix(X.values)
Y = np.matrix(Y.values)
theta = np.matrix(np.zeros(data.shape[1] - 1))  # one zero per column of X

alpha = 0.01
iters = 1000  # avoid shadowing the built-in iter()
g, cost = gradientDescent(X, Y, theta, alpha, iters)
print(g)
The result is as follows:
[[-1.10910099e-16 8.78503652e-01 -4.69166570e-02]]
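These values are close to, but not exactly, the closed-form solution found below, because gradient descent has not fully converged after 1000 iterations. A quick check (my addition; the iteration count is arbitrary) is simply to run longer:

# With more iterations the parameters should drift toward the
# normal-equation solution computed in the next section.
g_long, _ = gradientDescent(X, Y, theta, alpha, 5000)
print(g_long)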
Normal equation code:
def normalEquation(x, y):
    # Closed-form least squares: theta = (X^T X)^(-1) X^T y
    result = np.linalg.inv(x.T @ x) @ x.T @ y
    return result
theta = normalEquation(X,Y)
print(theta)
The result is as follows:
[[-1.11022302e-16]
[ 8.84765988e-01]
[-5.31788197e-02]]
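np.linalg.inv fails when $X^T X$ is singular (for example, with linearly dependent features). A more forgiving variant (my suggestion, not in the original; the helper name is hypothetical) uses the pseudo-inverse:

def normalEquationPinv(x, y):
    # np.linalg.pinv(x) equals (X^T X)^(-1) X^T when X has full column rank,
    # and still returns a least-squares solution when it does not
    return np.linalg.pinv(x) @ y

print(normalEquationPinv(X, Y))  # should match normalEquation above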
Cost curve code:
fig, ax = plt.subplots(figsize=(10, 8))
ax.plot(np.arange(iters), cost, 'r')  # cost history recorded by gradientDescent
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Error vs. Training Epoch')
plt.show()
The result is as follows: