数据集
Size,Price
1,11
2,21
3,31
4,41
5,51
6,61
7,71
8,81
9,91
10,101
11,111
12,121
13,131
14,141
15,151
16,161
17,171
18,181
19,191
20,201
21,211
22,221
23,231
24,241
25,251
26,261
27,271
28,281
29,291
30,301
31,311
32,321
33,331
34,341
35,351
36,361
37,371
38,381
39,391
40,401
代码
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the table; the first column is used as the feature and the last
# column as the target.
dataset = pd.read_csv('Linear_Regression.csv')
X0 = dataset.iloc[:, 0].values
Y = dataset.iloc[:, -1].values

# Hold out the last 10 rows as the test set; the first d rows are used for
# training.
d = X0.shape[0] - 10

# Feature values of each split as (n, 1) column vectors.
X0t = X0[:d].reshape(-1, 1)
X0t_test = X0[d:].reshape(-1, 1)

# Prepend a column of ones so that beta[0] acts as the intercept term.
# ones_like keeps the dtype of the feature column.
X = np.hstack((np.ones_like(X0t), X0t))
X0_test = np.hstack((np.ones_like(X0t_test), X0t_test))

# Solve the normal equations (X^T X) beta = X^T y on the training rows.
Xt = X.T
XtX = Xt @ X
XtY = Xt @ Y[:d]
beta = np.linalg.solve(XtX, XtY)
print(beta)
# Print the model's prediction next to the actual target for every held-out
# test row.
for data, actual in zip(X0[d:], Y[d:]):
    # Same layout as one design-matrix row: [intercept term, feature].
    x = np.array([1, data])
    prediction = np.dot(x, beta)
    print('prediction = ' + str(prediction) + ' actual = ' + str(actual))
def _draw_panel(position, sizes, prices, fitted, title):
    """Draw one subplot: observed points in red, fitted line in black.

    ``position`` is passed straight to ``plt.subplot``; the axes object it
    returns is handed back to the caller.
    """
    axes = plt.subplot(position)
    plt.scatter(sizes, prices, color='red')  # observed points
    plt.plot(sizes, fitted, color='black')  # line predicted by the model
    plt.xlabel('Size')
    plt.ylabel('Price')
    plt.title(title)
    return axes


plt.figure(figsize=(10, 12))  # set the canvas size
# The canvas is split into 2 rows x 1 column; the first slot shows the
# training points together with the fitted model.
figure = _draw_panel(211, X0[:d], Y[:d], np.dot(X[:d], beta), 'Train set')
# Second slot: apply the model to the test set to check the fit.
_draw_panel(212, X0[d:], Y[d:], np.dot(X0_test, beta), 'Test set')
plt.show()