代码模拟线性回归解决机器学习问题

代码模拟线性回归解决机器学习问题

# 数据的导入
import numpy as np
from sklearn.datasets import load_boston
# Load the Boston housing data as a feature matrix X and a target vector y.
# NOTE(review): load_boston was removed in scikit-learn 1.2, so this call only
# works on older releases -- confirm the pinned scikit-learn version.
X, y = load_boston(return_X_y=True)
print(X.shape, y.shape)  # (506, 13) (506,)
# 数据的预处理
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch.utils.data as Data
# Split BEFORE scaling: the scaler's mean/variance must be estimated from the
# training rows only, otherwise test-set statistics leak into training.
train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.2)

# Fit the standardizer on the training split, then apply the same
# transformation (without refitting) to the held-out test split.
standardscaler = StandardScaler()
train_x = standardscaler.fit_transform(train_x)
test_x = standardscaler.transform(test_x)

print(train_x.shape, train_y.shape)  # (404, 13) (404,)
print(test_x.shape, test_y.shape)    # (102, 13) (102,)
# Model training: 30 iterations of batch gradient descent on the
# half mean squared error, with a fixed step size.
learning_rate = 0.1
n_train = len(train_x)

w = np.random.rand(1, 13)    # weights, one per feature column
b = np.random.rand()         # bias (intercept) term
loss_all = []                # training loss recorded at every iteration
for i in range(30):
    iteration = i + 1

    # Forward pass: broadcasting w over the rows gives one prediction per sample.
    pre_y = (w * train_x).sum(axis=1) + b
    residual = pre_y - train_y

    # Half mean squared error of the current parameters.
    loss = np.sum(residual ** 2) / (2 * n_train)
    loss_all.append(loss)
    print('第{}次,损失是{}'.format(iteration, loss))

    # Gradient step; the (13,) gradient broadcasts against the (1, 13) weights.
    w = w - learning_rate * np.dot(train_x.T, residual) / n_train
    b = b - learning_rate * np.sum(residual) / n_train
    print("第{}次,系数是{}".format(iteration, w))
    print("第{}次,常系数是{}".format(iteration, b))
# Sample console output of the final iteration, kept as a bare string literal:
"""
第30次,损失是13.401357529069488
第30次,系数是[[-0.84860307  0.56084003 -0.40875167  0.9102585  -0.4299894   3.79253055
   0.18943605 -1.11629341  0.83174091 -0.66907291 -1.67139862  0.94988543
  -3.01703923]]
第30次,常系数是21.628916738586867
"""
import matplotlib.pyplot as plt
# Plot the training loss recorded in loss_all (one point per iteration)
# as a solid red line, then display the figure.
plt.plot(loss_all,'r-')
plt.title("Train Loss")
plt.show()

代码模拟线性回归解决机器学习问题_第1张图片

# Evaluate the fitted parameters on the held-out test set.
# NOTE(review): this is the plain mean squared error (no 1/2 factor), so it is
# on twice the scale of the training loss, which divides by 2 * len(train_x).
pre_y = (w * test_x).sum(axis=1) + b
test_residual = test_y - pre_y
loss = np.sum(test_residual ** 2) / len(test_x)
print("测试机上损失是:", loss)
# Sample output: 测试机上损失是: 18.43885106692336

你可能感兴趣的:(机器学习,机器学习,python)