Machine Learning Algorithms: Multivariate Linear Regression Exercise (2)

Problem Requirements

1. Complete each of the requirements below.
Write the following program in Python.
Given a sample dataset from a combined cycle power plant, split into a training set (ccpp_train.txt) and a test set (ccpp_test.txt). The dataset format is as follows:

AT (ambient temperature)  V (exhaust vacuum)  AP (ambient pressure)  RH (relative humidity)  PE (electrical power output)
8.34,40.77,1010.84,90.01,480.48
23.64,58.49,1011.4,74.2,445.75
29.74,56.9,1007.15,41.91,438.76

Implement a linear regression model in Python and use it to predict the electrical power output PE. Specific requirements:
Read in the dataset
Implement the cost function
Implement the gradient descent function
Print the cost function value during the iterations
Predict on the test set and compute the cost function value on the test set
Plot a scatter plot with the true values on the horizontal axis and the predicted values on the vertical axis
(The formulas implemented here are written out after this list.)
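
With m training samples, a design matrix X whose first column is all ones (the bias term), and a learning rate α, the model, cost function, and gradient descent update implemented below are the standard ones for multivariate linear regression:

$$h_\theta(X) = X\theta$$

$$J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)^2$$

$$\theta := \theta - \frac{\alpha}{m}\,X^{\mathsf{T}}(X\theta - y)$$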

Code Implementation

import numpy as np
from matplotlib import pyplot as plt
# Load the data (both files are comma-separated)
data_train = np.loadtxt(r'ccpp_train.txt', delimiter=',')
data_test = np.loadtxt(r'ccpp_test.txt', delimiter=',')


# Define the preprocessing function
def preprocess(data):
    # Split off the features and the target
    X = data[:, :-1]
    y = data[:, -1]
    # Feature scaling: standardize to zero mean and unit variance
    X -= np.mean(X, axis=0)
    X /= np.std(X, axis=0, ddof=1)
    # Prepend a column of ones for the bias term, make y a column vector
    X = np.c_[np.ones(len(X)), X]
    y = np.c_[y]
    # Preprocessing done; return the results
    return X, y


# Call the preprocessing function to get the processed data
X_train, y_train = preprocess(data_train)
X_test, y_test = preprocess(data_test)
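
Note that preprocess standardizes each dataset with its own mean and standard deviation. A more careful setup scales the test set with the statistics of the training set, so both sets are mapped into the same feature space. A minimal sketch of that variant follows; the function name preprocess_with_stats and its mu/sigma parameters are illustrative additions, not part of the original exercise:

def preprocess_with_stats(data, mu=None, sigma=None):
    # Split off the features and the target (as a column vector)
    X = data[:, :-1]
    y = np.c_[data[:, -1]]
    # Compute statistics from this data only when none are supplied
    if mu is None:
        mu = np.mean(X, axis=0)
    if sigma is None:
        sigma = np.std(X, axis=0, ddof=1)
    # Standardize with the supplied (training) statistics
    X = (X - mu) / sigma
    # Prepend the bias column
    X = np.c_[np.ones(len(X)), X]
    return X, y, mu, sigma

# Usage: fit the statistics on the training set, then reuse them on the test set
# X_train, y_train, mu, sigma = preprocess_with_stats(data_train)
# X_test, y_test, _, _ = preprocess_with_stats(data_test, mu, sigma)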


# Define the model (hypothesis): h = X @ theta
def model(X, theta):
    h = np.dot(X, theta)
    return h


# Define the cost function: mean squared error divided by 2
def costFunction(h, y):
    m = len(h)
    J = (1.0 / (2 * m)) * np.sum(np.square(h - y))
    return J
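
As a quick sanity check with toy values (not from the dataset): for predictions [1, 2] against targets [0, 2], the cost should be (1/(2·2))·(1² + 0²) = 0.25.

# Toy example: one prediction off by 1, one exact
h_demo = np.array([[1.0], [2.0]])
y_demo = np.array([[0.0], [2.0]])
print(costFunction(h_demo, y_demo))  # 0.25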


# Define gradient descent
def gradientDescent(X, y, alpha=0.01, iter_num=2000):
    # Get the dimensions
    m, n = X.shape
    # Initialize theta to zeros
    theta = np.zeros((n, 1))
    # Initialize the cost history
    J_history = np.zeros(iter_num)
    # Run gradient descent
    for i in range(iter_num):
        # Get the model's predictions for the current theta
        h = model(X, theta)
        # Record the cost for this iteration
        J_history[i] = costFunction(h, y)
        # Print the cost periodically
        if i % 500 == 0:
            print(J_history[i])
        # Compute the gradient
        deltatheta = (1.0 / m) * np.dot(X.T, h - y)
        # Update theta
        theta -= alpha * deltatheta
    # Training done; return the cost history and theta
    return J_history, theta
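
If desired, the loop can also stop early once the cost no longer improves. Below is a minimal sketch of that variation, reusing the model and costFunction defined above; the name gradientDescentEarlyStop and the tol parameter are additions, not part of the original exercise:

def gradientDescentEarlyStop(X, y, alpha=0.01, iter_num=2000, tol=1e-8):
    m, n = X.shape
    theta = np.zeros((n, 1))
    J_history = []
    for i in range(iter_num):
        h = model(X, theta)
        J_history.append(costFunction(h, y))
        # Stop when the improvement between iterations becomes negligible
        if i > 0 and abs(J_history[-2] - J_history[-1]) < tol:
            break
        theta -= alpha * (1.0 / m) * np.dot(X.T, h - y)
    return np.array(J_history), theta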


# Run gradient descent to get the trained theta and the cost history
J_history, theta = gradientDescent(X_train, y_train, alpha=0.01, iter_num=5000)
# Feed theta and the test data into the model to get the predictions
y_test_h = model(X_test, theta)
# Compute the cost on the test set from the predictions and the true values
J_test_h = costFunction(y_test_h, y_test)
print('Cost on the test set:')
print(J_test_h)


# Define the accuracy score: the coefficient of determination R^2
def score(h, y):
    u = np.sum(np.square(h - y))
    v = np.sum(np.square(y - np.mean(y)))
    return 1 - u / v


# Compute the score on the test set
ss = score(y_test_h, y_test)
print("R^2 score:")
print(ss)
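
The score function is the coefficient of determination, R² = 1 − Σ(h−y)² / Σ(y−ȳ)². Assuming scikit-learn is installed, the result can be cross-checked against its r2_score:

from sklearn.metrics import r2_score

# Should agree with score() above up to floating-point error
print(r2_score(y_test, y_test_h))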

# Plot the cost curve
plt.plot(J_history)
plt.xlabel('iteration')
plt.ylabel('cost J')
plt.show()

# Plot the true values against the predicted values
plt.scatter(y_test, y_test)    # reference: points where prediction equals truth
plt.scatter(y_test, y_test_h)  # predictions vs. true values
plt.xlabel('true PE')
plt.ylabel('predicted PE')
plt.show()



Data Preparation

Training set sample (excerpt)

13.51,43.41,1015.94,75.22,463.86
18.87,60.07,1015.15,70.91,453.27
13.42,41.74,1020.96,61.8,473.45
18.36,56.65,1020.29,82,456.49
26.8,72.58,1008.94,78.24,428.62
14.46,42.86,1031.34,69.84,464.44
14.93,43.02,1012.11,45.56,468.19
20.19,44.57,1009.2,72.13,454.36
29.79,77.17,1009.68,64,432.84
13.9,39.59,1011.84,94.74,465
26.94,73.21,1002.83,91.25,431.19
25.31,65.48,1018.31,55.57,439.72
26.64,58.69,1007.99,75.68,439.32
14.02,40.75,1016.05,70.65,470.48
19.15,59.21,1018.41,88.9,450.26
22.46,48.41,1008.66,80.85,442.57
14.12,41.39,1018.73,76.51,472.88
24.57,49.5,1014.22,56.31,455.72
21.58,63.87,1015.27,63.15,451.88
19.3,46.93,1014.83,66.71,456.62
22.43,55.97,1008.97,85.98,443.74
30.91,76.2,1008.53,58.08,434.94
16.02,71.14,1019.75,70.42,456.35
24.14,60.07,1016.56,58.08,440.17
16.05,43.14,1010.67,79.36,463.06
22.7,64.05,1012.65,89.69,448.76
8.83,36.3,1027.08,72.69,479.86
24.61,69.68,1012.06,92.47,438.51
30.61,69.13,1009.32,55.17,429.1
5.68,40.77,1022.49,90.6,487.58
7.98,39.61,1018.57,77.04,479.78

Test set sample (excerpt)

22.46,48.41,1008.66,80.85,442.57
14.12,41.39,1018.73,76.51,472.88
24.57,49.5,1014.22,56.31,455.72
21.58,63.87,1015.27,63.15,451.88
19.3,46.93,1014.83,66.71,456.62
22.43,55.97,1008.97,85.98,443.74
30.91,76.2,1008.53,58.08,434.94
16.02,71.14,1019.75,70.42,456.35
24.14,60.07,1016.56,58.08,440.17
16.05,43.14,1010.67,79.36,463.06
22.7,64.05,1012.65,89.69,448.76
8.83,36.3,1027.08,72.69,479.86
24.61,69.68,1012.06,92.47,438.51
30.61,69.13,1009.32,55.17,429.1
5.68,40.77,1022.49,90.6,487.58
7.98,39.61,1018.57,77.04,479.78
5.49,38.5,1012.18,79.33,490.84
26.31,71.29,1009.87,84.16,432.92
16.27,56.89,1013.74,84.36,454.88
12.73,44.34,1015.11,93.55,472.94
20.28,62.52,1017.89,75.67,452.45
4.96,39.4,1003.58,92.22,486.09
8.07,43.69,1017.05,87.34,485.18
8.74,40.03,1016.81,93.37,481.07
20.25,55.5,1020.03,69.33,455.13
20.13,60.07,1014.79,63.57,453.49
18.99,44.6,1014.7,40.11,463.48
24.4,67.45,1015.63,57.1,435.47
6.17,39.33,1012.57,93.32,491.54
22.49,45.61,1013.1,75.69,455.12
22.04,57.32,1012.54,62.17,447.27
21.81,63.77,1014.28,83.66,444.52
10.16,41.62,1013.15,94.3,465.05
10.07,44.68,1023.44,90.95,477.52
23.61,63.94,1012.9,87.06,441.57
23.34,59.44,1012.67,80.76,445.24
15.02,42.07,1017.89,83.68,460.82
7.6,41.04,1021.82,88.97,475.32
25.42,66.05,1016.74,68.92,442.6
23.71,60.23,1009.76,90.67,439.66
32.33,69.89,1014.18,50.93,427.29
6.49,39.33,1010.85,91.85,489.22
7.57,37.49,1009.73,83.07,481.98
13.89,44.84,1023.66,92.97,466.74
8.9,36.24,1013.29,89.35,479.03
15.49,54.3,1017.59,71.26,464.24
23.74,65.34,1013.7,62.9,447.31
26.02,68.67,1006.73,75.19,440.12
8.61,37.49,1009.35,82.62,477.13
13.31,41.26,1020.83,79.55,462.87
22.93,62.26,1011.25,83.66,438.34
22.83,70.79,1006.36,92.07,438
24.52,59.15,1014.03,74.83,439.55
30.55,70.04,1010.51,49.37,429.56
17.36,43.96,1013.02,79.59,466.36
24.21,71.77,1004.52,84.96,433.42
27.19,64.27,1013.06,58.13,444.54
19.05,59.21,1017.99,89.53,451
23.34,45.61,1012.73,74.09,455.82
32.69,72.86,1003.57,56.84,431.76
8.73,36.18,1013.66,77.74,479.25
10.41,44.68,1023.53,91.38,474.7
22.28,58.12,1014.54,83.27,448.97
25.14,60.93,1007.44,76.71,437.4
18.26,61.27,1019.1,74.74,428.67
23.74,63.9,1014.73,81.9,445.47
31.46,70.79,1003.54,59.51,425.68
31.68,70.79,1004.05,54.5,429.55
26.62,72.43,1006.79,82.74,430.22
21.47,50.12,1009.19,93.68,448.11
13.47,41.14,1026.09,82.96,463.67
14.8,43.99,1022.89,85.25,461.97
31.12,67.69,1005.3,50.46,425.21
29.2,64.84,1009.94,55.37,441.9
12.42,43.14,1015.88,79.48,471.1
17.51,53.16,1013.13,82.86,457.45
13.87,42.99,1007.45,81.52,471.12
25.07,77.95,1012.87,83,438.55
12.88,42.74,1026.25,74.54,470.89
23.31,60.08,1017.14,64.35,452.65
12.33,38.91,1017.24,79.84,472.49
20.51,39.72,1002.25,47.97,452.39
16.2,45.76,1014.73,89.84,460.87
29.6,71.58,1010.34,52.56,434.64
25.94,66.49,1012.83,61.81,433.38
7.73,39.04,1018.61,68.23,482.39
15.08,42.77,1018.67,73.89,461.6
9.83,41.17,1019.34,72.29,478.21
27.44,52.3,1008.15,58.92,441.75
12.01,41.48,1017.75,66.67,469.08
20.99,50.78,1008.55,75.14,449.07
5.4,39.4,1011.45,91.84,485.86
7.87,41.06,1020.91,87.64,486.57
20.78,62.52,1017.58,73.3,452.3
31.26,68.94,1005.94,39.49,438.03
20.03,60.77,1017.23,87.82,449.31
23.34,63.73,1012.1,79.11,443.68
23.14,60.27,1018.51,80.54,442.59
29.75,73.5,1011.13,67.31,433.63
21.93,62.91,1013.45,74.62,449.17
9.93,39.04,1023.78,77.08,480.54
18.7,52.72,1024.84,57.72,458.06
15.67,45.17,1018.73,94.74,462.09
23.62,45.87,1007.75,58.69,445.55
6.22,38.68,1017.87,69.41,483.55
24.66,63.73,1011.4,74.52,444.37
11.94,44.6,1018.69,85.33,468.53
7.11,43.13,1018.96,87.82,486.11
21.54,58.12,1015.33,78.67,454.32
29.45,64.96,1005.52,59.92,433.04
31.85,68.3,1014.76,47.06,428.72
27.3,65.12,1016.24,44.87,442.78
13.51,39.31,1012.18,75.19,466.46
16.73,39.64,1008.94,74.91,464.46
25.45,69.59,1008.51,83.17,445.61
12.88,44.34,1016.03,88.51,474.94
22.29,43.79,1015.68,41.75,461.23
12.07,40.81,1025.63,68.02,475.96
18.21,62.26,1011.97,87.28,455.88
17.53,42.24,1016.9,60.95,470.63
23.66,61.86,1013.33,83.09,444.27
14.18,40.69,1014.73,74.88,471.52
13.85,41.39,1018.62,75.55,471.45
