# 线性回归根据已有的输入和输出样本学习得到预测模型。可使用scikit-learn库完成线性回归并求得模型系数,并使用Ridge回归进行正则化。
'''
https://www.cnblogs.com/pinard/p/6013484.html
'''
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# from sklearn import datasets, linear_model
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_predict, train_test_split

# Load the dataset. The raw string keeps the Windows-style relative path
# byte-identical at runtime while avoiding the invalid "\c" escape sequence,
# which newer Python versions warn about.
data = pd.read_csv(r'.\copy.CSV')
# print(data.head())  # first five rows
# print(data.shape)   # dataset dimensions

x = data[['AT', 'V', 'AP', 'RH']]  # sample features (model inputs)
# print(x.head())
y = data[['PE']]  # target output, kept as a one-column DataFrame
# print(y.head())

# Split into training and validation sets; with no explicit sizes,
# train_test_split uses its default 75% / 25% split.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

# Fit the linear regression on the training set.
linreg = LinearRegression()
linreg.fit(x_train, y_train)
print(linreg.intercept_, linreg.coef_)  # model parameters (b, w)

# Evaluate on the validation set: mean squared error and its square root.
# The MSE is computed once and reused for the RMSE.
y_pred = linreg.predict(x_test)
test_mse = metrics.mean_squared_error(y_test, y_pred)
print("MSE:", test_mse)
print("RMSE:", np.sqrt(test_mse))

# Evaluate again using 10-fold cross-validated predictions over all samples.
predicted = cross_val_predict(linreg, x, y, cv=10)
cv_mse = metrics.mean_squared_error(y, predicted)
print("MSE:", cv_mse)
print("RMSE:", np.sqrt(cv_mse))

# Plot measured against predicted values; the closer a point lies to the
# dashed y = x diagonal, the lower its prediction error.
fig, ax = plt.subplots()
ax.scatter(y, predicted)
# Use plain scalars for the diagonal's endpoints rather than one-element
# Series, so the line is drawn from explicit (min, min) to (max, max).
y_low, y_high = y.values.min(), y.values.max()
ax.plot([y_low, y_high], [y_low, y_high], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()
'''
https://www.cnblogs.com/pinard/p/6023000.html
'''
import matplotlib.pyplot as plt
# %matplotlib inline
import numpy as np
import pandas as pd
# from sklearn import datasets, linear_model
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.model_selection import train_test_split

# Load the dataset. The raw string keeps the Windows-style relative path
# byte-identical at runtime while avoiding the invalid "\c" escape sequence,
# which newer Python versions warn about.
data = pd.read_csv(r'.\copy.CSV')
x = data[['AT', 'V', 'AP', 'RH']]  # sample features (model inputs)
y = data[['PE']]  # target output, kept as a one-column DataFrame

# Split into training and validation sets; with no explicit sizes,
# train_test_split uses its default 75% / 25% split.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)

# Ridge regression with a fixed regularization strength (alpha = 1).
ridge = Ridge(alpha=1)
ridge.fit(x_train, y_train)
print(ridge.coef_, ridge.intercept_)

# Let RidgeCV choose the best alpha from the candidate list.
ridgecv = RidgeCV(alphas=[0.01, 0.1, 0.5, 1, 3, 4, 5, 7, 10, 20, 100])
ridgecv.fit(x_train, y_train)
print(ridgecv.alpha_)