用scikit-learn学习线性回归和Ridge回归

线性回归是根据已有的输入和输出样本,通过学习得到预测模型。使用scikit-learn库可以完成线性回归并求得模型系数,还可以用Ridge回归对模型进行正则化。

线性回归

'''
Linear regression with scikit-learn.

Fits an ordinary least-squares model on a train/test split of the data in
copy.CSV, reports MSE/RMSE on the held-out split and under 10-fold
cross-validation, then plots measured vs. predicted values.

Reference: https://www.cnblogs.com/pinard/p/6013484.html
'''

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_predict, train_test_split

# Forward slash instead of '.\copy.CSV': the backslash form relied on the
# invalid escape sequence '\c' and only resolved on Windows.
data = pd.read_csv('./copy.CSV')
# print(data.head())    # first five rows
# print(data.shape)     # data dimensions

x = data[['AT', 'V', 'AP', 'RH']]   # input features
# print(x.head())

y = data[['PE']]    # target column, kept as a one-column DataFrame
# print(y.head())

# Split into training and validation sets (train_test_split defaults to
# 75% / 25%); random_state is fixed so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

linreg = LinearRegression()
linreg.fit(x_train, y_train)    # fit ordinary least squares
print(linreg.intercept_, linreg.coef_)  # model parameters (b, w)

# Evaluate on the held-out split: mean squared error and its square root.
y_pred = linreg.predict(x_test)
test_mse = metrics.mean_squared_error(y_test, y_pred)
print("MSE:", test_mse)
print("RMSE:", np.sqrt(test_mse))

# 10-fold cross-validation over the full data set.
predicted = cross_val_predict(linreg, x, y, cv=10)
cv_mse = metrics.mean_squared_error(y, predicted)
print("MSE:", cv_mse)
print("RMSE:", np.sqrt(cv_mse))

# Plot measured vs. predicted values; the closer a point lies to the dashed
# y = x line, the lower its prediction loss.
pe_min, pe_max = y['PE'].min(), y['PE'].max()   # scalar bounds for the line
fig, ax = plt.subplots()
ax.scatter(y, predicted)
ax.plot([pe_min, pe_max], [pe_min, pe_max], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()

Ridge回归

'''
Ridge regression with scikit-learn.

Fits a Ridge model with a fixed alpha on a train/test split of the data in
copy.CSV, then lets RidgeCV choose alpha from a list of candidates.

Reference: https://www.cnblogs.com/pinard/p/6023000.html
'''

import matplotlib.pyplot as plt
# %matplotlib inline
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.model_selection import train_test_split

# Forward slash instead of '.\copy.CSV': the backslash form relied on the
# invalid escape sequence '\c' and only resolved on Windows.
data = pd.read_csv('./copy.CSV')

x = data[['AT', 'V', 'AP', 'RH']]   # input features
y = data[['PE']]                    # target column, one-column DataFrame

# Split into training and validation sets (train_test_split defaults to
# 75% / 25%); random_state is fixed so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)

# Ridge regression with the regularization strength alpha fixed at 1.
ridge = Ridge(alpha=1)
ridge.fit(x_train, y_train)
print(ridge.coef_, ridge.intercept_)

# RidgeCV picks the best alpha among the candidates automatically.
ridgecv = RidgeCV(alphas=[0.01, 0.1, 0.5, 1, 3, 4, 5, 7, 10, 20, 100])
ridgecv.fit(x_train, y_train)
print(ridgecv.alpha_)   # the selected alpha

你可能感兴趣的:(Python,机器学习,回归,scikit-learn,学习)