from sklearn import linear_model

# Minimal ordinary least squares example: fit three samples with two
# features each, then show the learned coefficients.
reg = linear_model.LinearRegression().fit(
    X=[[0, 0], [1, 1], [2, 2]],
    y=[0, 1, 2],
)
print(reg.coef_)
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Load the diabetes dataset; X and y are returned as arrays.
X, y = datasets.load_diabetes(return_X_y=True)
print(X.shape)  # (442, 10)
print(y.shape)  # (442,)

# Use a single feature (column 2) for the demonstration. A length-1 slice
# keeps the data two-dimensional; plain X[:, 2] would be one-dimensional.
X = X[:, 2:3]
print(X)
print(X.shape)

# Split the data: the last 20 samples are held out for testing.
n_test = 20
X_train, X_test = X[:-n_test], X[-n_test:]
y_train, y_test = y[:-n_test], y[-n_test:]

# Create the linear regression object and train it on the training set.
# NOTE: LinearRegression(positive=True) constrains all coefficients to be
# non-negative, which can be useful when they represent physical or naturally
# non-negative quantities (e.g. frequency counts or prices of goods).
reg = linear_model.LinearRegression()
reg.fit(X_train, y_train)

# Predict on the held-out test set.
y_pred = reg.predict(X_test)

# MSE and R^2 are the evaluation metrics for the regression model.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report the coefficients, the mean squared error and the R^2 score.
print("Coefficients: \n", reg.coef_)
print("Mean squared error: %.2f" % mse)
print("Coefficient of determination: %.2f" % r2)

# Plot the test samples together with the fitted line.
plt.scatter(X_test, y_test, color="black")
plt.plot(X_test, y_pred, color="blue", linewidth=3)

# Hide the tick labels on both axes.
plt.xticks(())
plt.yticks(())

plt.title("regression plot")
plt.show()
from sklearn import linear_model

# Ridge regression with regularization strength alpha=0.5.
reg = linear_model.Ridge(alpha=0.5)
reg.fit(
    [[0, 0], [0, 0], [1, 1]],
    [0, 0.1, 1],
)

# Show the fitted coefficients, then the intercept.
print(reg.coef_)
print(reg.intercept_)
from sklearn import linear_model

# Lasso with alpha=0.1, fitted on two samples.
reg = linear_model.Lasso(alpha=0.1).fit([[0, 0], [1, 1]], [0, 1])

# Print the estimator itself, then its prediction for one new sample.
print(reg)
print(reg.predict([[1, 1]]))
## Elastic Net
# Parameters: alpha (α) and l1_ratio (ρ)
from sklearn import linear_model

enet = linear_model.ElasticNet(alpha=0.1, l1_ratio=0.7)
enet.fit(
    [[0, 0], [0, 0], [1, 1]],
    [0, 0.1, 1],
)

# Print the estimator itself, then its prediction for one new sample.
print(enet)
print(enet.predict([[1, 1]]))
## Multi-task Lasso
# For the multi-task Lasso, Y is a 2-D array of shape (n_samples, n_tasks).
from sklearn import linear_model

clf = linear_model.MultiTaskLasso(alpha=0.1).fit(
    [[0, 1], [1, 2], [2, 4]],
    [[0, 0], [1, 1], [2, 3]],
)

# Show the fitted coefficients, then the intercepts.
print(clf.coef_)
print(clf.intercept_)
## Multi-task Elastic-Net
# The multi-task elastic-net model jointly estimates sparse coefficients for
# multiple regression problems: Y is a 2-D array of shape
# (n_samples, n_tasks).
from sklearn import linear_model

clf = linear_model.MultiTaskElasticNet(alpha=0.1)
clf.fit(
    [[0, 0], [1, 1], [2, 2]],
    [[0, 0], [1, 1], [2, 2]],
)

# Show the fitted coefficients, then the intercepts.
print(clf.coef_)
print(clf.intercept_)
## LARS Lasso
# Least-angle regression (LARS) is a regression algorithm for
# high-dimensional data.
from sklearn import linear_model

# The `normalize` keyword is intentionally not passed: it was deprecated in
# scikit-learn 1.2 and removed in 1.4, where passing it raises TypeError.
# Since 1.2 the estimator performs no normalization by default, which is the
# behaviour the former `normalize=False` asked for.
reg = linear_model.LassoLars(alpha=0.1)
reg.fit([[0, 0], [1, 1]], [0, 1])
print(reg.coef_)
## Orthogonal Matching Pursuit (OMP)
from sklearn.linear_model import OrthogonalMatchingPursuit

# The `normalize` keyword is intentionally not passed: it was deprecated in
# scikit-learn 1.2 and removed in 1.4, where passing it raises TypeError.
# Since 1.2 the estimator performs no normalization by default, which is the
# behaviour the former `normalize=False` asked for.
omp = OrthogonalMatchingPursuit(n_nonzero_coefs=1)
omp.fit([[0, 0], [1, 1]], [0, 1])
print(omp.coef_)
## Bayesian Ridge Regression
# The prior: w - spherical Gaussian distribution
from sklearn import linear_model

# Four samples whose two features both equal the target value (0.0 .. 3.0).
Y = [float(i) for i in range(4)]
X = [[value, value] for value in Y]

reg = linear_model.BayesianRidge()
reg.fit(X, Y)

# Predict for a single new sample.
print(reg.predict([[1, 0.0]]))
## Automatic Relevance Determination (ARD) regression
# ARD regression is very similar to Bayesian ridge regression, but leads to
# sparser coefficients w.
# The prior: w - axis-parallel, elliptical Gaussian distribution
from sklearn.linear_model import ARDRegression

clf = ARDRegression().fit([[0, 0], [1, 1]], [0, 1])
print(clf.coef_)
## Generalized linear regression
# 1. There is a link function. 2. The MSE optimization objective is replaced
#    by the deviance of the chosen distribution.
# Tweedie distribution
from sklearn.linear_model import TweedieRegressor

reg = TweedieRegressor(power=1, alpha=0.5, link="log")
reg.fit(
    [[0, 0], [0, 1], [2, 2]],
    [0, 1, 2],
)

# Show the fitted coefficients, then the intercept.
print(reg.coef_)
print(reg.intercept_)
# Poisson distribution
from sklearn import linear_model

# Training data: four samples with two features each, and their targets.
X = [[1, 2], [2, 3], [3, 4], [4, 3]]
y = [12, 17, 22, 21]

clf = linear_model.PoissonRegressor()
clf.fit(X, y)

# Score on the training data, then show the fitted parameters.
print(clf.score(X, y))
print(clf.coef_)
print(clf.intercept_)

# Predict for two samples.
print(clf.predict([[1, 1], [3, 4]]))
## For more linear models, list everything the module exposes:
from sklearn import linear_model
print(dir(linear_model))
# The reference below was a bare prose line, which is a SyntaxError and
# stopped the whole file from compiling; it is kept as a comment.
# Reference: https://scikit-learn.org/stable/modules/linear_model.html#perceptron