【机器学习】回归算法总结 | 线性回归 | 岭回归 | 套索回归 | 弹性网络回归 | KNN | CART | SVM

参考教材:《机器学习python实践》

一、线性回归

本文的演示都以波士顿房价数据集为例,它有13个输入特征和一个输出特征(房价):
【机器学习】回归算法总结 | 线性回归 | 岭回归 | | 套索回归 | 弹性网络回归 | KNN | CART| SVM_第1张图片

线性回归:

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from pandas.plotting import scatter_matrix
# Score a default LinearRegression on the Boston house-price CSV using
# shuffled 10-fold cross-validation; each fold is scored by negated MSE.
file_name = 'E:/数学建模2022/算法/回归算法/boston_house_prices.csv'
# Labels for the 13 input columns followed by the target column MEDV.
# 'CHAS' and 'PTRATIO' are the dataset's actual column names; they were
# previously misspelled as 'CARS' and 'PRTATIO'.
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
         'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
# Original author's note: passing the path directly raised an error because
# of the Chinese characters, so the file is opened first and the handle is
# given to pandas. The with-block guarantees the handle is closed afterwards.
with open(file_name) as f:
    data = pd.read_csv(f, names=names)

array = data.values  # DataFrame -> 2-D array, which is what the slicing below needs
X = array[:, :13]  # first 13 columns: input features
Y = array[:, 13]  # 14th column: label
seed = 7
num_folds = 10  # commonly 3, 5 or 10; 10 is the fallback when unsure
# KFold performs the split; n_splits is the number of folds, and the fixed
# random_state makes the shuffle reproducible.
kfold = KFold(n_splits=num_folds, random_state=seed, shuffle=True)
model = LinearRegression()  # estimator under evaluation
# 'neg_mean_squared_error' yields the negated MSE per fold; drop the sign to get MSE.
result = cross_val_score(model, X, Y, cv=kfold, scoring='neg_mean_squared_error')

print('每一折的NMSE如下')
print(result)
print('评估结果(平均NMSE)如下:{}'.format(result.mean()))


二、岭回归

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from pandas.plotting import scatter_matrix
# Score a default Ridge regressor on the Boston house-price CSV using
# shuffled 10-fold cross-validation; each fold is scored by negated MSE.
file_name = 'E:/数学建模2022/算法/回归算法/boston_house_prices.csv'
# Labels for the 13 input columns followed by the target column MEDV.
# 'CHAS' and 'PTRATIO' are the dataset's actual column names; they were
# previously misspelled as 'CARS' and 'PRTATIO'.
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
         'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
# Original author's note: passing the path directly raised an error because
# of the Chinese characters, so the file is opened first and the handle is
# given to pandas. The with-block guarantees the handle is closed afterwards.
with open(file_name) as f:
    data = pd.read_csv(f, names=names)

array = data.values  # DataFrame -> 2-D array, which is what the slicing below needs
X = array[:, :13]  # first 13 columns: input features
Y = array[:, 13]  # 14th column: label
seed = 7
num_folds = 10  # commonly 3, 5 or 10; 10 is the fallback when unsure
# KFold performs the split; n_splits is the number of folds, and the fixed
# random_state makes the shuffle reproducible.
kfold = KFold(n_splits=num_folds, random_state=seed, shuffle=True)
model = Ridge()  # estimator under evaluation
# 'neg_mean_squared_error' yields the negated MSE per fold; drop the sign to get MSE.
result = cross_val_score(model, X, Y, cv=kfold, scoring='neg_mean_squared_error')

print('每一折的NMSE如下')
print(result)
print('评估结果(平均NMSE)如下:{}'.format(result.mean()))


三、套索回归

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Lasso
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from pandas.plotting import scatter_matrix
# Score a default Lasso regressor on the Boston house-price CSV using
# shuffled 10-fold cross-validation; each fold is scored by negated MSE.
file_name = 'E:/数学建模2022/算法/回归算法/boston_house_prices.csv'
# Labels for the 13 input columns followed by the target column MEDV.
# 'CHAS' and 'PTRATIO' are the dataset's actual column names; they were
# previously misspelled as 'CARS' and 'PRTATIO'.
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
         'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
# Original author's note: passing the path directly raised an error because
# of the Chinese characters, so the file is opened first and the handle is
# given to pandas. The with-block guarantees the handle is closed afterwards.
with open(file_name) as f:
    data = pd.read_csv(f, names=names)

array = data.values  # DataFrame -> 2-D array, which is what the slicing below needs
X = array[:, :13]  # first 13 columns: input features
Y = array[:, 13]  # 14th column: label
seed = 7
num_folds = 10  # commonly 3, 5 or 10; 10 is the fallback when unsure
# KFold performs the split; n_splits is the number of folds, and the fixed
# random_state makes the shuffle reproducible.
kfold = KFold(n_splits=num_folds, random_state=seed, shuffle=True)
model = Lasso()  # estimator under evaluation
# 'neg_mean_squared_error' yields the negated MSE per fold; drop the sign to get MSE.
result = cross_val_score(model, X, Y, cv=kfold, scoring='neg_mean_squared_error')

print('每一折的NMSE如下')
print(result)
print('评估结果(平均NMSE)如下:{}'.format(result.mean()))


四、弹性网络回归

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from pandas.plotting import scatter_matrix
# Score a default ElasticNet regressor on the Boston house-price CSV using
# shuffled 10-fold cross-validation; each fold is scored by negated MSE.
file_name = 'E:/数学建模2022/算法/回归算法/boston_house_prices.csv'
# Labels for the 13 input columns followed by the target column MEDV.
# 'CHAS' and 'PTRATIO' are the dataset's actual column names; they were
# previously misspelled as 'CARS' and 'PRTATIO'.
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
         'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
# Original author's note: passing the path directly raised an error because
# of the Chinese characters, so the file is opened first and the handle is
# given to pandas. The with-block guarantees the handle is closed afterwards.
with open(file_name) as f:
    data = pd.read_csv(f, names=names)

array = data.values  # DataFrame -> 2-D array, which is what the slicing below needs
X = array[:, :13]  # first 13 columns: input features
Y = array[:, 13]  # 14th column: label
seed = 7
num_folds = 10  # commonly 3, 5 or 10; 10 is the fallback when unsure
# KFold performs the split; n_splits is the number of folds, and the fixed
# random_state makes the shuffle reproducible.
kfold = KFold(n_splits=num_folds, random_state=seed, shuffle=True)
model = ElasticNet()  # estimator under evaluation
# 'neg_mean_squared_error' yields the negated MSE per fold; drop the sign to get MSE.
result = cross_val_score(model, X, Y, cv=kfold, scoring='neg_mean_squared_error')

print('每一折的NMSE如下')
print(result)
print('评估结果(平均NMSE)如下:{}'.format(result.mean()))




五、KNN回归

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from pandas.plotting import scatter_matrix
# Score a default KNeighborsRegressor on the Boston house-price CSV using
# shuffled 10-fold cross-validation; each fold is scored by negated MSE.
file_name = 'E:/数学建模2022/算法/回归算法/boston_house_prices.csv'
# Labels for the 13 input columns followed by the target column MEDV.
# 'CHAS' and 'PTRATIO' are the dataset's actual column names; they were
# previously misspelled as 'CARS' and 'PRTATIO'.
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
         'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
# Original author's note: passing the path directly raised an error because
# of the Chinese characters, so the file is opened first and the handle is
# given to pandas. The with-block guarantees the handle is closed afterwards.
with open(file_name) as f:
    data = pd.read_csv(f, names=names)

array = data.values  # DataFrame -> 2-D array, which is what the slicing below needs
X = array[:, :13]  # first 13 columns: input features
Y = array[:, 13]  # 14th column: label
seed = 7
num_folds = 10  # commonly 3, 5 or 10; 10 is the fallback when unsure
# KFold performs the split; n_splits is the number of folds, and the fixed
# random_state makes the shuffle reproducible.
kfold = KFold(n_splits=num_folds, random_state=seed, shuffle=True)
model = KNeighborsRegressor()  # estimator under evaluation
# 'neg_mean_squared_error' yields the negated MSE per fold; drop the sign to get MSE.
result = cross_val_score(model, X, Y, cv=kfold, scoring='neg_mean_squared_error')

print('每一折的NMSE如下')
print(result)
print('评估结果(平均NMSE)如下:{}'.format(result.mean()))

六、分类与回归树

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from pandas.plotting import scatter_matrix
# Score a default DecisionTreeRegressor on the Boston house-price CSV using
# shuffled 10-fold cross-validation; each fold is scored by negated MSE.
file_name = 'E:/数学建模2022/算法/回归算法/boston_house_prices.csv'
# Labels for the 13 input columns followed by the target column MEDV.
# 'CHAS' and 'PTRATIO' are the dataset's actual column names; they were
# previously misspelled as 'CARS' and 'PRTATIO'.
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
         'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
# Original author's note: passing the path directly raised an error because
# of the Chinese characters, so the file is opened first and the handle is
# given to pandas. The with-block guarantees the handle is closed afterwards.
with open(file_name) as f:
    data = pd.read_csv(f, names=names)

array = data.values  # DataFrame -> 2-D array, which is what the slicing below needs
X = array[:, :13]  # first 13 columns: input features
Y = array[:, 13]  # 14th column: label
seed = 7
num_folds = 10  # commonly 3, 5 or 10; 10 is the fallback when unsure
# KFold performs the split; n_splits is the number of folds, and the fixed
# random_state makes the shuffle reproducible.
kfold = KFold(n_splits=num_folds, random_state=seed, shuffle=True)
# Estimator under evaluation. No random_state is passed to the tree here,
# unlike the KFold above.
model = DecisionTreeRegressor()
# 'neg_mean_squared_error' yields the negated MSE per fold; drop the sign to get MSE.
result = cross_val_score(model, X, Y, cv=kfold, scoring='neg_mean_squared_error')

print('每一折的NMSE如下')
print(result)
print('评估结果(平均NMSE)如下:{}'.format(result.mean()))


七、支持向量机

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from pandas.plotting import scatter_matrix
# Score a default SVR (support vector regressor) on the Boston house-price
# CSV using shuffled 10-fold cross-validation; each fold is scored by negated MSE.
file_name = 'E:/数学建模2022/算法/回归算法/boston_house_prices.csv'
# Labels for the 13 input columns followed by the target column MEDV.
# 'CHAS' and 'PTRATIO' are the dataset's actual column names; they were
# previously misspelled as 'CARS' and 'PRTATIO'.
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
         'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
# Original author's note: passing the path directly raised an error because
# of the Chinese characters, so the file is opened first and the handle is
# given to pandas. The with-block guarantees the handle is closed afterwards.
with open(file_name) as f:
    data = pd.read_csv(f, names=names)

array = data.values  # DataFrame -> 2-D array, which is what the slicing below needs
X = array[:, :13]  # first 13 columns: input features
Y = array[:, 13]  # 14th column: label
seed = 7
num_folds = 10  # commonly 3, 5 or 10; 10 is the fallback when unsure
# KFold performs the split; n_splits is the number of folds, and the fixed
# random_state makes the shuffle reproducible.
kfold = KFold(n_splits=num_folds, random_state=seed, shuffle=True)
model = SVR()  # estimator under evaluation
# 'neg_mean_squared_error' yields the negated MSE per fold; drop the sign to get MSE.
result = cross_val_score(model, X, Y, cv=kfold, scoring='neg_mean_squared_error')

print('每一折的NMSE如下')
print(result)
print('评估结果(平均NMSE)如下:{}'.format(result.mean()))


总结

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVR
from pandas.plotting import scatter_matrix
# Compare seven default-configured regressors on the Boston house-price CSV.
# Every model is scored with the same shuffled 10-fold split and the mean
# negated MSE of each is printed.
file_name = 'E:/数学建模2022/算法/回归算法/boston_house_prices.csv'
# Labels for the 13 input columns followed by the target column MEDV.
# 'CHAS' and 'PTRATIO' are the dataset's actual column names; they were
# previously misspelled as 'CARS' and 'PRTATIO'.
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
         'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
# Original author's note: passing the path directly raised an error because
# of the Chinese characters, so the file is opened first and the handle is
# given to pandas. The with-block guarantees the handle is closed afterwards.
with open(file_name) as f:
    data = pd.read_csv(f, names=names)

array = data.values  # DataFrame -> 2-D array, which is what the slicing below needs
X = array[:, :13]  # first 13 columns: input features
Y = array[:, 13]  # 14th column: label

seed = 7
num_folds = 10  # commonly 3, 5 or 10; 10 is the fallback when unsure
# KFold performs the split; n_splits is the number of folds, and the fixed
# random_state makes the shuffle reproducible.
kfold = KFold(n_splits=num_folds, random_state=seed, shuffle=True)
# Display name -> estimator. Insertion order is the evaluation/print order.
models = {
    'LR': LinearRegression(),
    'Ridge': Ridge(),
    'lasso': Lasso(),
    'ElasticNet': ElasticNet(),
    'KNN': KNeighborsRegressor(),
    'CART': DecisionTreeRegressor(),
    'SVM': SVR(),
}
results = []  # per-model arrays of fold scores, in the same order as `models`
for name, model in models.items():
    # Cross-validate this model; 'neg_mean_squared_error' yields the negated
    # MSE per fold, so drop the sign to get MSE.
    result = cross_val_score(model, X, Y, cv=kfold, scoring='neg_mean_squared_error')
    results.append(result)
    print('{}的评估结果(平均NMSE)为:{}'.format(name, result.mean()))


【机器学习】回归算法总结 | 线性回归 | 岭回归 | | 套索回归 | 弹性网络回归 | KNN | CART| SVM_第2张图片

你可能感兴趣的:(机器学习,机器学习,回归)