机器学习Sklearn学习总结3——回归与聚类算法

目录

一、线性回归

线性回归与梯度下降对比的代码:

二、线性回归改进——岭回归

三、线性回归各指标的案例

四、模型的保存与加载

五、KMeans聚类算法

代码案例:

 KMeans总结

六、总结

 代码集


一、线性回归

机器学习Sklearn学习总结3——回归与聚类算法_第1张图片

线性回归与梯度下降对比的代码:

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.metrics import mean_squared_error


def linear1():
    """
    正规方程的优化方法对波士顿房价进行预测
    :return:
    """
    # 1)获取数据
    boston = load_boston()
    print("特征数量:\n", boston.data.shape)

    # 2) 划分数据集
    x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=22)

    # 3) 标准化
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4)预估器
    estimator = LinearRegression()
    estimator.fit(x_train, y_train)

    # 5)得出模型
    print("正规方程——权重系数为:\n", estimator.coef_)
    print("正规方程——偏置为:\n", estimator.intercept_)

    # 6)模型评估
    y_predict = estimator.predict(x_test)
    print("预测房价:\n", y_predict)
    error = mean_squared_error(y_test, y_predict)
    print("正规方程——均方误差为:\n", error)

    return None


def linear2():
    """
    梯度下降的优化方法对波士顿房价进行预测
    :return:
    """
    # 1)获取数据
    boston = load_boston()

    # 2) 划分数据集
    x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=22)

    # 3) 标准化
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4)预估器
    estimator = SGDRegressor(learning_rate="constant", eta0=0.01, max_iter=10000)
    estimator.fit(x_train, y_train)

    # 5)得出模型
    print("梯度下降——权重系数为:\n", estimator.coef_)
    print("梯度下降——偏置为:\n", estimator.intercept_)

    # 6)模型评估
    y_predict = estimator.predict(x_test)
    print("预测房价:\n", y_predict)
    error = mean_squared_error(y_test, y_predict)
    print("梯度下降——均方误差为:\n", error)

    return None


if __name__ == '__main__':
    # 代码1:正规方程的优化方法对波士顿房价进行预测
    linear1()
    # 代码2:梯度下降的优化方法对波士顿房价进行预测
    linear2()

二、线性回归改进——岭回归

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge
from sklearn.metrics import mean_squared_error


def linear3():
    """
    岭回归对波士顿房价进行预测
    :return:
    """
    # 1)获取数据
    boston = load_boston()

    # 2) 划分数据集
    x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=22)

    # 3) 标准化
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4)预估器
    estimator = Ridge(alpha=0.5, max_iter=10000)
    estimator.fit(x_train, y_train)

    # 5)得出模型
    print("岭回归——权重系数为:\n", estimator.coef_)
    print("岭回归——偏置为:\n", estimator.intercept_)

    # 6)模型评估
    y_predict = estimator.predict(x_test)
    print("预测房价:\n", y_predict)
    error = mean_squared_error(y_test, y_predict)
    print("岭回归——均方误差为:\n", error)

    return None


if __name__ == '__main__':
    # 代码3:岭回归对波士顿房价进行预测
    linear3()

三、线性回归各指标的案例

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score


def handle_data():
    # 1、读取数据
    data = pd.read_csv("cancel.csv")
    # 2、缺失值处理
    # 1)替换-》np.nan
    data = data.replace(to_replace="?", value=np.nan)
    # 2)删除缺失样本
    data.dropna(inplace=True)

    # 3、划分数据集
    # 筛选特征值和目标值
    x = data.iloc[:, 1:-1]
    y = data["Class"]
    x_train, x_test, y_train, y_test = train_test_split(x, y)

    # 标准化
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 5、预估器流程
    estimator = LogisticRegression()
    estimator.fit(x_train, y_train)

    # 逻辑回归的模型参数:回归系数和偏置
    print("逻辑回归的模型参数:回归系数和偏置:\n", estimator.coef_, "\n", estimator.intercept_)

    # 6、模型评估
    # 方法1:直接比对真实值和预测值
    y_predict = estimator.predict(x_test)
    print("y_predict:\n", y_predict)
    print("直接比对真实值和预测值:\n", y_test == y_predict)

    # 方法2: 计算准确率
    score = estimator.score(x_test, y_test)
    print("准确率:\n", score)

    # 查看精确率、召回率、F1-score
    report = classification_report(y_test, y_predict, labels=[2, 4], target_names=["良性", "恶性"])

    # y_true: 每个样本的真实类别,必须为0(反例),1(正例)标记
    # 将y_test 转换成 0 1
    y_true = np.where(y_test > 3, 1, 0)
    msg = roc_auc_score(y_true, y_predict)
    

if __name__ == '__main__':
    handle_data()

四、模型的保存与加载

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge
from sklearn.metrics import mean_squared_error
import joblib


def linear3():
    """
    岭回归对波士顿房价进行预测
    :return:
    """
    # 1)获取数据
    boston = load_boston()

    # 2) 划分数据集
    x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=22)

    # 3) 标准化
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4)预估器
    estimator = Ridge(alpha=0.5, max_iter=10000)
    estimator.fit(x_train, y_train)

    # 保存模型
    joblib.dump(estimator, "my.ridge.pkl")
    # 加载模型
    estimator = joblib.load("my.ridge.pkl")

    # 5)得出模型
    print("岭回归——权重系数为:\n", estimator.coef_)
    print("岭回归——偏置为:\n", estimator.intercept_)

    # 6)模型评估
    y_predict = estimator.predict(x_test)
    print("预测房价:\n", y_predict)
    error = mean_squared_error(y_test, y_predict)
    print("岭回归——均方误差为:\n", error)

    return None


if __name__ == '__main__':
    # 代码3:岭回归对波士顿房价进行预测
    linear3()

五、KMeans聚类算法

机器学习Sklearn学习总结3——回归与聚类算法_第2张图片

代码案例:

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge
from sklearn.metrics import mean_squared_error
import joblib
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score


def linear4():
    """
    KMeans算法对波士顿房价进行预测
    :return:
    """
    # 1)获取数据
    boston = load_boston()

    # 2) 划分数据集
    x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=22)

    # 3) 标准化
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4)预估器
    estimator = KMeans(n_clusters=3)
    estimator.fit(boston)

    y_predict = estimator.predict(boston)

    # 模型评估-轮廓系数
    silhouette_score(boston, y_predict)

    return None


if __name__ == '__main__':
    linear4()

 KMeans总结

特点分析:采用迭代算法,直观易懂并且非常实用。

缺点:容易收敛到局部最优解(多次聚类)。

应用场景: 没有进行聚类之前(没有目标值)。

六、总结

机器学习Sklearn学习总结3——回归与聚类算法_第3张图片

 机器学习Sklearn学习总结3——回归与聚类算法_第4张图片

 代码集

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge
from sklearn.metrics import mean_squared_error
import joblib
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score


def linear1():
    """
    正规方程的优化方法对波士顿房价进行预测
    :return:
    """
    # 1)获取数据
    boston = load_boston()
    print("特征数量:\n", boston.data.shape)

    # 2) 划分数据集
    x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=22)

    # 3) 标准化
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4)预估器
    estimator = LinearRegression()
    estimator.fit(x_train, y_train)

    # 5)得出模型
    print("正规方程——权重系数为:\n", estimator.coef_)
    print("正规方程——偏置为:\n", estimator.intercept_)

    # 6)模型评估
    y_predict = estimator.predict(x_test)
    print("预测房价:\n", y_predict)
    error = mean_squared_error(y_test, y_predict)
    print("正规方程——均方误差为:\n", error)

    return None


def linear2():
    """
    梯度下降的优化方法对波士顿房价进行预测
    :return:
    """
    # 1)获取数据
    boston = load_boston()

    # 2) 划分数据集
    x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=22)

    # 3) 标准化
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4)预估器
    estimator = SGDRegressor(learning_rate="constant", eta0=0.01, max_iter=10000)
    estimator.fit(x_train, y_train)

    # 5)得出模型
    print("梯度下降——权重系数为:\n", estimator.coef_)
    print("梯度下降——偏置为:\n", estimator.intercept_)

    # 6)模型评估
    y_predict = estimator.predict(x_test)
    print("预测房价:\n", y_predict)
    error = mean_squared_error(y_test, y_predict)
    print("梯度下降——均方误差为:\n", error)

    return None


def linear3():
    """
    岭回归对波士顿房价进行预测
    :return:
    """
    # 1)获取数据
    boston = load_boston()

    # 2) 划分数据集
    x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=22)

    # 3) 标准化
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4)预估器
    estimator = Ridge(alpha=0.5, max_iter=10000)
    estimator.fit(x_train, y_train)

    # 保存模型
    joblib.dump(estimator, "my.ridge.pkl")
    # 加载模型
    estimator = joblib.load("my.ridge.pkl")

    # 5)得出模型
    print("岭回归——权重系数为:\n", estimator.coef_)
    print("岭回归——偏置为:\n", estimator.intercept_)

    # 6)模型评估
    y_predict = estimator.predict(x_test)
    print("预测房价:\n", y_predict)
    error = mean_squared_error(y_test, y_predict)
    print("岭回归——均方误差为:\n", error)

    return None


def linear4():
    """
    KMeans算法对波士顿房价进行预测
    :return:
    """
    # 1)获取数据
    boston = load_boston()

    # 2) 划分数据集
    x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=22)

    # 3) 标准化
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4)预估器
    estimator = KMeans(n_clusters=3)
    estimator.fit(boston)

    y_predict = estimator.predict(boston)

    # 模型评估-轮廓系数
    silhouette_score(boston, y_predict)

    return None


if __name__ == '__main__':
    # 代码1:正规方程的优化方法对波士顿房价进行预测
    # linear1()
    # 代码2:梯度下降的优化方法对波士顿房价进行预测
    # linear2()
    # 代码3:岭回归对波士顿房价进行预测
    linear3()

你可能感兴趣的:(机器学习,数据挖掘,机器学习,sklearn,算法,数据挖掘,python)