Python Machine Learning with sklearn: splitting, clustering, classification, and regression on the wine dataset

I use the wine dataset that ships with sklearn. First, here are all the packages you will need (if you are working in the PyCharm IDE, run all of the code below together as a single script).

from sklearn.datasets import load_wine#wine dataset
from sklearn.cluster import KMeans#K-Means clustering model
from sklearn.model_selection import train_test_split#train/test split
from sklearn.preprocessing import StandardScaler#standardization (zero mean, unit variance)
from sklearn.decomposition import PCA#PCA dimensionality reduction
from sklearn.linear_model import LinearRegression#linear regression model
from sklearn.metrics import fowlkes_mallows_score,silhouette_score,accuracy_score,\
    precision_score,recall_score,f1_score,cohen_kappa_score,classification_report,roc_curve,\
    explained_variance_score,mean_absolute_error,mean_squared_error,median_absolute_error,r2_score    #clustering, classification, and regression metrics
from sklearn.svm import SVC#SVM classification model
import matplotlib.pyplot as plt#plotting / data visualization
import numpy as np#NumPy for numerical computing

1. Preprocessing the wine dataset with sklearn transformers

wine = load_wine()
data = wine['data']
target = wine['target']
#split the dataset into training and test sets
data_train,data_test,target_train,target_test = train_test_split(data,target,test_size=0.2,random_state=125)
#fit the standardization rule (StandardScaler) on the training data
stdScaler = StandardScaler().fit(data_train)
data_std_train = stdScaler.transform(data_train)
data_std_test = stdScaler.transform(data_test)
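
The standardized training data should now have approximately zero mean and unit variance in every column; a quick check, reusing data_std_train from above:

print('Per-feature means of the standardized training data (should be ~0):',data_std_train.mean(axis=0).round(3))
print('Per-feature standard deviations (should be ~1):',data_std_train.std(axis=0).round(3))
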
#PCA dimensionality reduction
pca_model = PCA(n_components=10).fit(data_std_train)#fit the PCA rule, keeping 10 components
data_pca_train = pca_model.transform(data_std_train)
data_pca_test = pca_model.transform(data_std_test)
print(data_pca_test)
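
Whether keeping n_components=10 is reasonable can be judged from the explained_variance_ratio_ attribute of the fitted PCA model; a minimal sketch reusing pca_model from above:

print('Variance ratio per retained component:',pca_model.explained_variance_ratio_)
print('Total variance retained by 10 components:',np.sum(pca_model.explained_variance_ratio_))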

2. Building and scoring a K-Means clustering model

#clustering model
kmeans = KMeans(n_clusters=3,random_state=42).fit(data)
print('The clustering model is:',kmeans)
#clustering evaluation
#Method 1: FMI (Fowlkes-Mallows index) score:
s = []
for i in range(2,11):
    kmeans1 = KMeans(n_clusters=i,random_state=42).fit(data)
    score1 = fowlkes_mallows_score(target,kmeans1.labels_)
    print('FMI score with %d clusters: %f'%(i,score1))
    s.append(score1)
print('Best FMI score: %f'%np.max(s))
#Method 2: silhouette score:
sil_score = []
for j in range(2,15):
    kmeans2 = KMeans(n_clusters=j, random_state=42).fit(data)
    score2=silhouette_score(data,kmeans2.labels_)
    sil_score.append(score2)
plt.rcParams['font.sans-serif'] = 'simhei'#font that can also render CJK characters (kept from the original)
plt.rcParams['axes.unicode_minus'] = False#render minus signs correctly with this font
plt.figure(figsize=(10,6))
plt.title('Silhouette score vs. number of clusters')
plt.plot(range(2,15),sil_score,linewidth=1.5,linestyle='-',c='red')
plt.xticks(range(2,15,1))
plt.show()
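
If you prefer the script to report the best k instead of reading it off the plot, it can be picked out of the sil_score list built above; a minimal sketch (the index is offset by 2 because the loop starts at k=2):

best_k = range(2,15)[int(np.argmax(sil_score))]
print('Best number of clusters by silhouette score: %d (score %.4f)'%(best_k,np.max(sil_score)))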

3. Building and scoring an SVM classification model

#classification and prediction
svm = SVC().fit(data_std_train,target_train)#fit an SVC model on the standardized training data
target_pred = svm.predict(data_std_test)#predict on the standardized test data
true = np.sum(target_pred == target_test)
print('Number of correct predictions:',true)
print('Number of incorrect predictions:',target_test.shape[0]-true)
#evaluation report
print('SVM classification report:','\n',classification_report(target_test,target_pred))
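
Several of the metrics imported at the top (accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score) are never actually called above. A minimal sketch that applies them to the same predictions, using average='macro' because wine has three classes (roc_curve is omitted since it expects binary labels/scores):

print('Accuracy:',accuracy_score(target_test,target_pred))
print('Macro precision:',precision_score(target_test,target_pred,average='macro'))
print('Macro recall:',recall_score(target_test,target_pred,average='macro'))
print('Macro F1:',f1_score(target_test,target_pred,average='macro'))
print("Cohen's kappa:",cohen_kappa_score(target_test,target_pred))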

4. Building and scoring a regression model

x_train,y_train,x_test,y_test = data_train,target_train,data_test,target_test
clf = LinearRegression().fit(x_train,y_train)
y_pred = clf.predict(x_test)
print(y_pred)
#visualize the regression results
plt.figure(figsize=(10,7))
plt.plot(range(y_test.shape[0]),y_test,linewidth=1.7,linestyle='-')
plt.plot(range(y_test.shape[0]),y_pred,linewidth=1.5,linestyle='-.')
plt.legend(['True values','Predicted values'])
plt.show()
#evaluate the regression model (the prints originally said "Boston"; the data here is wine)
print('Mean absolute error of the linear regression model on the wine data:',mean_absolute_error(y_test,y_pred))
print('Mean squared error of the linear regression model on the wine data:',mean_squared_error(y_test,y_pred))
print('Median absolute error of the linear regression model on the wine data:',median_absolute_error(y_test,y_pred))
print('Explained variance score of the linear regression model on the wine data:',explained_variance_score(y_test,y_pred))
print('R^2 of the linear regression model on the wine data:',r2_score(y_test,y_pred))
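
Regressing on the wine class labels (0, 1, 2) is mainly a demonstration of the LinearRegression API. As an extra sanity check, which is my own addition and not part of the original workflow, the same model can also be scored with 5-fold cross-validation; cross_val_score needs one extra import:

from sklearn.model_selection import cross_val_score#not imported above; added only for this sketch
cv_r2 = cross_val_score(LinearRegression(),data,target,cv=5,scoring='r2')
print('Cross-validated R^2 scores:',cv_r2)
print('Mean cross-validated R^2:',cv_r2.mean())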
