python之实战----线性判别分析(LDA)战iris

LDA预测

# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt  
import numpy as np  
from sklearn.model_selection import train_test_split   
from sklearn import datasets, discriminant_analysis
def laod_data():
    iris=datasets.load_iris()
    X_train=iris.data
    y_train=iris.target
    return train_test_split(X_train,y_train,
    test_size=0.25,random_state=0,stratify=y_train)#stratify分层
def test_LinearDiscriminantAnalysis(*data):
    X_train,X_test,y_train,y_test=data
    lda=discriminant_analysis.LinearDiscriminantAnalysis()
    lda.fit(X_train,y_train)
    print('Coefficients:%s, intercept %s'%(lda.coef_,lda.intercept_))
    print("Score:%.2f"%lda.score(X_test,y_test))
if __name__=='__main__':
    X_train,X_test,y_train,y_test=laod_data() 
    test_LinearDiscriminantAnalysis(X_train,X_test,y_train,y_test)
    

结果是

E:\p>python test.py
Coefficients:[[  6.575853     9.75807593 -14.34026669 -21.39076537]
 [ -1.98385061  -3.49791089   4.21495042   2.60304299]
 [ -4.47116022  -6.09542385   9.85886057  18.29330864]], intercept [-15.33097142   0.46730077 -30.53297367]
Score:1.00


现在检查原始数据经过LDA之后的情况。

出现警告

D:\python2.7\lib\site-packages\sklearn\utils\validation.py:547: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)

提醒是要对预测输出y做出ravel()转换,也就是说我的y是2D的形式(shapes, 1),要把二维的形式改成1D的形式(shapes, ),,这就可以对fit输入的y_train作y_train.ravel()这样的转换


正常运行源码:

# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt  
import numpy as np  
from sklearn.model_selection import train_test_split   
from sklearn import datasets, discriminant_analysis

def load_data():
    iris=datasets.load_iris()
    X_train=iris.data
    y_train=iris.target
    return train_test_split(X_train,y_train,
    test_size=0.25,random_state=0,stratify=y_train)#stratify分层
def plot_LDA(converted_X,y):
    '''
    绘制经过 LDA 转换后的数据

    :param converted_X: 经过 LDA转换后的样本集(150,3)降维后的表示方法
    :param y: 样本集的标记(150,1)
    :return:  None
    '''
    from mpl_toolkits.mplot3d import Axes3D
    fig=plt.figure()
    ax=Axes3D(fig)
    colors='rgb'
    markers='o*s'

    for target,color,marker in zip([0,1,2],colors,markers):  #zip()方法用在for循环中,支持并行迭代
        pos=(y==target).ravel()  #由标签转换为bool矩阵,分成不同标签类型的矩阵(150,) ravel平铺用于拆分数据
        X=converted_X[pos,:]        # 取出对应pos中True的元素,目的是将降维后的数据拆分为三类X(50,3)---》50行数据正好下面可以取每一列共50

        ax.scatter(X[:,0], X[:,1], X[:,2],color=color,marker=marker,  #  X[:,0]取出第一列的数据
            label="Label %d"%target)

    ax.legend(loc="best")
    fig.suptitle("Iris After LDA")
    plt.show()


def run_plot_LDA():
    '''
    执行 plot_LDA 。其中数据集来自于 load_data() 函数

    :return: None
    '''
    # X_train(112,4)  X_test(38,4) y_train(112,) y_test(38,)
    X_train,X_test,y_train,y_test=load_data()

    # X(150,4)
    X=np.vstack((X_train,X_test))  # 考虑到lda的fit接受的参数问题,这里需要组合X_Train和X_test

    # Y(150,1)  原本维度为1,所以要先reshape
    Y=np.vstack((y_train.reshape(y_train.size,1),y_test.reshape(y_test.size,1))) # 组合y_train和y_test#size行1列
    lda = discriminant_analysis.LinearDiscriminantAnalysis()
    lda.fit(X, Y.ravel())#lda.fit(X,Y)就会出警告

    # converted_X(150,3) 对数据降维了    权值lda.coef_(3, 4)  lda.intercept_(3,)
    converted_X=np.dot(X,np.transpose(lda.coef_))+lda.intercept_  # X*权值+偏置b  就是输出值
    #print(converted_X)
    plot_LDA(converted_X,Y)
if __name__=='__main__':
    run_plot_LDA()






















你可能感兴趣的:(python数据分析实战练习)