对西瓜数据集编程实现LDA(线性判别分析)

数据集:

[图1:西瓜数据集]

代码:

import pandas as pd
import numpy as np
from pandas import *
from numpy import *
import matplotlib.pyplot as plt
def readdata(path=r'C:\Users\zmy\Desktop\titanic\watermelon.csv'):
    """Load the watermelon data set from a comma-separated text file.

    The file is read with one variable per row: rows 0 and 1 are used as
    the two features and row 3 as the class label.

    Args:
        path: Location of the CSV file. Defaults to the path that used to
            be hard-coded here, so existing ``readdata()`` calls behave as
            before.

    Returns:
        A ``(features, labels)`` pair of ``numpy.matrix`` objects with
        shapes ``(n_samples, 2)`` and ``(n_samples, 1)``.
    """
    dataset = np.loadtxt(path, delimiter=",")
    # NOTE(review): row 2 of the file is skipped by this selection --
    # confirm against the CSV layout that this is intentional.
    # np.asmatrix is used instead of the bare `mat` alias, which is no
    # longer exported by NumPy 2.0; the returned type is still a matrix.
    features = np.asmatrix(dataset[0:2, :]).transpose()
    labels = np.asmatrix(dataset[3, :]).transpose()
    return features, labels
def LDA(df1, df2):
    """Compute the two-class LDA projection direction.

    The direction is ``(mean0 - mean1) * Sw^-1`` where ``Sw`` is the
    within-class scatter matrix, i.e. the sum over both classes of
    ``(x - class_mean)^T (x - class_mean)``.

    Args:
        df1: Sample matrix of shape ``(n_samples, n_features)``; anything
            ``numpy.asarray`` can turn into a 2-D array.
        df2: Class labels, one per sample, as a column, a row or a flat
            sequence. Only samples labelled ``0`` or ``1`` are used.

    Returns:
        A ``numpy.matrix`` of shape ``(1, n_features)`` holding the
        projection direction.

    Raises:
        ValueError: If the inputs do not line up, or if either class has
            no samples (its mean would be undefined).
        numpy.linalg.LinAlgError: If the within-class scatter matrix is
            singular.
    """
    samples = np.asarray(df1, dtype=float)
    labels = np.asarray(df2).ravel()
    if samples.ndim != 2 or samples.shape[0] != labels.shape[0]:
        raise ValueError(
            "df1 must be 2-D with one row per label in df2")

    positives = samples[labels == 1]
    negatives = samples[labels == 0]
    if len(positives) == 0 or len(negatives) == 0:
        raise ValueError(
            "LDA needs at least one sample for each of the labels 0 and 1")

    mean1 = positives.mean(axis=0)
    mean0 = negatives.mean(axis=0)

    # Within-class scatter: D^T D sums the per-sample outer products of
    # the centred rows in one step.
    centred1 = positives - mean1
    centred0 = negatives - mean0
    sw = np.dot(centred1.T, centred1) + np.dot(centred0.T, centred0)

    # Sw is symmetric, so solving Sw * w = (mean0 - mean1) gives the same
    # vector as (mean0 - mean1) * Sw^-1 without forming the inverse.
    direction = np.linalg.solve(sw, mean0 - mean1)

    # Callers index the result as w[0, j], so keep the (1, n) matrix shape.
    return np.asmatrix(direction)

def plot(w,df1,df2):
    """Scatter the two classes and draw the line ``w[0,0]*x + w[0,1]*y = 0``.

    Samples whose label equals 1 are drawn as red squares; all other
    samples are drawn as green dots. The shapes of the plotted line's
    coordinate arrays and the vector ``w`` are printed, then the figure is
    shown with ``plt.show()``.

    Args:
        w: Direction indexed as ``w[0, 0]`` and ``w[0, 1]``.
        df1: Samples, one row per sample; columns 0 and 1 are plotted.
        df2: Labels, one per sample.
    """
    points = np.asarray(df1)
    labels = np.asarray(df2).ravel()
    is_positive = labels == 1

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(points[is_positive, 0], points[is_positive, 1],
               s=30, c='red', marker='s')
    ax.scatter(points[~is_positive, 0], points[~is_positive, 1],
               s=30, c='green')

    x = np.arange(-0.2, 0.8, 0.1)
    # NOTE(review): this divides by w[0, 1]; a direction with a zero second
    # component yields inf/nan values here and no visible line.
    y = np.asarray((-w[0, 0] * x) / w[0, 1])
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print(np.shape(x))
    print(np.shape(y))
    ax.plot(x, y)
    ax.set_xlabel('density')
    ax.set_ylabel('ratio_sugar')
    ax.set_title('LDA')
    print(w)
    plt.show()

if __name__ == "__main__":
    # Run the pipeline only when executed as a script, so importing this
    # module does not read the CSV or open a plot window.
    df1, df2 = readdata()
    w = LDA(df1, df2)
    plot(w, df1, df2)

结果:

[图2:LDA 运行结果]

你可能感兴趣的:(机器学习,python)