手写逻辑回归算法

这是我为了练习而手写实现的逻辑回归算法。

数据来源:http://archive.ics.uci.edu/ml/datasets/default+of+credit+card+clients


import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

def get_data():
    """Load the credit-card default dataset, oversample it and split it.

    The Excel sheet is read with its first row skipped and its first column
    used as the index. The ``default payment next month`` column is the
    target; every other column is a feature. The data is oversampled with
    SMOTE and then split 80/20 with fixed random seeds.

    Returns:
        tuple: ``(x_train, x_test, y_train, y_test)`` as produced by
        ``train_test_split``.
    """
    data = pd.read_excel('...\\default of credit card clients.xls',
                         header=0, skiprows=[0], index_col=[0])
    x_feature = list(data.columns)
    x_feature.remove('default payment next month')
    x = data[x_feature]
    y = data['default payment next month']

    sm = SMOTE(random_state=42)
    # ``fit_sample`` is the deprecated name of ``fit_resample`` and is absent
    # from recent imbalanced-learn releases; prefer the current name and fall
    # back only when running against an old installation.
    resample = getattr(sm, 'fit_resample', None)
    if resample is None:
        resample = sm.fit_sample
    X, Y = resample(x, y)

    # NOTE(review): oversampling happens before the split, so synthetic rows
    # interpolated from what ends up in the test set can land in the training
    # set. Consider splitting first and resampling only the training part.
    x_train, x_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.2, random_state=0)
    return x_train, x_test, y_train, y_test

# Build the train/test split once, at module import time; the four names
# below are module-level globals used by the script at the end of the file.
x_train, x_test, y_train, y_test = get_data()

class Logistic_Rrgression():
    """Binary logistic regression trained with batch gradient descent.

    After :meth:`fit` the learned weights are stored in ``self.w`` as an
    ``np.matrix`` of shape ``(n_features + 1, 1)``; the first row is the
    intercept, because a column of ones is prepended to the inputs.
    """

    def sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``.

        The input is clipped to ``[-500, 500]`` first so that ``np.exp``
        cannot overflow; inside that range the result is unchanged.
        """
        z = np.clip(x, -500, 500)
        return 1 / (1 + np.exp(-z))

    def _design_matrix(self, x):
        """Return ``x`` as an ndarray with a leading column of ones."""
        x = np.asarray(x)
        return np.hstack((np.ones((x.shape[0], 1)), x))

    def fit(self, x, y, times=1000, learn_rate=0.01):
        """Fit the weights by gradient descent and return them.

        Args:
            x: 2-D feature table with one row per sample.
            y: Labels, either 1-D or a single column; flattened to a column.
            times: Number of gradient steps. ``0`` keeps the random
                initial weights.
            learn_rate: Step size applied to the gradient.

        Returns:
            np.matrix: The fitted weights, also stored in ``self.w``.
        """
        design = self._design_matrix(x)
        # reshape(-1, 1) accepts both 1-D and column-vector labels; a plain
        # transpose would turn a column into a row and broadcast wrongly.
        targets = np.asarray(y).reshape(-1, 1)
        # Initial weights are drawn uniformly from [-0.5, 0.5).
        weights = (np.random.rand(design.shape[1]) - 0.5).reshape(-1, 1)

        for _ in range(times):
            residual = self.sigmoid(design.dot(weights)) - targets
            # NOTE: the gradient is summed over samples, not averaged, so the
            # effective step grows with the number of rows.
            weights = weights - learn_rate * design.T.dot(residual)

        # Assigned after the loop so that ``times=0`` still defines ``self.w``.
        # Kept as np.matrix so the stored/returned type is unchanged.
        self.w = np.matrix(weights)
        return self.w

    def predict_prob(self, x):
        """Return a 1-D array with the predicted probability for each row."""
        weights = np.asarray(self.w).reshape(-1, 1)
        return self.sigmoid(self._design_matrix(x).dot(weights)).ravel()

    def predict_type(self, x, cut_off=0.5):
        """Return a 1-D float array of labels: 1 where prob > cut_off, else 0."""
        probs = self.predict_prob(x)
        return (probs > cut_off).astype(probs.dtype)

    def accuracy(self, x, y):
        """Return the fraction of rows whose predicted label equals ``y``."""
        labels = np.asarray(y).ravel()
        return np.mean(labels == self.predict_type(x))

# Train on the training split using fit()'s default times and learn_rate.
LR = Logistic_Rrgression()
LR.fit(x_train,y_train)
# The return values of the two calls below are neither stored nor printed;
# they are only visible when run interactively (e.g. in a notebook/REPL).
LR.predict_type(x_test)
LR.accuracy(x_test,y_test)


 

你可能感兴趣的:(python)