sklearn KNeighborsRegressor

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re

from sklearn.neighbors import KNeighborsRegressor

# Load the training data from a CSV file into a DataFrame. The path is
# relative to the current working directory.
# NOTE(review): the later code assumes the file's column headers are
# CamelCase (e.g. a monthly-income column) — confirm against the CSV.
df = pd.read_csv("data/cs-training.csv")

将所有列名都改为 snake_case

def camel_to_snake(column_name):
    """Return ``column_name`` rewritten from camelCase/PascalCase to snake_case.

    The conversion runs two regex substitutions and then lowercases:

    1. Put an underscore in front of every capitalised word (an uppercase
       letter followed by one or more lowercase letters) that is preceded
       by at least one character.
    2. Put an underscore between a lowercase letter or digit and the
       uppercase letter that directly follows it; this catches boundaries
       the first pass leaves alone, such as the start of an acronym.

    Strings without uppercase letters are returned unchanged.
    """
    # Pass 1: "getHTTPResponse" -> "getHTTP_Response"
    words_split = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', column_name)
    # Pass 2: "getHTTP_Response" -> "get_HTTP_Response"
    fully_split = re.sub('([a-z0-9])([A-Z])', r'\1_\2', words_split)
    return fully_split.lower()
# Quick demonstration of the helper; as a bare expression its result is only
# displayed when run interactively (notebook / REPL).
camel_to_snake("javaLovesCamelCase")

# Rename every column of the DataFrame to its snake_case form.
df.columns = list(map(camel_to_snake, df.columns))
# Show the resulting column labels (interactive display only).
df.columns.tolist()


# Fill in missing monthly_income values with a 1-nearest-neighbour regressor:
# each row without an income receives the prediction of a model fitted on the
# rows whose income is known, using the columns in `cols` as features.
income_imputer = KNeighborsRegressor(n_neighbors=1)

# Split the data on whether monthly_income is present.
income_is_missing = df.monthly_income.isnull()
train_w_monthly_income = df[~income_is_missing]
# Take an explicit copy: this frame is modified below, and writing into a
# boolean-mask slice of `df` triggers pandas' SettingWithCopyWarning and
# leaves it ambiguous whether `df` itself is affected.
train_w_null_monthly_income = df[income_is_missing].copy()

cols = ['number_real_estate_loans_or_lines', 'number_of_open_credit_lines_and_loans']
income_imputer.fit(train_w_monthly_income[cols], train_w_monthly_income.monthly_income)
new_values = income_imputer.predict(train_w_null_monthly_income[cols])

# Store the predicted incomes in the (copied) subset; `df` is left untouched.
train_w_null_monthly_income.loc[:, 'monthly_income'] = new_values

http://www.waitingfy.com/archives/5195

你可能感兴趣的:(sklearn)