python 最优分箱_python最优分箱中woe计算(求大圣)

#coding=gbkimportpandasaspdimportnumpyasnpimportscipy.statsasstatsdeffenzu(Y,X,n=20):r=0bad=Y.sum()good=Y.count()-badwhilenp.abs(r)<1:d1=pd.DataFrame({"X":X,"Y":Y,"Bucket...

# coding=gbk

import pandas as pd

import numpy as np

import scipy.stats as stats

def fenzu(Y,X,n=20):

r=0

bad=Y.sum()

good=Y.count()-bad

while np.abs(r)<1:

d1=pd.DataFrame({"X":X,"Y":Y,"Bucket":pd.qcut(X,n)})

d2=d1.groupby("Bucket",as_index=True)

r,p=stats.spearmanr(d2.mean().X,d2.mean().Y)

n=n-1

d3=pd.DataFrame(d2.X.min(),columns=['min'])

d3['min']=d2.min().X

d3['max'] = d2.max().X

d3['sum']=d2.sum().Y

d3['total']=d2.count().Y

d3['rate']=d2.mean().Y

d3['woe']=np.log((d3['rate']/(1-d3['rate']))/(good/bad))

d4=(d3.sort_index(by="min")).reset_index(drop=True)

print("="* 60)

print(d4)

return

data=pd.read_csv('train.csv')

fenzu(data[0],data[2])

错误如下:

Traceback (most recent call last):

File "C:/work/chaifen.py", line 30, in

fenzu(data1,data2,n=20)

File "C:/work/chaifen.py", line 11, in fenzu

d1=pd.DataFrame({"X":X,"Y":Y,"Bucket":pd.qcut(X,n)})

File "C:\python3.5\lib\site-packages\pandas\core\reshape\tile.py", line 208, in qcut

dtype=dtype, duplicates=duplicates)

File "C:\python3.5\lib\site-packages\pandas\core\reshape\tile.py", line 260, in _bins_to_cuts

result = algos.take_nd(labels, ids - 1)

File "C:\python3.5\lib\site-packages\pandas\core\algorithms.py", line 1320, in take_nd

allow_fill=allow_fill)

File "C:\python3.5\lib\site-packages\pandas\core\categorical.py", line 1705, in take_nd

codes = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1)

File "C:\python3.5\lib\site-packages\pandas\core\algorithms.py", line 1383, in take_nd

func(arr, indexer, out, fill_value)

File "pandas\_libs\algos_take_helper.pxi", line 562, in pandas._libs.algos.take_1d_int8_int8

ValueError: Buffer has wrong number of dimensions (expected 1, got 2)

Process finished with exit code

求大神指导。

展开

你可能感兴趣的:(python,最优分箱)