# coding=gbk import pandas as pd import numpy as np import scipy.stats as stats def fenzu(Y,X,n=20): r=0 bad=Y.sum() good=Y.count()-bad while np.abs(r)<1: d1=pd.DataFrame({"X":X,"Y":Y,"Bucket...
# coding=gbk
import pandas as pd
import numpy as np
import scipy.stats as stats
def fenzu(Y,X,n=20):
r=0
bad=Y.sum()
good=Y.count()-bad
while np.abs(r)<1:
d1=pd.DataFrame({"X":X,"Y":Y,"Bucket":pd.qcut(X,n)})
d2=d1.groupby("Bucket",as_index=True)
r,p=stats.spearmanr(d2.mean().X,d2.mean().Y)
n=n-1
d3=pd.DataFrame(d2.X.min(),columns=['min'])
d3['min']=d2.min().X
d3['max'] = d2.max().X
d3['sum']=d2.sum().Y
d3['total']=d2.count().Y
d3['rate']=d2.mean().Y
d3['woe']=np.log((d3['rate']/(1-d3['rate']))/(good/bad))
d4=(d3.sort_index(by="min")).reset_index(drop=True)
print("="* 60)
print(d4)
return
data = pd.read_csv('train.csv')
# Pick the columns by position: read_csv labels columns with the header
# strings, so data[0] / data[2] would raise KeyError.  .iloc[:, i] also
# guarantees a 1-D Series, which is what pd.qcut inside fenzu requires.
fenzu(data.iloc[:, 0], data.iloc[:, 2])
错误如下:
Traceback (most recent call last):
File "C:/work/chaifen.py", line 30, in <module>
fenzu(data1,data2,n=20)
File "C:/work/chaifen.py", line 11, in fenzu
d1=pd.DataFrame({"X":X,"Y":Y,"Bucket":pd.qcut(X,n)})
File "C:\python3.5\lib\site-packages\pandas\core\reshape\tile.py", line 208, in qcut
dtype=dtype, duplicates=duplicates)
File "C:\python3.5\lib\site-packages\pandas\core\reshape\tile.py", line 260, in _bins_to_cuts
result = algos.take_nd(labels, ids - 1)
File "C:\python3.5\lib\site-packages\pandas\core\algorithms.py", line 1320, in take_nd
allow_fill=allow_fill)
File "C:\python3.5\lib\site-packages\pandas\core\categorical.py", line 1705, in take_nd
codes = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1)
File "C:\python3.5\lib\site-packages\pandas\core\algorithms.py", line 1383, in take_nd
func(arr, indexer, out, fill_value)
File "pandas\_libs\algos_take_helper.pxi", line 562, in pandas._libs.algos.take_1d_int8_int8
ValueError: Buffer has wrong number of dimensions (expected 1, got 2)
Process finished with exit code 1
求大神指导。
展开