pandas数据分箱技术

第一种(把元素放进篮子里:逐个判断分数并追加到对应的列表)

from random import randint
from pprint import pprint

# Fixed sample scores so the output listed at the end is reproducible.
# Swap in the line below to experiment with random scores instead.
# score_list = [randint(50,100) for _ in range(20)]
score_list = [63, 67, 73, 84, 88, 97, 70, 85, 68, 96, 95, 60, 83, 70, 77, 86, 83, 94, 100, 82]
print(score_list)

# One "basket" (bin) per score band; deal_with() appends into these.
low = []
mid = []
ok = []


def deal_with(v):
    """Append score ``v`` to the module-level basket whose range contains it.

    Ranges: ``50 < v < 70`` -> ``low``, ``70 <= v < 90`` -> ``mid``,
    ``90 <= v <= 100`` -> ``ok``.  A value outside all three ranges is
    ignored.  Always returns ``None``.
    """
    # The lists are only mutated, never rebound, so no ``global`` is needed.
    if 50 < v < 70:
        low.append(v)
    elif 70 <= v < 90:
        mid.append(v)
    elif 90 <= v <= 100:
        ok.append(v)


# In Python 3 ``map`` returns a lazy iterator, so a bare
# ``map(deal_with, score_list)`` never calls deal_with and the baskets stay
# empty.  An explicit loop runs the side effect for every score.
for score in score_list:
    deal_with(score)

pprint({"low": low, "mid": mid, "ok": ok})
###########################################
# Expected output:
# [63, 67, 73, 84, 88, 97, 70, 85, 68, 96, 95, 60, 83, 70, 77, 86, 83, 94, 100, 82]
# {'low': [63, 67, 68, 60],
#  'mid': [73, 84, 88, 70, 85, 83, 70, 77, 86, 83, 82],
#  'ok': [97, 96, 95, 94, 100]}


第二种(把元素放进篮子里:先排序,再用 itertools.groupby 分组)

from random import randint
from pprint import pprint
from itertools import groupby

# Fixed sample scores; the commented line is a random alternative.
# score_list = [randint(50,100) for _ in range(20)]
score_list = [
    63, 67, 73, 84, 88, 97, 70, 85, 68, 96,
    95, 60, 83, 70, 77, 86, 83, 94, 100, 82,
]
print(score_list)

# Note: sort first. The groupby call further down only merges adjacent
# items that share a key, so equal-band scores must sit next to each other.
# The sort is done on a copy, so score_list keeps its original order.
data1 = list(score_list)
data1.sort()
print(data1)

def deal_with_simple_data(d):
    """Return the band label for score ``d``.

    ``"low"`` for ``50 < d < 70``, ``"mid"`` for ``70 <= d < 90``,
    ``"ok"`` for ``90 <= d <= 100``; ``None`` for anything else.
    """
    # The three ranges are disjoint, so the order of the checks is free.
    label = None
    if 90 <= d <= 100:
        label = "ok"
    elif 70 <= d < 90:
        label = "mid"
    elif 50 < d < 70:
        label = "low"
    return label

# Lazily group consecutive scores that map to the same band label.
itor_data = groupby(data1, deal_with_simple_data)
# Each group is a single-use sub-iterator, so materialise it into a list
# while walking the groups, then pretty-print the (label, scores) pairs.
pprint([
    (label, list(members))
    for label, members in itor_data
])
########################################################
#[63, 67, 73, 84, 88, 97, 70, 85, 68, 96, 95, 60, 83, 70, 77, 86, 83, 94, 100, 82]
#[60, 63, 67, 68, 70, 70, 73, 77, 82, 83, 83, 84, 85, 86, 88, 94, 95, 96, 97, 100]
#[('low', [60, 63, 67, 68]),
 #('mid', [70, 70, 73, 77, 82, 83, 83, 84, 85, 86, 88]),
 #('ok', [94, 95, 96, 97, 100])]


第三种(每个元素属于哪种篮子)

1.

import pandas as pd

score_list = [
    63, 67, 73, 84, 88, 97, 70, 85, 68, 96,
    95, 60, 83, 70, 77, 86, 83, 94, 100, 82,
]
print(score_list)

# Bin edges. pd.cut builds right-closed intervals by default:
# (50, 70], (70, 90], (90, 100]. A score of exactly 70 therefore lands in
# the first bin, unlike the hand-written 70 <= v < 90 checks used earlier.
bins = [50, 70, 90, 100]

# The same binning twice: first with the intervals themselves as
# categories, then with one human-readable label per interval.
res = pd.cut(x=score_list, bins=bins)
res1 = pd.cut(x=score_list, bins=bins, labels=["及格", "中等", "优秀"])

print(res)
print("---"*35)
print(res1)
#################################################
"""
[63, 67, 73, 84, 88, 97, 70, 85, 68, 96, 95, 60, 83, 70, 77, 86, 83, 94, 100, 82]
[(50, 70], (50, 70], (70, 90], (70, 90], (70, 90], ..., (70, 90], (70, 90], (90, 100], (90, 100], (70, 90]]
Length: 20
Categories (3, interval[int64]): [(50, 70] < (70, 90] < (90, 100]]
---------------------------------------------------------------------------------------------------------
[及格, 及格, 中等, 中等, 中等, ..., 中等, 中等, 优秀, 优秀, 中等]
Length: 20
Categories (3, object): [及格 < 中等 < 优秀]
"""

2.


你可能感兴趣的:(机器学习)