scikit-learn labelencoder 同时处理多列

根据网络资料整理

1.

x=df.apply(LabelEncoder().fit_transform)

2.

from collections import defaultdict
d = defaultdict(LabelEncoder)

# Encoding the variable

fit = df.apply(lambda x: d[x.name].fit_transform(x))

# Inverse the encodedfit.apply(lambda x: d[x.name].inverse_transform(x))

# Using the dictionary to label future data

df.apply(lambda x: d[x.name].transform(x))

3.

import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

class MultiColumnLabelEncoder:
    def __init__(self,columns = None):
        self.columns = columns # array of column names to encode

    def fit(self,X,y=None):
        return self # not relevant here

    def transform(self,X):
        '''
        Transforms columns of X specified in self.columns using
        LabelEncoder(). If no columns specified, transforms all
        columns in X.
        '''
        output = X.copy()
        if self.columns is not None:
            for col in self.columns:
                output[col] = LabelEncoder().fit_transform(output[col])
        else:
            for colname,col in output.iteritems():
                output[colname] = LabelEncoder().fit_transform(col)
        return output

    def fit_transform(self,X,y=None):
        return self.fit(X,y).transform(X)

 

使用:

# Create some toy data in a Pandas dataframe
fruit_data = pd.DataFrame({
    'fruit':  ['apple','orange','pear','orange'],
    'color':  ['red','orange','green','green'],
    'weight': [5,6,3,4]
})

 

MultiColumnLabelEncoder().fit_transform(fruit_data.drop('weight',axis=1))

 

 

你可能感兴趣的:(机器学习,Python)