One Hot Encoder

#1. get_dummies() on pandas dataframe.

例子

import pandas as pd
# Build a small categorical Series, one label per row.
s = pd.Series(list('abcda'))
# dtype=int keeps the indicator values as 0/1; pandas >= 2.0 would otherwise
# return booleans (True/False). Transposed so that each label becomes a row.
pd.get_dummies(s, dtype=int).T

0	1	2	3	4
a	1	0	0	0	1
b	0	1	0	0	0
c	0	0	1	0	0
d	0	0	0	1	0

编码指定列

import pandas as pd
# Sample frame with two categorical (string) columns, A and B.
df = pd.DataFrame({'A': list('aba'), 'B': list('bac')})
df
Out[]: 
   A  B
0  a  b
1  b  a
2  a  c

# One-hot encode column B. dtype=int yields 0/1 indicators; pandas >= 2.0
# would otherwise return boolean columns.
one_hot = pd.get_dummies(df['B'], dtype=int)
# Column B is now represented by the indicator columns, so drop it.
df = df.drop('B', axis=1)
# Attach the indicator columns; join aligns them on the row index.
df = df.join(one_hot)
df
Out[]: 
       A  a  b  c
    0  a  0  1  0
    1  b  1  0  0
    2  a  0  0  1

# factorize returns (codes, uniques); the codes are the integer ids of the labels.
codes, _uniques = pd.factorize(['B', 'C', 'D', 'B'])
codes
array([0, 1, 2, 0])

#2. numpy eye()

import numpy as np

def one_hot_encode(x, n_classes):
    """
    One hot encode sample labels. Return a one-hot encoded vector for each label.
    : x: Integer label(s) below n_classes -- a scalar, list, tuple or numpy array
         (negative labels follow numpy indexing and count from the end)
    : n_classes: Number of classes, i.e. the length of every one-hot vector
    : return: Float numpy array of one-hot encoded labels, shape x.shape + (n_classes,)
    """
    # Convert first: numpy would read a raw tuple as a multi-dimensional
    # (row, column) index instead of a sequence of row labels.
    labels = np.asarray(x)
    if labels.size == 0:
        # An empty sequence converts to a float array, which cannot index.
        labels = labels.astype(np.intp)
    # Row i of the identity matrix is the one-hot vector for class i.
    return np.eye(n_classes)[labels]

def main():
    """Encode a small sample of labels and print the resulting one-hot matrix."""
    # Named `labels` rather than `list` so the builtin is not shadowed.
    labels = [0, 1, 2, 3, 4, 3, 2, 1, 0]
    n_classes = 5
    one_hot_list = one_hot_encode(labels, n_classes)
    print(one_hot_list)


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

[[ 1.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.]
 [ 0.  0.  0.  1.  0.]
 [ 0.  0.  0.  0.  1.]
 [ 0.  0.  0.  1.  0.]
 [ 0.  0.  1.  0.  0.]
 [ 0.  1.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.]]

#3. tf.one_hot

import numpy as np
import tensorflow as tf

# Case 1: a flat list of indices with the default on/off values.
indices = [0, 1, 2]
depth = 3
T1 = tf.one_hot(indices, depth)  # output: [3 x 3]

# Case 2: custom fill values. Parameter meanings:
#   depth:     size of the one-hot dimension
#   on_value:  value written at the position selected by each index (default 1)
#   off_value: value written at every other position (default 0)
#   axis:      the axis to fill along
indices = [0, 2, -1, 1]
depth = 3
T2 = tf.one_hot(indices, depth, on_value=5.0, off_value=0.0, axis=-1)  # output: [4 x 3]

# Case 3: a nested (2 x 2) list of indices; the -1 entry produces an all-off row.
indices = [[0, 2], [1, -1]]
depth = 3
T3 = tf.one_hot(indices, depth, on_value=1.0, off_value=0.0, axis=-1)  # output: [2 x 2 x 3]

# Evaluate and print each tensor in turn.
with tf.Session() as sess:
    for tensor in (T1, T2, T3):
        print(sess.run(tensor))
    

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
[[5. 0. 0.]
 [0. 0. 5.]
 [0. 0. 0.]
 [0. 5. 0.]]
[[[1. 0. 0.]
  [0. 0. 1.]]

 [[0. 1. 0.]
  [0. 0. 0.]]]

#4. sklearn.preprocessing.OneHotEncoder

方法

`fit`(self, X[, y])	Fit OneHotEncoder to X.
`fit_transform`(self, X[, y])	Fit OneHotEncoder to X, then transform X.
`get_feature_names`(self[, input_features])	Return feature names for output features.
`get_params`(self[, deep])	Get parameters for this estimator.
`inverse_transform`(self, X)	Convert the data back to the original representation.
`set_params`(self, **params)	Set the parameters of this estimator.
`transform`(self, X)	Transform X using one-hot encoding.

例子

from sklearn.preprocessing import OneHotEncoder
# Training data: one string column and one integer column.
X = [['Male', 1], ['Female', 3], ['Female', 2]]
# handle_unknown='ignore': categories not seen during fit encode as all zeros
# instead of raising an error.
enc = OneHotEncoder(handle_unknown='ignore').fit(X)
# Categories learned for each input column.
enc.categories_
# 'Female', 'Male' and 1 were seen during fit; 4 was not, so its columns stay zero.
enc.transform([['Female', 1], ['Male', 4]]).toarray()
array([[1., 0., 1., 0., 0.],
       [0., 1., 0., 0., 0.]])
       #前两位对性别进行编码，后三位对数值类别(1、2、3)进行编码；4 在训练时未出现，故对应位置全为 0
# Map one-hot rows back to the original categories.
encoded_rows = [[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]]
enc.inverse_transform(encoded_rows)
array([['Male', 1],
       [None, 2]], dtype=object)
# Names of the generated output columns.
# NOTE(review): newer scikit-learn releases replace this method with
# get_feature_names_out() -- confirm against the installed version.
enc.get_feature_names()
# drop='first' omits the first category of every feature from the encoding.
unfitted = OneHotEncoder(drop='first')
drop_enc = unfitted.fit(X)
drop_enc.categories_
drop_enc.transform([['Female', 1], ['Male', 2]]).toarray()
array([[0., 0., 0.],
       [1., 1., 0.]])

参考：

tf.one_hot
sklearn.preprocessing.OneHotEncoder
How can I one hot encode in Python?

One Hot Encoder

#1. get_dummies() on pandas dataframe.

#2. numpy eye()

#3. tf.one_hot

#4. sklearn.preprocessing.OneHotEncoder

参考：

你可能感兴趣的:(One Hot Encoder)