独热编码(One-Hot Encoding)把可枚举的类别型数据用 0 和 1 组成的向量进行编码。
比如,类别 1、2、3
分别用 [1, 0, 0]、[0, 1, 0]、[0, 0, 1]
来表示。
class sklearn.preprocessing.OneHotEncoder(categories='auto', drop=None,
sparse=True, dtype=<class 'numpy.float64'>, handle_unknown='error')
def oneHot(self, data):
    """One-hot encode an array of integer class labels.

    Every label ``k`` is replaced by row ``k`` of an identity matrix with
    ``data.max() + 1`` columns, so the result has one extra trailing axis
    compared with the input.

    Args:
        data: Array (or array-like) of class labels. Labels are expected to
            be non-negative whole numbers; they are cast to an index
            integer type, but their sign and range are not validated.

    Returns:
        A float array of shape ``data.shape + (data.max() + 1,)``. If the
        first dimension of ``data`` is empty, a message is printed and the
        input array is returned as-is.
    """
    labels = np.asarray(data)
    if labels.shape[0] == 0:
        print('oneHot object shape[0] is zero!')
        return labels

    # Index with a platform integer type: float labels would otherwise be
    # rejected as indices, boolean labels would be treated as a mask, and
    # narrow integer dtypes could overflow when 1 is added to the maximum.
    indices = labels.astype(np.intp, copy=False)
    num_classes = int(indices.max()) + 1
    return np.eye(num_classes)[indices]
或者:
import numpy as np
from sklearn import preprocessing

# One categorical feature column, one sample per row.
data = np.array([[0], [1], [2]])
# Leave the encoder's sparse option at its default and densify the result
# explicitly: the `sparse=` keyword was renamed to `sparse_output=` in
# scikit-learn 1.2 and later removed, so passing either spelling ties the
# example to a specific range of library versions.
enc = preprocessing.OneHotEncoder()
data = enc.fit_transform(data).toarray()
print(data)
运行结果:
[[1. 0. 0.]
[0. 1. 0.]
[0. 0. 1.]]
import numpy as np
# Decode one-hot rows back to class indices: in each row the position of
# the largest entry (the single 1) is the original label.
onehotValue = np.array([
    [1, 0, 0],
    [0, 1, 0],
])
value = onehotValue.argmax(axis=1)
参考: