Python 实现简单的独热编码和解码(One-Hot Encoding)

Python 实现简单的独热编码和解码(One-Hot Encoding)

import numpy as np


def one_hot_encoder(array):
    """One-hot encode a 1-D sequence of categorical values.

    Parameters
    ----------
    array : array_like
        1-D sequence of category values (numbers or strings). Anything
        ``np.asarray`` accepts is allowed, e.g. a list or an ndarray.

    Returns
    -------
    kind_list : list
        The distinct categories in ascending sorted order. Column ``j`` of
        ``one_hot_array`` corresponds to ``kind_list[j]``.
    one_hot_array : numpy.ndarray
        Float array of shape ``(len(array), len(kind_list))`` in which row
        ``i`` holds a single ``1.0`` in the column of ``array[i]``'s category.

    Raises
    ------
    ValueError
        If ``array`` is not one-dimensional.
    """
    array = np.asarray(array)
    if array.ndim != 1:
        raise ValueError("one_hot_encoder expects a 1-D array")

    # np.unique returns the categories sorted, so the column order is
    # deterministic (iterating a set is not, especially for strings).
    # `inverse[i]` is the column index of array[i].
    kinds, inverse = np.unique(array, return_inverse=True)

    one_hot_array = np.zeros((array.shape[0], kinds.shape[0]))
    # Set exactly one entry per row in a single vectorized assignment.
    one_hot_array[np.arange(array.shape[0]), inverse] = 1
    return list(kinds), one_hot_array


def one_hot_decoder(kind_list, one_hot_array):
    """Decode a one-hot matrix back into an array of category labels.

    Parameters
    ----------
    kind_list : sequence
        Category labels; ``kind_list[j]`` is the label for column ``j``.
    one_hot_array : array_like
        2-D array in which every row contains exactly one entry equal to 1.

    Returns
    -------
    numpy.ndarray
        1-D array of labels converted to strings, one per row of
        ``one_hot_array``. Numeric categories therefore come back as text
        (e.g. ``'2'``); use ``.astype(int)`` to recover numbers.

    Raises
    ------
    ValueError
        If any row does not contain exactly one entry equal to 1.
    """
    one_hot_array = np.asarray(one_hot_array)
    hits = one_hot_array == 1
    if not np.all(hits.sum(axis=1) == 1):
        raise ValueError(
            "every row of one_hot_array must contain exactly one 1"
        )

    # Let NumPy size the string dtype from the labels themselves; a fixed
    # width buffer would silently truncate long labels.
    labels = np.asarray(kind_list).astype(str)

    # With exactly one hit per row, np.nonzero yields the hot column of each
    # row in row order.
    _, hot_columns = np.nonzero(hits)
    return labels[hot_columns]
# Example 1: encode and decode an integer label array.
import pandas as pd
a = np.array([0, 0, 0, 0, 1, 1, 1, 2, 1, 0, 1, 2, 2, 2, 0, 1, 0, 2, 2, 2, 1, 2])
kind_list, onehot_a = one_hot_encoder(a)
a_decoded = one_hot_decoder(kind_list, onehot_a)
# Label the columns with the categories returned by the encoder.
print(pd.DataFrame(onehot_a, columns=kind_list))
# The decoder returns strings; convert back with the builtin int
# (the np.int alias was removed in NumPy 1.24).
print(a_decoded.astype(int))
################################################
# 输出的独热编码
      0    1    2
0   1.0  0.0  0.0
1   1.0  0.0  0.0
2   1.0  0.0  0.0
3   1.0  0.0  0.0
4   0.0  1.0  0.0
5   0.0  1.0  0.0
6   0.0  1.0  0.0
7   0.0  0.0  1.0
8   0.0  1.0  0.0
9   1.0  0.0  0.0
10  0.0  1.0  0.0
11  0.0  0.0  1.0
12  0.0  0.0  1.0
13  0.0  0.0  1.0
14  1.0  0.0  0.0
15  0.0  1.0  0.0
16  1.0  0.0  0.0
17  0.0  0.0  1.0
18  0.0  0.0  1.0
19  0.0  0.0  1.0
20  0.0  1.0  0.0
21  0.0  0.0  1.0
###################
# decoded a
[0 0 0 0 1 1 1 2 1 0 1 2 2 2 0 1 0 2 2 2 1 2]
# Example 2: encode and decode a string label array.
import pandas as pd
# The elements must be comma-separated; adjacent string literals without
# commas are concatenated into a single string.
a = np.array(['cold', 'cold', 'hot', 'hot', 'warm', 'hot', 'cold', 'hot', 'warm'])
kind_list, onehot_a = one_hot_encoder(a)
a_decoded = one_hot_decoder(kind_list, onehot_a)
# Label the columns with the categories returned by the encoder.
print(pd.DataFrame(onehot_a, columns=kind_list))
print(a_decoded)
################################################
# 输出的独热编码
   cold  hot  warm
0   1.0  0.0   0.0
1   1.0  0.0   0.0
2   0.0  1.0   0.0
3   0.0  1.0   0.0
4   0.0  0.0   1.0
5   0.0  1.0   0.0
6   1.0  0.0   0.0
7   0.0  1.0   0.0
8   0.0  0.0   1.0
###################
# decoded a
['cold' 'cold' 'hot' 'hot' 'warm' 'hot' 'cold' 'hot' 'warm']

你可能感兴趣的:(python,numpy,pandas,机器学习,深度学习)