import numpy as np
import keras
def dummy_data():
    """Build a small ragged sample data set.

    Returns:
        A 1-D NumPy array of dtype ``object`` with four elements, each a
        Python list of integer class indices:
        ``[0, 1, 2]``, ``[3, 4, 5, 6]``, ``[7, 8]`` and ``[9]``.
    """
    rows = [
        [0, 1, 2],
        [3, 4, 5, 6],
        [7, 8],
        [9],
    ]
    # The rows have different lengths, so the object dtype must be requested
    # explicitly: recent NumPy releases raise ValueError when asked to build
    # an array from ragged nested sequences without it.
    return np.array(rows, dtype=object)
def to_array(a, col):
    """Multi-hot encode every index sequence stored in ``a``, in place.

    Each element of ``a`` is one-hot encoded with ``col`` classes and the
    resulting one-hot rows are summed, so the element is replaced by a single
    vector of length ``col`` holding, per class, how many times that class
    index occurred in the element.

    Args:
        a: Mutable, indexable container (for example a NumPy object array or
            a list) whose elements are sequences of integer class indices.
            It is modified in place.
        col: Number of classes, i.e. the length of every encoded vector.

    Returns:
        The same container ``a``, with every element replaced by its encoded
        vector.
    """
    for i, indices in enumerate(a):
        # With col=5, an element such as [0, 1, 2] is one-hot encoded to:
        #   [[1, 0, 0, 0, 0],
        #    [0, 1, 0, 0, 0],
        #    [0, 0, 1, 0, 0]]
        one_hot = keras.utils.to_categorical(np.array(indices), num_classes=col)
        # Adding the rows element-wise collapses that matrix into one vector:
        #   [1, 1, 1, 0, 0]
        # The matrix is kept in a local instead of being parked in a[i], so
        # the container only ever receives the final vector.
        a[i] = np.sum(one_hot, axis=0)
    return a
if __name__ == "__main__":
    # Demo: show the ragged sample data, then its multi-hot encoding with
    # 15 classes.
    a = dummy_data()
    print('原数组:', a, sep='\n')
    # The header is printed before encoding, matching the original order.
    print('\none-hot编码后:')
    a = to_array(a, 15)
    print(a)
# Output:
# 原数组:
# [list([0, 1, 2]) list([3, 4, 5, 6]) list([7, 8]) list([9])]
# one-hot编码后:
# [array([1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
# dtype=float32)
# array([0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
# dtype=float32)
# array([0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],
# dtype=float32)
# array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
# dtype=float32)]