思想就是:为保证多叉树节点的唯一性,主要就是根据嵌套list,构建对应的节点id
首先,将嵌套list中第一个list的各个元素,直接作为其对应节点的id ;
其次,为了保证节点的命名不重复,建立一个字典来统计各个节点的出现次数 ;
若同一棵树上存在不在同一条路径上的相同子树,怎么知道该把这棵子树放到哪个节点下面呢?
很简单:建立一个临时path,先将当前list的第一个元素与之前各个list的第一个元素进行比较,若相同,则把对应的list抽出放入path;然后逐位继续比较,当前list中与path内某个list前缀一致的节点,沿用该list同位置节点的id;一旦出现不同,则其后各节点的id命名为该元素与其当前计数拼接而成的str。
def encoding_tree_node_id(temps):
    """Assign a node id to every element of every root-to-leaf path.

    Each inner sequence of ``temps`` is one path ("template") through a
    multi-way tree, listed from the root downwards.  Two templates that
    share a leading prefix describe the same nodes for that prefix, so
    they receive the same ids there; as soon as a template leaves every
    previously seen prefix, each remaining element gets a new id.

    Id rules:

    * Elements of the first template keep their own value as the id.
    * A new node in any later template gets ``element + str(count)``,
      where ``count`` is the number of times that element has occurred so
      far.  The whole current template is counted before any of its ids
      are built.  Elements of the first template are counted once each,
      even if one is repeated inside that template.

    Args:
        temps: Sequence of templates.  Elements must be hashable, and
            elements that need a generated id must be ``str`` because the
            id is built by string concatenation.

    Returns:
        A new list with one list of ids per template, in input order.
        An empty ``temps`` yields ``[]`` and an empty template yields
        ``[]``.  No returned list is shared with the input.

    NOTE: ids are plain concatenations, so an element that happens to be
    spelled like a generated id (e.g. ``'sa11'`` vs. ``'sa1'`` + ``1``)
    can collide with it.
    """
    occurrences = {}
    # Prefix trie over the templates seen so far:
    #   element -> (node_id, children) where children has the same shape.
    # Walking it finds the shared prefix in one pass instead of comparing
    # the current template against every earlier template.
    trie_root = {}
    encoded = []

    for index, template in enumerate(temps):
        is_first = index == 0

        # Count the whole template up front: ids generated below use the
        # count that already includes every occurrence in this template.
        for name in template:
            if is_first:
                occurrences[name] = 1
            else:
                occurrences[name] = occurrences.get(name, 0) + 1

        ids = []
        level = trie_root
        for name in template:
            entry = level.get(name)
            if entry is None:
                # First time this prefix is seen, so create its node.
                # Deeper prefixes cannot exist yet, so every remaining
                # element of this template takes this branch as well.
                node_id = name if is_first else name + str(occurrences[name])
                entry = (node_id, {})
                level[name] = entry
            ids.append(entry[0])
            level = entry[1]
        encoded.append(ids)

    return encoded
例如:
>>> temps = [
['sa7','sa2','sa3','sa5'],
['sa1','sa3','sa5'],
['sa1','sa3','sa6'],
['sa1','sa5'],
['sa7','sa2','sa3','sa6'],
['sa7','sa2','sa4'],
['sa1','sa3','sa7','sa8'],
['sa1','sa5','sa8']
]
>>> encoding_tree_node_id(temps)
[['sa7', 'sa2', 'sa3', 'sa5'], ['sa11', 'sa32', 'sa52'], ['sa11', 'sa32', 'sa61'], ['sa11', 'sa53'], ['sa7', 'sa2', 'sa3', 'sa62'], ['sa7', 'sa2', 'sa41'], ['sa11', 'sa32', 'sa74', 'sa81'], ['sa11', 'sa53', 'sa82']]