Python的treelib构建多叉树——快速命名节点id

思想就是:为保证多叉树节点的唯一性,主要就是根据嵌套list,构建对应的节点id
首先,将嵌套list中第一个list的各个元素,直接作为该list中对应节点的id ;
其次,为了保证节点的命名不重复,建立一个字典来统计各个节点的出现次数 ;
若同一棵树上有内容相同、但不在同一条路径上的子树,怎么知道应该将该子树放到哪个节点下面呢?
很简单,就是建立一个临时path:先把当前list的第一个元素与之前每个list相同位置的元素进行比较,若第一个元素相同,则将对应的list抽出放到path;之后逐个位置继续比较,path中只保留同一位置元素仍然相同的list。只要path不为空,当前list该位置节点的命名就与path中同位置的相同元素一样;若不同(path为空),则将其余元素的id命名为该元素与其当前计数的拼接str。

def encoding_tree_node_id(temps):
    """Assign a tree-node id to every element of every root-to-node path.

    Each inner sequence of ``temps`` is a path of string labels starting at
    a root.  Two paths that share a leading run of labels share the ids of
    that run, so the same tree node always receives the same id.  A label
    that starts a new branch receives a new id.

    Id format:

    * In the first path a label is used verbatim as its own id.
    * Every other new node gets ``label + str(count)``, where ``count`` is
      the number of times that label has been seen so far, the current
      position included.  A label repeated inside one path is counted once
      per position, so the repeats get distinct ids.

    Known limitation: ids are built by plain concatenation, so a generated
    id can coincide with a verbatim label (label ``'sa1'`` with count 1
    yields ``'sa11'``).

    Args:
        temps: Sequence of paths; each path is a sequence of hashable
            string labels.  Paths may be empty.

    Returns:
        A new list of lists with the same shape as ``temps`` holding the
        node ids.  No list from ``temps`` is reused in the result.

    >>> encoding_tree_node_id([['a', 'b'], ['a', 'c'], ['x', 'b']])
    [['a', 'b'], ['a', 'c1'], ['x1', 'b2']]
    """
    if not temps:
        return []

    # label -> how many positions carrying that label have been visited.
    seen_count = {}
    # tuple of labels from the root down to a node -> id given to that node.
    id_by_prefix = {}

    temps_id = []
    for index, template in enumerate(temps):
        row_ids = []
        prefix = ()
        for label in template:
            prefix += (label,)
            count = seen_count.get(label, 0) + 1
            seen_count[label] = count

            if prefix in id_by_prefix:
                # Same path from the root as an earlier template: same node.
                node_id = id_by_prefix[prefix]
            else:
                if index == 0 and count == 1:
                    # The first template's labels are used directly as ids.
                    node_id = label
                else:
                    node_id = label + str(count)
                id_by_prefix[prefix] = node_id
            row_ids.append(node_id)
        temps_id.append(row_ids)
    return temps_id

例如:

>>> temps = [
['sa7','sa2','sa3','sa5'],
['sa1','sa3','sa5'],
['sa1','sa3','sa6'],
['sa1','sa5'],
['sa7','sa2','sa3','sa6'],
['sa7','sa2','sa4'],
['sa1','sa3','sa7','sa8'],
['sa1','sa5','sa8']
]
>>> encoding_tree_node_id(temps)
[['sa7', 'sa2', 'sa3', 'sa5'], ['sa11', 'sa32', 'sa52'], ['sa11', 'sa32', 'sa61'], ['sa11', 'sa53'], ['sa7', 'sa2', 'sa3', 'sa62'], ['sa7', 'sa2', 'sa41'], ['sa11', 'sa32', 'sa74', 'sa81'], ['sa11', 'sa53', 'sa82']]

你可能感兴趣的:(Python)