ElementTree合并子节点(2)

from collections import Counter
from xml.etree import ElementTree
from xml.etree.ElementTree import Element, SubElement

# Tag used for the document root; every rebuilt document is wrapped in it.
# NOTE(review): the wrapper literals were lost when this file was extracted;
# 'site' is taken from the Element('site') calls that survived - confirm.
_ROOT_TAG = 'site'

# Elements with these tags are left as they are: duplicate tags among their
# children are not merged.
_NO_MERGE_TAGS = ('c',)


def _rebuild_without_duplicates(tag, parent):
    """Return a new ``<tag>`` element holding the distinct children of *parent*.

    Children are compared by their serialized form (trailing whitespace
    stripped), so only byte-for-byte identical subtrees are dropped.
    ``dict.fromkeys`` keeps the first occurrence and the document order,
    which makes the result deterministic.
    """
    serialized = (
        ElementTree.tostring(child, encoding='unicode').strip()
        for child in parent
    )
    unique = list(dict.fromkeys(serialized))
    return ElementTree.fromstring(
        '<' + tag + '>' + ''.join(unique) + '</' + tag + '>'
    )


def refactor_xml_param(xml_str):
    """Remove identical child nodes and merge sibling nodes sharing a tag.

    Example (whitespace between elements omitted)::

        <site>
            <a>1</a>
            <a>1</a>
            <b>1</b>
            <c><d>111</d></c>
            <c><d>112</d></c>
            <c><d>221</d></c>
            <c><d>221</d></c>
        </site>

    is converted to::

        <site><c><d>111</d><d>112</d><d>221</d></c><a>1</a><b>1</b></site>

    The result is always wrapped in a ``<site>`` root element and is also
    printed to stdout.

    :param xml_str: XML document as a string
    :return: the refactored XML document as a string
    """
    # Remove exact duplicate top-level nodes.
    root = _rebuild_without_duplicates(
        _ROOT_TAG, ElementTree.fromstring(xml_str)
    )

    # Count every tag below the root; merging is only needed when some tag
    # occurs more than once anywhere in the tree.
    tag_counts = Counter(node.tag for node in root.iter() if node is not root)
    if any(count > 1 for count in tag_counts.values()):
        root = make_new_xml(root, Element(_ROOT_TAG), Element(_ROOT_TAG))

    xml_str = ElementTree.tostring(root, encoding='unicode')
    print(xml_str)
    return xml_str


def make_new_xml(root, element, new_root):
    """Merge same-tag siblings below *root*, level by level.

    Siblings that share a tag are replaced by a single new element with that
    tag which receives the children of all of them; the text and attributes
    of the merged siblings themselves are not carried over. Siblings whose
    tag is unique are moved over unchanged. Merged elements come first, then
    the unique ones, each group in document order.

    :param root: element whose children are processed
    :param element: empty element with the same tag, used as temporary
        storage for the processed children
    :param new_root: element whose children are replaced by the processed
        children (changed in place, once per recursion level)
    :return new_root: the element holding the fully processed subtree
    """
    # Remove exact duplicate children (a no-op on the first call, because
    # the caller has already done this for the top level).
    root = _rebuild_without_duplicates(element.tag, root)

    # Group the direct children by tag, keeping document order.
    groups = {}
    for node in root:
        groups.setdefault(node.tag, []).append(node)

    # Duplicated tag: create one new sub element and move the children of
    # every occurrence into it.
    for tag, cases in groups.items():
        if len(cases) > 1:
            merged = SubElement(element, tag)
            for case in cases:
                merged.extend(list(case))

    # Unique tag: move the old sub element over as it is.
    for cases in groups.values():
        if len(cases) == 1:
            element.append(cases[0])

    new_root_children = list(element)

    # Replace the old (possibly duplicated) children of the parent node
    # with the processed ones.
    for stale in list(new_root):
        new_root.remove(stale)
    new_root.extend(new_root_children)

    # The children of a processed element may contain duplicate tags too,
    # so descend into every element that has children, except for the tags
    # that must not be merged.
    for child in new_root_children:
        if child.tag not in _NO_MERGE_TAGS and len(child) > 0:
            make_new_xml(child, Element(child.tag), new_root.find(child.tag))
    return new_root


if __name__ == '__main__':
    # NOTE(review): the markup of the original sample documents was lost
    # during extraction. xml_tmp1 is consistent with the surviving text
    # values and documented result; xml_tmp2 is illustrative only - confirm
    # against the real inputs.
    xml_tmp1 = "<site>" \
               "    <a>1</a>" \
               "    <a>1</a>" \
               "    <b>1</b>" \
               "    <c><d>111</d></c>" \
               "    <c><d>112</d></c>" \
               "    <c><d>221</d></c>" \
               "    <c><d>221</d></c>" \
               "</site>"
    xml_tmp2 = "<site>" \
               "    <a><b>111</b></a>" \
               "    <a><b>111</b></a>" \
               "    <a><e>222</e></a>" \
               "    <c><d>111</d></c>" \
               "    <c><d>112</d></c>" \
               "    <c><d>221</d></c>" \
               "    <f><g><h>111</h></g></f>" \
               "    <f><g><i>222</i></g></f>" \
               "    <f><g><i>222</i></g><j>222</j></f>" \
               "</site>"
    res = refactor_xml_param(xml_tmp2)

 

 

你可能感兴趣的:(ElementTree合并子节点(2))