python遍历xml所有节点dom_遍历 xml 所有的节点

element.xml

shangwu

Online

location_xml.py

#!/usr/bin/env python

# -*- coding: utf-8 -*-

# @Date : 2018-04-15 09:00:00

# @Author : Canon

# @Link : https://www.python.org

# @Version : 3.6.1

import xml.etree.ElementTree as ET

import xml.dom.minidom as minidom

import HTMLParser

import sys,os,string

# 获取标签的文本

# tree = minidom.parse("element.xml")

# p = tree.documentElement

# val = p.getElementsByTagName("span")

# print(val[0].firstChild.data)

# 获取所有节点

# html = open("element.xml", "r", encoding="utf-8").read()

# tagstack = []

# class ShowStructure(HTMLParser.HTMLParser):

# def handle_starttag(self, tag, attrs):

# tagstack.append(tag)

# def handle_endtag(self, tag):

# tagstack.pop()

# def handle_data(self, data):

# if data.strip():

# for tag in tagstack:

# sys.stdout.write('/'+tag)

# sys.stdout.write(' >> %s\n' % data[:40].strip())

# ShowStructure().feed(html)

# Module-wide counter: every node visited by walkData gets the next value.
unique_id = 0


def walkData(root_node, level, result_list, loca="/section"):
    """Record ``root_node`` and all of its descendants in document order.

    For every element visited, one row is appended to ``result_list``::

        [unique_id, level, tag, attrib, path, text]

    Args:
        root_node: The ``xml.etree.ElementTree.Element`` to start from.
        level: Depth recorded for ``root_node``; each child is recorded
            with ``level + 1``.
        result_list: List that receives the rows (mutated in place).
        loca: XPath-like location of ``root_node``. Child locations are
            built as ``loca + "/<tag>[<n>]"``, where ``n`` is the 1-based
            position of the child among its same-tag siblings. The default
            is only correct for a root element named ``section``; pass the
            real location for any other root.

    Returns:
        None. Results are delivered through ``result_list``.

    Note:
        The module-level ``unique_id`` counter is incremented once per
        visited node and is never reset here. The very first node visited
        (``unique_id == 1``) is recorded with the path ``"/<tag>"``; every
        other node is recorded with the ``loca`` it was given.
    """
    global unique_id
    unique_id += 1

    path = "/{}".format(root_node.tag) if unique_id == 1 else loca
    result_list.append(
        [unique_id, level, root_node.tag, root_node.attrib, path, root_node.text]
    )

    # Element.getchildren() was removed in Python 3.9; iterating the element
    # yields its direct children. Track how many siblings of each tag have
    # been seen so far to produce the positional index.
    seen_per_tag = {}
    for child in root_node:
        position = seen_per_tag.get(child.tag, 0) + 1
        seen_per_tag[child.tag] = position
        child_loca = "{}/{}[{}]".format(loca, child.tag, position)
        walkData(child, level + 1, result_list, child_loca)

def getXmlData(file_name, text):
    """Return the XPath-like location of the element whose text is ``text``.

    The file is parsed with ``xml.etree.ElementTree``, every element is
    collected through ``walkData``, and each element's ``.text`` is compared
    to ``text`` for exact equality.

    Args:
        file_name: Path of the XML file to parse.
        text: Exact element text to look for.

    Returns:
        The location string (for example ``"/section/div[1]/span[2]"``) of
        the matching element. If several elements carry the same text, the
        last one in document order wins. ``None`` is returned when no
        element matches.
    """
    root = ET.parse(file_name).getroot()
    result_list = []

    # Node depth starts at 1. The root's own tag is passed as the starting
    # location so the generated paths do not rely on walkData's "/section"
    # default and stay the same when this function is called repeatedly.
    walkData(root, 1, result_list, "/{}".format(root.tag))

    # Start from None so that a missing match yields None instead of an
    # UnboundLocalError at the return statement.
    loca_val = None
    for result in result_list:
        if result[5] == text:
            loca_val = result[4]
    return loca_val

# Example run: look up the location of the element in element.xml whose
# text is exactly "Merchant Profile" and print the result.
R = getXmlData("element.xml", "Merchant Profile")

print(R)

你可能感兴趣的:(python遍历xml所有节点dom_遍历 xml 所有的节点)