python操作xml文件示例代码

下面的示例演示如何在 Python 中使用 xml.dom.minidom 解析 XML 文件,并把其中每个 seed 节点整理成一个字典,最终返回字典列表。

#!/usr/bin/python
# www.jbxue.com
def _first_child_value(parent, tag_name):
    """Return the value of the first child node of the first *tag_name* element.

    Looks up *tag_name* among the descendants of *parent* and returns the
    ``nodeValue`` of that element's first child node. Returns an empty string
    when the element is absent or has no child nodes, instead of raising
    ``IndexError``.
    """
    matches = parent.getElementsByTagName(tag_name)
    if not matches or not matches[0].childNodes:
        return ''
    return matches[0].childNodes[0].nodeValue


def _print_seed(seed):
    """Print a human-readable dump of one seed dict, followed by the dict itself."""
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('-------------------------------------------')
    print('system_id:%d' % seed['system_id'])
    print('system_name:%s' % seed['system_name'])
    print(' section_id:%d' % seed['section_id'])
    print(' section_name:%s' % seed['section_name'])
    print(' seed_id:%d' % seed['seed_id'])
    print(' userblog_url:%s' % seed['userblog_url'])
    print('=========================')
    print(seed)


def get_seed_data(filename, verbose=True):
    """Parse a seed-configuration XML file into a flat list of seed dicts.

    The document is walked as ``<system>`` -> ``<section>`` -> ``<seed>``.
    Every ``<seed>`` element produces one dict that also carries the
    attributes of its enclosing system and section.

    Args:
        filename: Path of the XML file, or an open file object; it is passed
            unchanged to ``xml.dom.minidom.parse``.
        verbose: When true (the default, which keeps the historical
            behaviour) each seed is printed as it is collected, and on
            Windows the console is held open with ``pause`` at the end.
            Pass ``False`` to parse silently.

    Returns:
        A list of dicts in document order, each with the keys
        ``crawl_cycle``, ``system_id``, ``system_name``, ``section_id``,
        ``section_name``, ``seed_id`` and ``userblog_url``. The three
        ``*_id`` values are ints; ``crawl_cycle`` and ``userblog_url`` are
        the raw node values, or ``''`` when the element is empty or missing.

    Raises:
        ValueError: If an ``id`` attribute of a system, section or seed that
            is actually emitted is missing or not an integer.
        Any parsing error raised by ``xml.dom.minidom.parse`` propagates
        unchanged.
    """
    # Imported locally because the surrounding file has no import block.
    import os
    from xml.dom import minidom

    root = minidom.parse(filename).documentElement
    seed_list = []

    for system_node in root.getElementsByTagName("system"):
        system_id = system_node.getAttribute("id")
        system_name = system_node.getAttribute("name")

        for section_node in system_node.getElementsByTagName("section"):
            section_id = section_node.getAttribute('id')
            section_name = section_node.getAttribute('name')
            crawl_cycle = _first_child_value(section_node, "crawl_cycle")

            for seed_node in section_node.getElementsByTagName('seed'):
                # The int() conversions stay inside the seed loop so that a
                # system/section without seeds is never validated, exactly
                # as before.
                seed = {
                    'crawl_cycle': crawl_cycle,
                    'system_id': int(system_id),
                    'system_name': system_name,
                    'section_id': int(section_id),
                    'section_name': section_name,
                    'seed_id': int(seed_node.getAttribute('id')),
                    'userblog_url': _first_child_value(seed_node,
                                                       'userblog_url'),
                }
                seed_list.append(seed)
                if verbose:
                    _print_seed(seed)

    # "pause" is a Windows cmd built-in; on other platforms the shell call
    # would only fail with "command not found", so skip it there.
    if verbose and os.name == 'nt':
        os.system('pause')

    return seed_list

示例 XML 文件(即上面代码所解析的输入):

<?xml version="1.0" encoding="utf-8" ?>

<seeds>

<system id="1" name="新浪">

<section id="1" name="娱乐">

<crawl_cycle> </crawl_cycle>

<seed id="1">

<userblog_url>http://aaa.com.cn/loveissuuny</userblog_url>

</seed>

<seed id="2">

<userblog_url>http://aaa.com.cn/loveissuuny</userblog_url>

</seed>

<seed id="3">

<userblog_url>http://aaa.com.cn/sanxiazaixian</userblog_url>

</seed>

</section>

<section id="2" name="读书">

<crawl_cycle> </crawl_cycle>

<seed id="11">

<userblog_url>http://aaa.com.cn/twocold</userblog_url>

</seed>

<seed id="12">

<userblog_url>http://aaa.com.cn/u/1233526741</userblog_url>

</seed>

</section>

</system>

</seeds>

原文地址:python操作xml文件的例子

 

你可能感兴趣的:(python)