用Python去解析XML报文 V1.1

演示如何用Python解析XML。
下面先给出待解析的XML报文,随后是用于解析它的Python源码。
    1. <?xml version="1.0" encoding="iso8859-1"?>
    2. <view id="CCBSA_v" msgType="1">
    3. <viewElem id="jrcfwq" dispName="集群">
    4. <viewElem id="fwqsl" dispName="服务器数量" value="1"/>
    5. <viewElem id="Master" dispName="主控节点" value="STAR"/>
    6. <viewElem id="Server">
    7. <viewElem id="SrvName" value="01" dispName="STAR">
    8. <viewElem id="Plat" dispName="平台信息">
    9. <viewElem id="PlatStat" dispName="平台状态">
    10. <viewElem id="MchPort" dispName="机器端口" value="12000"/>
    11. <viewElem id="MchState" dispName="机器状态" value="在线"/>
    12. <viewElem id="AllProc" dispName="总进程数" value="0"/>
    13. <viewElem id="EslProc" dispName="平台进程数" value="29"/>
    14. <viewElem id="SemNum" dispName="信号灯数" value="104"/>
    15. <viewElem id="DrqSem" dispName="DRQ使用信号灯" value="53"/>
    16. </viewElem>
    17. <viewElem id="CpuStat" dispName="CPU状态">
    18. <viewElem id="CpuIdle" dispName="idle" value="0"/>
    19. <viewElem id="CpuUser" dispName="user" value="0"/>
    20. <viewElem id="CpuNice" dispName="nice" value="0"/>
    21. <viewElem id="CpuSys" dispName="sys" value="0"/>
    22. <viewElem id="CpuWio" dispName="wio" value="0"/>
    23. </viewElem>
    24. <viewElem id="SysInfo" dispName="操作系统信息">
    25. <viewElem id="CpuNum" dispName="CPU颗数" value="0"/>
    26. <viewElem id="PhyMem" dispName="物理内存(K)" value="0"/>
    27. <viewElem id="VirtMem" dispName="虚拟内存(K)" value="0"/>
    28. <viewElem id="UsedMem" dispName="已用内存(K)" value="0"/>
    29. <viewElem id="exit_freeMem" dispName="可用内存(K)" value="0"/>
    30. <viewElem id="IFaceNum" dispName="网络接口数" value="0"/>
    31. </viewElem>
    32. <viewElem id="ProjInfo" dispName="项目信息">
    33. <viewElem id="ShmSize" dispName="部署共享内存大小(k)" value="15251168"/>
    34. <viewElem id="ShmKeep" dispName="部署共享内存剩余(k)" value="16769072"/>
    35. <viewElem id="BcbSize" dispName="交换共享内存大小(k)" value="6304"/>
    36. <viewElem id="BcbUsed" dispName="已用交换共享内存(k)" value="2208"/>
    37. <viewElem id="BcbMax" dispName="最大交换共享内存(k)" value="70816"/>
    38. <viewElem id="ChkNum" dispName="当前已用缓冲区段数" value="1"/>
    39. <viewElem id="DrqMsg" dispName="缓存消息数" value="0"/>
    40. <viewElem id="MsqNum" dispName="队列中的消息数" value="0"/>
    41. <viewElem id="TranNum" dispName="本机交易统计" value="322047"/>
    42. <viewElem id="DTAMPID" dispName="DTA管理服务进程号" value="3727372"/>
    43. <viewElem id="RCYCLPID" dispName="垃圾回收服务进程号" value="3739830"/>
    44. <viewElem id="SYNCPID" dispName="多机同步服务进程号" value="3670122"/>
    45. <viewElem id="ISSUEPID" dispName="多机发布服务进程号" value="2871348"/>
    46. <viewElem id="MACHSVRPID" dispName="多机交换服务进程号" value="0"/>
    47. <viewElem id="MONCPID" dispName="多机监控同步服务进程号" value="0"/>
    48. <viewElem id="MONSPID" dispName="多机监控发布服务进程号" value="0"/>
    49. <viewElem id="DTANUM" dispName="适配器个数" value="12"/>
    50. ....
    51. ....
    52. ......

import io
from os.path import join
from sys import stderr
from xml.dom import minidom
  4. __version__="V1.1"
  5. """
  6. Definitionfortypesandmaps
  7. """
  8. dataTypMap={"integer":0,"foat":1,"boolean":2,
  9. "char":3,"string":4,"void":5,"double":6,
  10. "long":7,"object":8,"datetime":9}
  11. nodeTypMap={"LEAF":0,"NODE":2}
  12. nodeImgMap={nodeTypMap["LEAF"]:"/images/leaf.gif",
  13. nodeTypMap["NODE"]:"/images/node.gif"}
  14. whoMap={"Views":0,"Nodes":1,
  15. "NodeViews":2,"NodesRel":3}
  16. classTIndicatorData:
  17. def__init__(self,XML,dest):
  18. self.__xmldoc=minidom.parse(XML.strip(""))
  19. self.__root=self.__xmldoc.documentElement
  20. self.__destPath=dest.strip("")
  21. self.__gNodesList=[]#Storinguniquenodes
  22. self.__hViews=None#Filehandleforisac_mnt_view
  23. self.__hNodes=None#Filehandleforisac_mnt_basenode
  24. self.__hNodeViews=None#Filehandleforisac_mnt_basenode_view
  25. self.__hNodesRel=None#Filehandleforisac_mnt_basenode_rel
  26. self.__outputData={"Views":"","Nodes":"","NodeViews":"","NodesRel":""}
  27. #enddef
  28. defGenIndicators(self):
  29. self.__hViews=open(join(self.__destPath,"isac_mnt_view.txt"),"w+")
  30. self.__hNodes=open(join(self.__destPath,"isac_mnt_basenode.txt"),"w+")
  31. self.__hNodeViews=open(join(self.__destPath,"isac_mnt_basenode_view.txt"),"w+")
  32. self.__hNodesRel=open(join(self.__destPath,"isac_mnt_basenode_rel.txt"),"w+")
  33. try:
  34. self.__retrieveIndViewData("Nodes")
  35. self.__retrieveIndViewData("Views")
  36. self.__retrieveIndViewData("NodeViews")
  37. self.__retrieveIndViewData("NodesRel")
  38. self.__write(self.__hNodes,"Nodes")
  39. self.__write(self.__hViews,"Views")
  40. self.__write(self.__hNodeViews,"NodeViews")
  41. self.__write(self.__hNodesRel,"NodesRel")
  42. finally:
  43. self.__hViews.close()
  44. self.__hNodes.close()
  45. self.__hNodeViews.close()
  46. self.__hNodesRel.close()
  47. #enddef
  48. def__write(self,fileHandle,outputDataName):
  49. ifnotfileHandle:
  50. stderr.write("Invalidfilehandlecorrespondingto%s"%outputDataName)
  51. return
  52. fileHandle.write(self.__outputData[outputDataName.strip("")].encode("GB2312"))
  53. printfileHandle.name
  54. def__retrieveIndViewData(self,whoMapKeyName):
  55. self.__getNodes(self.__root,whoMap[whoMapKeyName])
  56. self.__clearNodesList()
  57. def__clearNodesList(self):
  58. self.__gNodesList=[]
  59. def__getNodePaths(self,ANode,isDynNode=True,initPath=""):
  60. """
  61. retrievesthevalueofstringofstaticnodesordynamicnodes
  62. excluededleaf-nodes.
  63. """
  64. if(notANode)or(notANode.hasChildNodes()):
  65. returninitPath
  66. ifinitPath.strip("")=="":
  67. initPath="/%s"%ANode.attributes["id"].value.strip("")
  68. foreleminANode.childNodes:
  69. if(notelem.localName)or(notelem.hasChildNodes()):
  70. continue
  71. idPath=""
  72. ifisDynNodeandelem.hasAttribute("value"):
  73. idPath="%s/%s"%(initPath,elem.attributes["value"].value.strip(""))
  74. else:
  75. idPath="%s/%s"%(initPath,elem.attributes["id"].value.strip(""))
  76. printidPath
  77. self.__getNodePaths(elem,isDynNode,idPath)
  78. #endfor
  79. #enddef
  80. def__getNodeType(self,ANode):
  81. ret=nodeTypMap["LEAF"]
  82. ifANode.hasChildNodes():
  83. ret=nodeTypMap["NODE"]
  84. returnret
  85. #enddef
  86. def__outputNodes(self,ANode):
  87. """
  88. generatestherecordsofuniquenodesinXML
  89. """
  90. ifnotANode:
  91. return
  92. nodeType=self.__getNodeType(ANode)
  93. dataType=dataTypMap["void"]
  94. ifnodeType==nodeTypMap["LEAF"]andANode.hasAttribute("value"):
  95. dataValue=ANode.attributes["value"].value.strip("")
  96. ifdataValue.isdigit():
  97. dataType=dataTypMap["integer"]
  98. else:
  99. try:
  100. float(dataValue)
  101. dataType=dataTypMap["float"]
  102. except:
  103. dataType=dataTypMap["string"]
  104. dispName=ANode.attributes["id"].value.strip("")
  105. ifANode.hasAttribute("dispName"):
  106. dispName=ANode.attributes["dispName"].value.strip("")/
  107. .encode("ISO8859").decode("GB2312")
  108. ProcType=0
  109. PrstType=1
  110. UnitName=''
  111. initValue='0'
  112. updateTime=''
  113. self.__outputData["Nodes"]+="%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|/n"%/
  114. (ANode.attributes["id"].value.strip(""),
  115. dispName,dispName,nodeImgMap[nodeType],
  116. nodeType,dataType,ProcType,PrstType,
  117. UnitName,initValue,updateTime)
  118. #enddef
  119. def__outputViews(self,ANode):
  120. """
  121. generatestherecordsofviews.
  122. """
  123. if(notANode)or(self.__getNodeType(ANode)!=nodeTypMap["NODE"]):
  124. return
  125. nodeID=ANode.attributes["id"].value.strip("")
  126. viewName=nodeID
  127. ifANode.hasAttribute("dispName"):
  128. viewName=ANode.attributes["dispName"].value/
  129. .encode("ISO8859").decode("GB2312")
  130. self.__outputData["Views"]+="%s|%s|%s|/n"%(nodeID,viewName,nodeID)
  131. #enddef
  132. def__outputNodeViews(self,ANode):
  133. ifnotANode:
  134. return
  135. ifself.__getNodeType(ANode)==nodeTypMap["NODE"]:
  136. self.__outputData["NodeViews"]+=/
  137. "%s|%s|/n"%/
  138. (ANode.attributes["id"].value.strip(""),
  139. ANode.attributes["id"].value.strip(""))
  140. else:
  141. self.__outputData["NodeViews"]+=/
  142. "%s|%s|/n"%/
  143. (ANode.attributes["id"].value.strip(""),
  144. ANode.parentNode.attributes["id"].value.strip(""))
  145. #enddef
  146. def__outputNodesRel(self,ANode):
  147. if(notANode)or(ANode.parentNode.nodeType==ANode.DOCUMENT_NODE):
  148. return
  149. self.__outputData["NodesRel"]+="%s|%s|/n"%/
  150. (ANode.parentNode.attributes["id"].value.strip(""),
  151. ANode.attributes["id"].value.strip(""))
  152. #enddef
  153. def__handleByType(self,ANode,Who):
  154. """
  155. Determinewhichkindofdatashouldbeoutput.
  156. """
  157. ifnotANode:
  158. return
  159. ifWho==whoMap["Nodes"]:
  160. self.__outputNodes(ANode)
  161. elifWho==whoMap["Views"]:
  162. self.__outputViews(ANode)
  163. elifWho==whoMap["NodeViews"]:
  164. self.__outputNodeViews(ANode)
  165. elifWho==whoMap["NodesRel"]:
  166. self.__outputNodesRel(ANode)
  167. #enddef
  168. def__getNodes(self,ANode,Who):
  169. """retrievesallstaticnodes"""
  170. ifnotANode:
  171. return
  172. ifANode.parentNode.nodeType==ANode.DOCUMENT_NODE:
  173. self.__handleByType(ANode,Who)
  174. foreleminANode.childNodes:
  175. ifnotelem.localName:
  176. continue
  177. ifnotelem.hasAttribute("id"):
  178. stderr.write("Invalidtagwithoutanattributenamed'id'FOUND!")
  179. continue
  180. tagID=elem.attributes["id"].value.strip("")
  181. try:
  182. self.__gNodesList.index(tagID)
  183. continue
  184. exceptValueError:
  185. #NOTFOUND
  186. self.__gNodesList.append(tagID)
  187. self.__handleByType(elem,Who)
  188. self.__getNodes(elem,Who)
  189. #enddef
  190. #endclass
  191. if__name__=="__main__":
  192. """
  193. CAUTION:PleasesetencodingbyISO8859inXMLhead!
  194. """
  195. ind=TIndicatorData(
  196. r"D:/Documents/Construction_Bank/SH/CCBSA/Design/ccbsa_ind21.xml",
  197. r"D:/Temp")
  198. ind.GenIndicators()
  199. print"/nMISSIONCOMPLETED."
  200. #ENDOFFILE.

你可能感兴趣的:(python)