转自:http://qingkangxu.iteye.com/blog/1838405
在软件项目中,中间件配置、应用参数配置等常常以XML文件的格式保存,而XML的解析已有很多现成的框架。本文给出使用DOM、SAX、Dom4j、JDOM、StAX等技术解析XML文件的代码示例,并做简单的分析及性能对比。
1,前言
假设有如下的XML文件,其根节点为<root>,<groups>节点可以包含多个<group>,一个<group>为一个需要分布式部署的应用组,<group>下有多个<machine>,标识部署应用的具体机器,<machine>的type属性标识机器是否为虚拟机。
- <?xml version="1.0" encoding="ISO-8859-1"?>
- <root>
- <description>All application groups.</description>
- <groups>
- <group name="bossAppGroup">
- <machine name="bossNode1" type="virtual">
- <ip>192.168.0.11</ip>
- <hostname>bossApp1</hostname>
- </machine>
- <machine name="bossNode2" type="concrete">
- <ip>192.168.0.12</ip>
- <hostname>bossApp2</hostname>
- </machine>
- </group>
- <group name="wlanAppGroup">
- <machine name="wlanNode1" type="concrete">
- <ip>192.168.0.21</ip>
- <hostname>wlan1</hostname>
- </machine>
- <machine name="wlanNode2" type="concrete">
- <ip>192.168.0.22</ip>
- <hostname>wlan2</hostname>
- </machine>
- </group>
- </groups>
- </root>
对于以上文件,以下列举出使用不同的XML解析技术的示例代码。
2,解析技术示例代码
以下列举了SAX、DOM等解析示例,代码完成的功能都是一样的:解析前言中提到的XML文件,获取到机器列表,并以“组名{机器名[属性=值,...]}”的格式输出。如:
- bossAppGroup{bossNode1[type=virtual,IP=192.168.0.11,hostname=bossApp1],bossNode2[type=concrete,IP=192.168.0.12,hostname=bossApp2]}
- wlanAppGroup{wlanNode1[type=concrete,IP=192.168.0.21,hostname=wlan1],wlanNode2[type=concrete,IP=192.168.0.22,hostname=wlan2]}
2.1 公有常量类
- package parser;
/**
 * Element names, attribute names and attribute values used by the
 * application-group XML file that the parsers in this article read.
 *
 * <p>This class only holds constants; it cannot be instantiated or extended.
 */
public final class Constant {

    /** Name of the document's root element. */
    public static final String ROOT = "root";
    /** Name of the element that wraps all {@code group} elements. */
    public static final String GROUPS = "groups";
    /** Name of the element describing one application group. */
    public static final String GROUP = "group";
    /** Name of the element describing one machine inside a group. */
    public static final String MACHINE = "machine";
    /** Name of the child element holding a machine's IP address. */
    public static final String IP = "ip";
    /** Name of the child element holding a machine's host name. */
    public static final String HOSTNAME = "hostname";
    /** Name of the {@code name} attribute carried by groups and machines. */
    public static final String NAME = "name";
    /** Name of the {@code type} attribute carried by machines. */
    public static final String TYPE = "type";

    /** {@code type} attribute value used in the sample file for non-virtual machines. */
    public static final String TYPE_CONCRETE = "concrete";
    /** {@code type} attribute value used in the sample file for virtual machines. */
    public static final String TYPE_VIRTUAL = "virtual";

    private Constant() {
        // Constant holder only; instances would serve no purpose.
    }
}
2.2 Parser接口及对应的数据model
- package parser;
- import java.util.List;
-
- import module.Group;
-
-
- public interface MachineParser {
-
-
-
-
-
-
-
-
- public List getGroupList(String xmlFilePath) throws Exception;
- }
Group类包含了主机List属性,为了输出,重写了toString方法。
- package module;
-
- import java.util.ArrayList;
- import java.util.List;
-
- public class Group {
-
- private String name;
- private List machineList;
-
- public Group(String _name){
- this.name = _name;
- machineList = new ArrayList();
- }
-
- public void addMachine(Machine machine){
- machineList.add(machine);
- }
-
- public void removeMachine(Machine machine){
- machineList.remove(machine);
- }
-
- public String getName() {
- return name;
- }
- public void setName(String name) {
- this.name = name;
- }
- public List getMachineList() {
- return machineList;
- }
- public void setMachineList(List machineList) {
- this.machineList = machineList;
- }
-
- @Override
- public String toString() {
- StringBuffer sb = new StringBuffer();
- sb.append(name).append("{");
-
- for (int i = 0; i < machineList.size(); i++) {
- sb.append(machineList.get(i));
- if (i != (machineList.size() - 1)) {
- sb.append(",");
- }
- }
- sb.append("}");
- return sb.toString();
- }
- }
Machine 类包含了主机IP和主机名属性,为了输出,也重写了toString方法。
- package module;
-
/**
 * One machine of an application group, described by its name, type,
 * IP address and host name.
 */
public class Machine {

    private String name;
    private String type;
    private String ip;
    private String hostname;

    /**
     * Creates a machine with the given name; type, IP and host name stay
     * {@code null} until set.
     *
     * @param name the machine name
     */
    public Machine(String name) {
        this.name = name;
    }

    /** @return the machine name */
    public String getName() {
        return name;
    }

    /** @param name the new machine name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the machine type, or {@code null} if not set */
    public String getType() {
        return type;
    }

    /** @param type the machine type */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the IP address, or {@code null} if not set */
    public String getIp() {
        return ip;
    }

    /** @param ip the IP address */
    public void setIp(String ip) {
        this.ip = ip;
    }

    /** @return the host name, or {@code null} if not set */
    public String getHostname() {
        return hostname;
    }

    /** @param hostname the host name */
    public void setHostname(String hostname) {
        this.hostname = hostname;
    }

    /**
     * Formats the machine as {@code name[type=...,IP=...,hostname=...]};
     * unset properties are rendered as {@code null}.
     */
    @Override
    public String toString() {
        // A single concatenation expression is compiled to one builder chain;
        // the previous StringBuffer + "+" mix created extra temporaries.
        return name + "[type=" + type + ",IP=" + ip + ",hostname=" + hostname + "]";
    }
}
2.3 DOM解析实现
DOM解析会比较耗内存,因为DOM需要一次性加载整个文件内容到内存中,解析大文件时不建议使用DOM。DOM解析也不需要导入第三方jar包,只需要JDK便可,比较轻量级。
2.4 SAX解析实现
SAX是基于事件的,其不会加载整个文件到内存中,属于按需取内容。一般使用SAX解析时都会借助于栈来完成元素和数据模型的衔接。
- package parser.sax;
- import java.util.List;
-
- import org.xml.sax.SAXException;
- import org.xml.sax.XMLReader;
- import org.xml.sax.helpers.XMLReaderFactory;
-
- import parser.MachineParser;
-
- import module.Group;
-
-
-
-
-
-
-
- public class JDKSaxParser implements MachineParser {
-
- XMLReader reader = null;
-
-
-
-
- public JDKSaxParser() throws SAXException{
- reader = XMLReaderFactory.createXMLReader();
- }
-
- public List getGroupList(String xmlFilePath) throws Exception {
- ResourceHandler handler = new ResourceHandler();
- reader.setContentHandler(handler);
- reader.parse(xmlFilePath);
-
- return handler.getGroupList();
- }
-
- }
对XML的处理过程主要在ResourceHandler类中
- package parser.sax;
-
- import java.util.ArrayList;
- import java.util.List;
- import java.util.Stack;
-
- import module.Group;
- import module.Machine;
-
- import org.xml.sax.Attributes;
- import org.xml.sax.SAXException;
- import org.xml.sax.helpers.DefaultHandler;
-
- import parser.Constant;
-
- public class ResourceHandler extends DefaultHandler {
-
- List groupList = null;
-
-
-
-
- Stack groupStack = new Stack();
-
-
-
-
- Stack machineStack = new Stack();
-
-
-
-
- String currentNodeText;
-
-
-
-
-
- public void startElement(String uri, String localName, String qName,
- Attributes attributes) throws SAXException {
- if (Constant.GROUPS.equals(qName)) {
- groupList = new ArrayList();
- } else if (Constant.GROUP.equals(qName)) {
- String groupName = attributes.getValue(Constant.NAME);
- Group group = new Group(groupName);
- groupStack.push(group);
- }else if(Constant.MACHINE.equals(qName)){
- String machineName = attributes.getValue(Constant.NAME);
- String machineType = attributes.getValue(Constant.TYPE);
-
- Machine machine = new Machine(machineName);
- machine.setType(machineType);
- machineStack.push(machine);
- }
- }
-
-
-
-
-
- public void characters(char[] ch, int start, int length)
- throws SAXException {
- currentNodeText = String.valueOf(ch, start, length);
- }
-
-
-
-
-
-
- public void endElement(String uri, String localName, String qName)
- throws SAXException {
- if (Constant.GROUP.equals(qName)) {
- groupList.add(groupStack.pop());
- }else if(Constant.MACHINE.equals(qName)){
- groupStack.peek().addMachine(machineStack.pop());
- }else if(Constant.IP.equals(qName)){
-
- machineStack.peek().setIp(currentNodeText);
- currentNodeText = null;
- }else if(Constant.HOSTNAME.equals(qName)){
-
- machineStack.peek().setHostname(currentNodeText);
- currentNodeText = null;
- }
- }
-
- public List getGroupList() {
- return groupList;
- }
- }
2.5 JDOM解析实现
JDOM借助于XPath完成解析,需要jdom的jar包实现及jaxen jar包(主要用于处理XPath)实现。
- package parser.jdom;
-
- import java.io.FileInputStream;
- import java.util.ArrayList;
- import java.util.List;
-
- import module.Group;
- import module.Machine;
-
- import org.jdom.Document;
- import org.jdom.Element;
- import org.jdom.JDOMException;
- import org.jdom.input.SAXBuilder;
- import org.jdom.xpath.XPath;
-
- import parser.Constant;
- import parser.MachineParser;
-
-
-
-
-
-
- public class JDomParser implements MachineParser {
-
- SAXBuilder builder;
-
- public JDomParser() {
- builder = new SAXBuilder();
- }
-
- public List getGroupList(String xmlFilePath) throws Exception {
- FileInputStream stream = null;
- try {
- stream = new FileInputStream(xmlFilePath);
-
- Document document = builder.build(stream);
- Element root = document.getRootElement();
-
- List groupElements = XPath.selectNodes(root, "/"
- + Constant.ROOT + "/" + Constant.GROUPS + "/"
- + Constant.GROUP + "[name=wlanAppGroup]");
- return parse(root);
- } finally {
- try {
- if (stream != null) {
- stream.close();
- }
- } catch (Exception e) {
- }
- }
- }
-
- private List parse(Element root) throws JDOMException {
- List groupList = new ArrayList();
-
- String xPath = "/" + Constant.ROOT + "/" + Constant.GROUPS + "/"
- + Constant.GROUP;
-
- List groupElements = XPath.selectNodes(root, xPath);
-
- for (int i = 0; i < groupElements.size(); i++) {
- Element groupElement = groupElements.get(i);
-
- String groupName = groupElement.getAttributeValue(Constant.NAME);
- Group group = new Group(groupName);
- parseGroup(groupElement, group);
- groupList.add(group);
- }
-
- return groupList;
- }
-
- private void parseGroup(Element groupElement, Group group)
- throws JDOMException {
-
- List machineElements = XPath.selectNodes(groupElement,
- Constant.MACHINE);
- for (int i = 0; i < machineElements.size(); i++) {
- Element machineElement = machineElements.get(i);
- String machineName = machineElement
- .getAttributeValue(Constant.NAME);
- String machineType = machineElement
- .getAttributeValue(Constant.TYPE);
- String ip = ((Element) XPath.selectSingleNode(machineElement,
- Constant.IP)).getText();
- String hostname = ((Element) XPath.selectSingleNode(machineElement,
- Constant.HOSTNAME)).getText();
-
- Machine machine = new Machine(machineName);
- machine.setType(machineType);
- machine.setIp(ip);
- machine.setHostname(hostname);
- group.addMachine(machine);
- }
-
- }
2.6 DOM4J解析实现
DOM4J也借助于XPath完成解析,其相应的Element等类已经封装了JDOM中需要借助XPath类才能完成的路径选取功能。运行时也需要dom4j的实现jar包及jaxen jar包。
- package parser.dom4j;
-
- import java.io.FileInputStream;
- import java.util.ArrayList;
- import java.util.List;
-
- import org.dom4j.Document;
- import org.dom4j.Element;
- import org.dom4j.io.SAXReader;
-
- import module.Group;
- import module.Machine;
- import parser.Constant;
- import parser.MachineParser;
-
-
-
-
-
-
-
- public class Dom4JParser implements MachineParser {
-
- SAXReader reader;
-
- public Dom4JParser() {
- reader = new SAXReader();
- }
-
- public List getGroupList(String xmlFilePath) throws Exception {
- FileInputStream stream = null;
- try {
- stream = new FileInputStream(xmlFilePath);
-
- Document document = reader.read(stream);
- Element root = document.getRootElement();
-
- return parse(root);
- } finally {
- try {
- if (stream != null) {
- stream.close();
- }
- } catch (Exception e) {
- }
- }
- }
-
- private List parse(Element root) {
- List groupList = new ArrayList();
-
- String xPath = "/" + Constant.ROOT + "/" + Constant.GROUPS + "/"
- + Constant.GROUP;
-
- List groupElements = root.selectNodes(xPath);
-
- for (int i = 0; i < groupElements.size(); i++) {
- Element groupElement = groupElements.get(i);
-
- String groupName = groupElement.attributeValue(Constant.NAME);
- Group group = new Group(groupName);
- parseGroup(groupElement, group);
- groupList.add(group);
- }
-
- return groupList;
- }
-
- private void parseGroup(Element groupElement, Group group) {
-
- List machineElements = groupElement
- .selectNodes(Constant.MACHINE);
- for (int i = 0; i < machineElements.size(); i++) {
- Element machineElement = machineElements.get(i);
- String machineName = machineElement.attributeValue(Constant.NAME);
- String machineType = machineElement.attributeValue(Constant.TYPE);
- String ip = ((Element) machineElement.selectSingleNode(Constant.IP))
- .getText();
- String hostname = ((Element) machineElement
- .selectSingleNode(Constant.HOSTNAME)).getText();
-
- Machine machine = new Machine(machineName);
- machine.setType(machineType);
- machine.setIp(ip);
- machine.setHostname(hostname);
- group.addMachine(machine);
- }
-
- }
-
- }
2.7 StAX解析实现
StAX是基于流的,在代码的结构上和SAX非常的相似,也可以借助栈来完成文件的解析。运行时也需要使用StAX的实现jar包。
3,性能比较
以上代码的解析逻辑基本一样,在此基础之上解析相同的文件,在我机器(i5三代CPU、8G DDR3内存)上得出的结果为
3.1 采用前言的文件
对同一个文件解析20000次耗时情况
| 解析技术 | 第1次耗时(毫秒) | 第2次耗时(毫秒) | 第3次耗时(毫秒) | 耗时平均值(毫秒) |
| --- | --- | --- | --- | --- |
| SAX | 3765 | 3690 | 3760 | 3738 |
| DOM | 4304 | 4682 | 4510 | 4498 |
| JDOM 1.1.3 | 6928 | 7213 | 7301 | 7147 |
| DOM4J 1.6 | 8513 | 8055 | 7904 | 8157 |
| StAX 1.2 | 4586 | 4408 | 4378 | 4457 |
3.2 小结
从小文件的解析来看,SAX的性能占优,是DOM4J的2倍以上。DOM和StAX与SAX的差距都不是很大。在编写应用的时候,由于JDOM和DOM4J具有比较好用的API,很多应用中都优先使用它们;在对性能要求比较高的场合,还是建议使用SAX。
4,测试程序
如果在cmd下运行该测试程序请注意在classpath中添加附件给出的相关jar包。
- import java.util.List;
-
- import module.Group;
- import parser.MachineParser;
- import parser.dom.JDKDomParser;
- import parser.dom4j.Dom4JParser;
- import parser.jdom.JDomParser;
- import parser.sax.JDKSaxParser;
- import parser.stax.STAXParser;
-
- public class ParseMain {
-
-
-
-
- public static void main(String[] args) {
- String xmlFile = "appGroups.xml";
- MachineParser parser = null;
- try {
- String parseType = "stax";
- int count = 20000;
- if(args.length > 0){
- parseType = args[0];
- }else if(args.length > 1){
- count = Integer.parseInt(args[1]);
- }
-
- long startTime = System.currentTimeMillis();
- if("sax".equalsIgnoreCase(parseType)){
- parser = new JDKSaxParser();
- }else if("dom4j".equalsIgnoreCase(parseType)){
- parser = new Dom4JParser();
- }else if("jdom".equalsIgnoreCase(parseType)){
- parser = new JDomParser();
- }else if("stax".equalsIgnoreCase(parseType)){
- parser = new STAXParser();
- }else if("dom".equalsIgnoreCase(parseType)){
- parser = new JDKDomParser();
- }else{
- parser = new JDKDomParser();
- }
-
- List groupList = null;
- for(int i =0;i
- if(i == (count -1)){
- groupList = parser.getGroupList(xmlFile);
- }else{
- parser.getGroupList(xmlFile);
- }
- }
-
- for (Group group : groupList) {
- System.out.println(group);
- }
- long endTime = System.currentTimeMillis();
- System.out.println("Exceute parse \"" + count + "\" times. and spent \"" + (endTime - startTime) + "\" milliseconds.");
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- }
5,写在最后:
1,以上的性能比较还不是很全面,只是以简单小文件的解析结果作为比较。
2,本文只涉及XML的读取,不涉及XML的写入
3,XML解析还有JAXB框架,在JDK1.6中已经包含了该框架,此框架使用也比较简单,本文未涉及
4,SAX和StAX都基于事件机制,而JDOM和DOM4J借助了XPath
5,本文也不涉及DTD和Schema校验
6,本文所有代码和使用到的jar包都打包为附件,可直接下载查看。