<?xml version="1.0" encoding="ISO-8859-1"?> <root> <description>All application groups.</description> <groups> <group name="bossAppGroup"> <machine name="bossNode1" type="virtual"> <ip></ip> <hostname>bossApp1</hostname> </machine> <machine name="bossNode2" type="concrete"> <ip></ip> <hostname>bossApp2</hostname> </machine> </group> <group name="wlanAppGroup"> <machine name="wlanNode1" type="concrete"> <ip></ip> <hostname>wlan1</hostname> </machine> <machine name="wlanNode2" type="concrete"> <ip></ip> <hostname>wlan2</hostname> </machine> </group> </groups> </root>
bossAppGroup{bossNode1[type=virtual,IP=,hostname=bossApp1],bossNode2[type=concrete,IP=,hostname=bossApp2]} wlanAppGroup{wlanNode1[type=concrete,IP=,hostname=wlan1],wlanNode2[type=concrete,IP=,hostname=wlan2]}
2.1 公有常量类
package parser; public class Constant { public final static String ROOT = "root"; public final static String GROUPS = "groups"; public final static String GROUP = "group"; public final static String MACHINE = "machine"; public final static String IP = "ip"; public final static String HOSTNAME = "hostname"; public final static String NAME = "name"; public final static String TYPE = "type"; public final static String TYPE_CONCRETE = "concrete"; public final static String TYPE_VIRTUAL = "virtual"; }
2.2 Parser接口及对应的数据model
package parser;

import java.util.List;

import module.Group;

/**
 * Common contract implemented by each XML parsing strategy.
 */
public interface MachineParser {

    /**
     * Reads the given XML file and returns the groups found in it.
     *
     * @param xmlFilePath
     *            path of the XML file that contains the group and machine
     *            information
     * @return the groups parsed from the file
     * @throws Exception
     *             if the implementation fails to read or parse the file
     */
    List<Group> getGroupList(String xmlFilePath) throws Exception;
}
package module;

import java.util.ArrayList;
import java.util.List;

/**
 * A named group together with the machines that belong to it.
 */
public class Group {
    private String name;
    private List<Machine> machineList;

    /**
     * Creates a group with the given name and no machines.
     *
     * @param groupName
     *            name of the group
     */
    public Group(String groupName) {
        this.name = groupName;
        this.machineList = new ArrayList<Machine>();
    }

    /** Appends a machine to this group. */
    public void addMachine(Machine machine) {
        machineList.add(machine);
    }

    /** Removes a machine from this group. */
    public void removeMachine(Machine machine) {
        machineList.remove(machine);
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public List<Machine> getMachineList() {
        return machineList;
    }

    public void setMachineList(List<Machine> machineList) {
        this.machineList = machineList;
    }

    /**
     * Renders the group as {@code name{machine,machine,...}}, using each
     * machine's own string form.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(name).append("{");
        // Empty before the first machine, a comma before every later one.
        String separator = "";
        for (Machine machine : machineList) {
            text.append(separator).append(machine);
            separator = ",";
        }
        return text.append("}").toString();
    }
}
Machine 类包含了主机IP和主机名属性,为了输出,也重写了toString方法。
package module; public class Machine { private String name; private String type; private String ip; private String hostname; public Machine(String _name) { this.name = _name; } public String getName() { return name; } public void setName(String name) { this.name = name; } public String getType() { return type; } public void setType(String type) { this.type = type; } public String getIp() { return ip; } public void setIp(String ip) { this.ip = ip; } public String getHostname() { return hostname; } public void setHostname(String hostname) { this.hostname = hostname; } @Override public String toString() { StringBuffer sb = new StringBuffer(); sb.append(name).append("[type=" + type).append(",IP=" + ip) .append(",hostname=" + hostname).append("]"); return sb.toString(); } }
2.3 DOM解析实现
package parser.dom; import java.io.IOException; import java.util.ArrayList; import java.util.List; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import module.Group; import module.Machine; import org.w3c.dom.Document; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; import parser.Constant; import parser.MachineParser; /** * JDK Dom Parser. All XML element or attribute be processed as one * "org.w3c.dom.Node" instance. * * @author xuqingkang * */ public class JDKDomParser implements MachineParser { DocumentBuilder builder = null; /** * Constructor * @throws Exception */ public JDKDomParser() throws Exception { builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); } /** * Implement of MachineParser#getMachineList */ public List<Group> getGroupList(String xmlFilePath) throws SAXException, IOException { Document doc = builder.parse(xmlFilePath); // Find All Groups NodeList groupList = doc.getElementsByTagName(Constant.GROUP); int groupCount = groupList.getLength(); if (groupCount <= 0) { return null; } List<Group> machineGroups = new ArrayList<Group>(); for (int i = 0; i < groupCount; i++) { // XML attribute value can be parse with Node#getTextContent() or // Node#getNodeValue() method NamedNodeMap groupAttrMap = groupList.item(i).getAttributes(); // String groupName = // groupAttrMap.getNamedItem(Constant.NAME).getTextContent(); String groupName = groupAttrMap.getNamedItem(Constant.NAME) .getNodeValue(); Group group = new Group(groupName); parseMachinesOfGroup(groupList.item(i), group); machineGroups.add(group); } return machineGroups; } private void parseMachinesOfGroup(Node groupNode, Group group) { NodeList machineList = groupNode.getChildNodes(); if (machineList == null || machineList.getLength() <= 0) { return; } // Iterate <machine> nodes of one <group> node. 
for (int i = 0; i < machineList.getLength(); i++) { Node machineNode = machineList.item(i); if (machineNode.getNodeName() == null || !machineNode.getNodeName().equals(Constant.MACHINE)) { continue; } NodeList machineChildren = machineNode.getChildNodes(); // When XML Attribute value, either Node#getTextContent() or // Node#getNodeValue() be ok. String mName = machineNode.getAttributes() .getNamedItem(Constant.NAME).getTextContent(); String mType = machineNode.getAttributes() .getNamedItem(Constant.TYPE).getNodeValue(); Machine machine = new Machine(mName); machine.setType(mType); for (int j = 0; j < machineChildren.getLength(); j++) { Node machineChild = machineChildren.item(j); if (machineChild.getNodeName().equals(Constant.IP)) { machine.setIp(machineChild.getTextContent()); } else if (machineChild.getNodeName().equals(Constant.HOSTNAME)) { machine.setHostname(machineChild.getTextContent()); } } group.addMachine(machine); } } }
2.4 SAX解析实现
package parser.sax;

import java.util.List;

import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

import parser.MachineParser;
import module.Group;

/**
 * SAX parsing is event driven and, unlike DOM, does not load the whole
 * document into memory. SAX handlers commonly use a stack to simplify the
 * work: an element is pushed when it starts and popped and stored when it
 * ends.
 *
 * @author xuqingkang
 */
public class JDKSaxParser implements MachineParser {
    XMLReader reader = null;

    /**
     * Creates the underlying reader through {@link XMLReaderFactory}.
     *
     * @throws SAXException
     *             if no reader can be created
     */
    public JDKSaxParser() throws SAXException {
        this.reader = XMLReaderFactory.createXMLReader();
    }

    /**
     * Parses the file with a fresh {@link ResourceHandler} and returns the
     * groups that handler collected.
     */
    public List<Group> getGroupList(String xmlFilePath) throws Exception {
        final ResourceHandler groupCollector = new ResourceHandler();
        reader.setContentHandler(groupCollector);
        reader.parse(xmlFilePath);
        final List<Group> parsedGroups = groupCollector.getGroupList();
        return parsedGroups;
    }
}
package parser.sax; import java.util.ArrayList; import java.util.List; import java.util.Stack; import module.Group; import module.Machine; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; import parser.Constant; public class ResourceHandler extends DefaultHandler { List<Group> groupList = null; /** * <group>节点开始时压栈,结束时出栈 */ Stack<Group> groupStack = new Stack<Group>(); /** * <machine>节点开始时压栈,结束时出栈 */ Stack<Machine> machineStack = new Stack<Machine>(); /** * 对<ip>和<hostname>这样的节点,因为是节点+文本类型,因此需要用这个标示节点的内容 */ String currentNodeText; /** * XML节点开始时的处理方法: * 主要是遇到<group>和<machine>节点的开始标签时需要把Group和Machine入栈 */ public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if (Constant.GROUPS.equals(qName)) { groupList = new ArrayList<Group>(); } else if (Constant.GROUP.equals(qName)) { String groupName = attributes.getValue(Constant.NAME); Group group = new Group(groupName); groupStack.push(group); }else if(Constant.MACHINE.equals(qName)){ String machineName = attributes.getValue(Constant.NAME); String machineType = attributes.getValue(Constant.TYPE); Machine machine = new Machine(machineName); machine.setType(machineType); machineStack.push(machine); } } /** * XML节点直接加文本时的处理: * 主要是处理<ip>和<hostname>节点的值 */ public void characters(char[] ch, int start, int length) throws SAXException { currentNodeText = String.valueOf(ch, start, length); } /** * XML节点结束时的处理方法: * 1,主要是遇到<group>和<machine>节点的结束标签时需要把Group和Machine出栈保存 * 2,遇到<ip>和<hostname>节点的结束标签时需要保存当前文本为相应的属性值并清空当前文本变量 */ public void endElement(String uri, String localName, String qName) throws SAXException { if (Constant.GROUP.equals(qName)) { groupList.add(groupStack.pop()); }else if(Constant.MACHINE.equals(qName)){ groupStack.peek().addMachine(machineStack.pop()); }else if(Constant.IP.equals(qName)){ // <ip>节点结束时“currentNodeText”的值即为IP machineStack.peek().setIp(currentNodeText); currentNodeText = null; }else 
if(Constant.HOSTNAME.equals(qName)){ // <hostname>节点结束时“currentNodeText”的值即为HOSTNAME machineStack.peek().setHostname(currentNodeText); currentNodeText = null; } } public List<Group> getGroupList() { return groupList; } }
2.5 JDOM解析实现
JDOM借助于XPath完成解析,需要jdom的jar包实现及jaxen jar包(主要用于处理XPath)实现。
package parser.jdom; import java.io.FileInputStream; import java.util.ArrayList; import java.util.List; import module.Group; import module.Machine; import org.jdom.Document; import org.jdom.Element; import org.jdom.JDOMException; import org.jdom.input.SAXBuilder; import org.jdom.xpath.XPath; import parser.Constant; import parser.MachineParser; /** * JDomParser结合XPath对XML节点进行解析 * @author xuqingkang * */ public class JDomParser implements MachineParser { SAXBuilder builder; public JDomParser() { builder = new SAXBuilder(); } public List<Group> getGroupList(String xmlFilePath) throws Exception { FileInputStream stream = null; try { stream = new FileInputStream(xmlFilePath); Document document = builder.build(stream); Element root = document.getRootElement(); List<Element> groupElements = XPath.selectNodes(root, "/" + Constant.ROOT + "/" + Constant.GROUPS + "/" + Constant.GROUP + "[name=wlanAppGroup]"); return parse(root); } finally { try { if (stream != null) { stream.close(); } } catch (Exception e) { } } } private List<Group> parse(Element root) throws JDOMException { List<Group> groupList = new ArrayList<Group>(); // <group>节点是从根节点开始select,因此xpath需要使用绝对路径(/打头) String xPath = "/" + Constant.ROOT + "/" + Constant.GROUPS + "/" + Constant.GROUP; // <group>节点有多个,因此需要使用XPath.selectNodes List<Element> groupElements = XPath.selectNodes(root, xPath); for (int i = 0; i < groupElements.size(); i++) { Element groupElement = groupElements.get(i); String groupName = groupElement.getAttributeValue(Constant.NAME); Group group = new Group(groupName); parseGroup(groupElement, group); groupList.add(group); } return groupList; } private void parseGroup(Element groupElement, Group group) throws JDOMException { // 一个<group>节点下有多个<machine>节点,因此需要使用XPath.selectNodes List<Element> machineElements = XPath.selectNodes(groupElement, Constant.MACHINE); for (int i = 0; i < machineElements.size(); i++) { Element machineElement = machineElements.get(i); String machineName = machineElement 
.getAttributeValue(Constant.NAME); String machineType = machineElement .getAttributeValue(Constant.TYPE); String ip = ((Element) XPath.selectSingleNode(machineElement, Constant.IP)).getText(); String hostname = ((Element) XPath.selectSingleNode(machineElement, Constant.HOSTNAME)).getText(); Machine machine = new Machine(machineName); machine.setType(machineType); machine.setIp(ip); machine.setHostname(hostname); group.addMachine(machine); } }
2.6 DOM4J解析实现
DOM4J也借助于XPath完成解析,其相应的Element等类封装了在JDOM中需要借助XPath类才能完成的路径选取功能。运行时也需要dom4j的实现jar包及jaxen jar包。
package parser.dom4j; import java.io.FileInputStream; import java.util.ArrayList; import java.util.List; import org.dom4j.Document; import org.dom4j.Element; import org.dom4j.io.SAXReader; import module.Group; import module.Machine; import parser.Constant; import parser.MachineParser; /** * * Dom4JParser也结合XPath对XML节点进行解析 * @author xuqingkang * */ public class Dom4JParser implements MachineParser { SAXReader reader; public Dom4JParser() { reader = new SAXReader(); } public List<Group> getGroupList(String xmlFilePath) throws Exception { FileInputStream stream = null; try { stream = new FileInputStream(xmlFilePath); Document document = reader.read(stream); Element root = document.getRootElement(); return parse(root); } finally { try { if (stream != null) { stream.close(); } } catch (Exception e) { } } } private List<Group> parse(Element root) { List<Group> groupList = new ArrayList<Group>(); // <group>节点是从根节点开始select,因此xpath需要使用绝对路径(/打头) String xPath = "/" + Constant.ROOT + "/" + Constant.GROUPS + "/" + Constant.GROUP; // <group>节点有多个,因此需要使用selectNodes List<Element> groupElements = root.selectNodes(xPath); for (int i = 0; i < groupElements.size(); i++) { Element groupElement = groupElements.get(i); String groupName = groupElement.attributeValue(Constant.NAME); Group group = new Group(groupName); parseGroup(groupElement, group); groupList.add(group); } return groupList; } private void parseGroup(Element groupElement, Group group) { // 一个<group>节点下有多个<machine>节点,因此需要使用XPath.selectNodes List<Element> machineElements = groupElement .selectNodes(Constant.MACHINE); for (int i = 0; i < machineElements.size(); i++) { Element machineElement = machineElements.get(i); String machineName = machineElement.attributeValue(Constant.NAME); String machineType = machineElement.attributeValue(Constant.TYPE); String ip = ((Element) machineElement.selectSingleNode(Constant.IP)) .getText(); String hostname = ((Element) machineElement .selectSingleNode(Constant.HOSTNAME)).getText(); Machine 
machine = new Machine(machineName); machine.setType(machineType); machine.setIp(ip); machine.setHostname(hostname); group.addMachine(machine); } } }
2.7 StAX解析实现
package parser.stax; import java.io.FileReader; import java.util.ArrayList; import java.util.List; import java.util.Stack; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; import javax.xml.stream.events.XMLEvent; import module.Group; import module.Machine; import parser.Constant; import parser.MachineParser; /** * StAX api and RI could be downloaded from http://dist.codehaus.org/stax/jars/ * */ public class STAXParser implements MachineParser { List<Group> groupList = null; /** * <group>节点开始时压栈,结束时出栈 */ Stack<Group> groupStack = new Stack<Group>(); /** * <machine>节点开始时压栈,结束时出栈 */ Stack<Machine> machineStack = new Stack<Machine>(); XMLInputFactory factory; public STAXParser() { factory = XMLInputFactory.newInstance(); } public List<Group> getGroupList(String xmlFilePath) throws Exception { XMLStreamReader reader = null; try { FileReader fileReader = new FileReader(xmlFilePath); reader = factory.createXMLStreamReader(fileReader); process(reader); } finally { try { reader.close(); } catch (Exception e) { } } return groupList; } /** * 循环处理,直到到达文件末尾 * @param xmlr * @throws XMLStreamException */ private void process(XMLStreamReader xmlr) throws XMLStreamException { xmlr.next(); while (xmlr.getEventType() != XMLStreamConstants.END_DOCUMENT) { processEvent(xmlr); xmlr.next(); } } /** * 处理单个事件:节点开始、节点结束... * @param xmlr * @throws XMLStreamException */ private void processEvent(XMLStreamReader xmlr) throws XMLStreamException { switch (xmlr.getEventType()) { case XMLEvent.START_ELEMENT: if (xmlr.getLocalName().equals(Constant.GROUPS)) { groupList = new ArrayList<Group>(); } else if (xmlr.getLocalName().equals(Constant.GROUP)) { // parameter namespace is "". 
String groupName = xmlr.getAttributeValue(null, Constant.NAME); Group group = new Group(groupName); groupStack.push(group); } else if (xmlr.getLocalName().equals(Constant.MACHINE)) { String machineName = xmlr.getAttributeValue(null, Constant.NAME); String machinType = xmlr.getAttributeValue(null, Constant.TYPE); Machine machine = new Machine(machineName); machine.setType(machinType); machineStack.push(machine); } else if (xmlr.getLocalName().equals(Constant.IP)) { String ip = xmlr.getElementText(); machineStack.peek().setIp(ip); } else if (xmlr.getLocalName().equals(Constant.HOSTNAME)) { String hostname = xmlr.getElementText(); machineStack.peek().setHostname(hostname); } break; case XMLEvent.END_ELEMENT: if (xmlr.getLocalName().equals(Constant.GROUP)) { groupList.add(groupStack.pop()); } else if (xmlr.getLocalName().equals(Constant.MACHINE)) { groupStack.peek().addMachine(machineStack.pop()); } break; case XMLEvent.CHARACTERS: break; case XMLEvent.SPACE: break; case XMLEvent.PROCESSING_INSTRUCTION: break; case XMLEvent.CDATA: break; case XMLEvent.COMMENT: break; case XMLEvent.ENTITY_REFERENCE: break; case XMLEvent.START_DOCUMENT: break; } } }
以上代码的解析逻辑基本一样,在此基础之上解析相同的文件,在我机器(i5三代CPU、8G DDR3内存)上得出的结果为
3.1 采用前言的文件
| 解析技术 | 采样三次耗时(毫秒) | 耗时平均值(毫秒) |
| --- | --- | --- |
| SAX | 3765 / 3690 / 3760 | 3738 |
| DOM | 4304 / 4682 / 4510 | 4498 |
| JDOM 1.1.3 | 6928 / 7213 / 7301 | 7147 |
| DOM4J 1.6 | 8513 / 8055 / 7904 | 8157 |
| StAX 1.2 | 4586 / 4408 / 4378 | 4457 |
3.2 小结
import java.util.List; import module.Group; import parser.MachineParser; import parser.dom.JDKDomParser; import parser.dom4j.Dom4JParser; import parser.jdom.JDomParser; import parser.sax.JDKSaxParser; import parser.stax.STAXParser; public class ParseMain { /** * @param args */ public static void main(String[] args) { String xmlFile = "appGroups.xml"; MachineParser parser = null; try { String parseType = "stax"; int count = 20000; if(args.length > 0){ parseType = args[0]; }else if(args.length > 1){ count = Integer.parseInt(args[1]); } long startTime = System.currentTimeMillis(); if("sax".equalsIgnoreCase(parseType)){ parser = new JDKSaxParser(); }else if("dom4j".equalsIgnoreCase(parseType)){ parser = new Dom4JParser(); }else if("jdom".equalsIgnoreCase(parseType)){ parser = new JDomParser(); }else if("stax".equalsIgnoreCase(parseType)){ parser = new STAXParser(); }else if("dom".equalsIgnoreCase(parseType)){ parser = new JDKDomParser(); }else{ parser = new JDKDomParser(); } List<Group> groupList = null; for(int i =0;i<count;i++){ if(i == (count -1)){ groupList = parser.getGroupList(xmlFile); }else{ parser.getGroupList(xmlFile); } } for (Group group : groupList) { System.out.println(group); } long endTime = System.currentTimeMillis(); System.out.println("Exceute parse \"" + count + "\" times. and spent \"" + (endTime - startTime) + "\" milliseconds."); } catch (Exception e) { e.printStackTrace(); } } }