<?xml version="1.0" encoding="utf-8"?> <users> <user id="1"> <name>Andrea</name> <age>10</age> <gender>Male</gender> </user> <user id="2"> <name>Bell</name> <age>20</age> <gender>Male</gender> </user> <user id="3"> <name>Calinda</name> <age>30</age> <gender>Female</gender> </user> </users>
上面即是基本的XML文件。一个XML文件分为头信息和数据区:头信息即上述XML文件的第一行 <?xml version="1.0" encoding="utf-8"?>,它指定XML文件的版本和字符编码;除去第一行之后的内容都属于本XML文件的数据区,即数据保存部分。其中根元素为users,它包含3个user元素,每个user元素有id属性和3个子元素,分别是name,age和gender。
下面介绍XML文件的四种解析方式:DOM方式,SAX方式,JDOM方式以及DOM4J方式。这里采用简单工厂设计模式:先定义一个基本的XML解析接口作为产品接口,包含解析xml和生成xml文件两个抽象方法;再定义4个具体的产品类,即4种XML解析方式,解析方式以枚举方式描述;最后定义解析工厂,根据枚举值创建对应的产品。
/**
 * ParserMethod enum: the XML parsing strategies the factory can build.
 */
enum ParserMethod {
    DOM, SAX, JDOM, DOM4J
}

/**
 * Simple factory that manufactures XmlHelper products through a static method.
 */
class XmlHelperFactory {
    /**
     * Creates the helper that corresponds to the requested parsing method.
     *
     * @param method the parsing strategy to instantiate
     * @return a new helper for {@code method}, or {@code null} when no case matches
     */
    public static XmlHelper newInstance(ParserMethod method) {
        switch (method) {
            case DOM:
                return new DomHelper();
            case SAX:
                return new SaxHelper();
            case JDOM:
                return new JDomHelper();
            case DOM4J:
                return new Dom4JHelper();
            default:
                return null;
        }
    }
}

/**
 * Base XmlHelper interface: the product contract shared by every parser.
 */
interface XmlHelper {
    /** Parses the XML file identified by {@code fileName}. */
    void parseXml(String fileName) throws Exception;

    /** Generates an XML file at {@code fileName}. */
    void writeXml(String fileName) throws Exception;
}
第一种即为W3C推荐的标准,DOM解析方式,以DOM树的形式将整个XML文件结构装载到内存中,要知道对于一个大而复杂的XML文件,在内存中建立它的DOM树形式是非常耗费空间的,因此DOM解析方式适合解析那些文件较小,内容需要经常修改,结构较为简单的XML文档。对于小XML文件,使用DOM解析方式,非常简单,且易于理解,由于DOM树存于内存中,支持随意读取,对于上述XML文件,只要将其想象成一棵树,根元素为users。解析代码如下:
/** * DomHelper, a concrete XmlHelper product */ class DomHelper implements XmlHelper { private DocumentBuilder builder = null; public DomHelper(){ try{ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); builder = factory.newDocumentBuilder(); }catch(Exception e){ } } public void parseXml(String fileName) throws Exception { Document document = builder.parse(fileName); NodeList nodes = document.getChildNodes();//document.getElementByTagName("users"); for(int i=0;i<nodes.getLength();i++){ Node users = nodes.item(i); NodeList user = users.getChildNodes(); for(int j=0;j<user.getLength();j++){ Node userInfo = user.item(j); NamedNodeMap userAttr = userInfo.getAttributes(); if(userAttr != null && userAttr.getLength() > 0){ for(int at=0;at<userAttr.getLength();at++){ System.out.print(userAttr.item(at).getNodeName() + "=" + userAttr.item(at).getTextContent()); } } NodeList infoMeta = userInfo.getChildNodes(); for(int k=0;k<infoMeta.getLength();k++){ if(infoMeta.item(k).getNodeName() != "#text"){ System.out.print(infoMeta.item(k).getNodeName() + ":" + infoMeta.item(k).getTextContent()); } System.out.print("\t"); } System.out.println(); } } } public void writeXml(String fileName) throws Exception{ Document document = builder.newDocument(); Node users = document.createElement("users"); Node user = document.createElement("user"); Node name = document.createElement("name"); Node age = document.createElement("age"); Node gender = document.createElement("gender"); name.appendChild(document.createTextNode("zhangsan")); age.appendChild(document.createTextNode("20")); gender.appendChild(document.createTextNode("Male")); user.appendChild(name); user.appendChild(age); user.appendChild(gender); users.appendChild(user); document.appendChild(users); TransformerFactory factory = TransformerFactory.newInstance(); Transformer transformer = factory.newTransformer(); transformer.setOutputProperty(OutputKeys.ENCODING,"utf-8"); DOMSource source = new DOMSource(document); 
StreamResult result = new StreamResult(new File(fileName)); transformer.transform(source,result); } }
在主函数中使用该种解析方式,输出如下:
public class TestXml{ public static void main(String[] args) throws Exception { //Simple factory mode XmlHelper helper = XmlHelperFactory.newInstance(ParserMethod.DOM4J); helper.parseXml("./example.xml"); helper.writeXml("./output.xml"); } }
id=1 name:Andrea age:10 gender:Male
id=2 name:Bell age:20 gender:Male
id=3 name:Calinda age:30 gender:Female
第二种方式为SAX解析方式,也是W3C提供的xml解析标准,与DOM解析方式不同的是,SAX解析以事件驱动方式进行,按顺序逐段解析,还必须定义自己的事件处理器类,该类继承自DefaultHandler类。主要方法有startDocument方法,文档开始时自动调用;endDocument方法,文档结束时自动调用;startElement方法,元素开始时自动调用(根元素和节点元素都是如此);endElement方法,元素结束时自动调用;characters方法,遇到具体的元素结点内部值时会自动调用,并解析。SAX解析方式不在乎文档大小,即时加载元素结点,存在于内存的东西相当少,因此SAX解析方式更加灵活,可以针对那些较大的XML文档进行解析。SAX易于读取,但不支持文件修改,因此SaxHelper的writeXml方法无内容。
/**
 * SaxHelper, another XmlHelper product. Parsing is event driven and is
 * delegated to {@link MySaxHandler}.
 */
class SaxHelper implements XmlHelper {
    /**
     * Streams the given XML file through a SAX parser, echoing it to stdout.
     *
     * @param fileName location of the XML document, handed to SAXParser.parse
     * @throws Exception if the parser cannot be created or parsing fails
     */
    public void parseXml(String fileName) throws Exception {
        SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
        saxParser.parse(fileName, new MySaxHandler());
    }

    /** Intentionally empty: this helper only reads documents. */
    public void writeXml(String fileName) throws Exception {
    }
}

/**
 * SAX event handler that re-prints the document to stdout as events arrive.
 */
class MySaxHandler extends DefaultHandler {
    @Override
    public void startDocument() throws SAXException {
        // NOTE(review): the echoed declaration has no closing "?>"; kept as-is
        // so the printed output stays unchanged.
        System.out.println("<?xml version=\"1.0\" encoding=\"utf-8\"");
    }

    @Override
    public void endDocument() throws SAXException {
        System.out.println("document ended!");
    }

    @Override
    public void startElement(String uri, String localName, String qName,
                             Attributes attributes) throws SAXException {
        // Assemble the opening tag first, then emit it in one go.
        StringBuilder tag = new StringBuilder();
        tag.append("<").append(qName);
        int count = attributes.getLength();
        for (int idx = 0; idx < count; idx++) {
            tag.append(" ")
               .append(attributes.getQName(idx))
               .append("=\"")
               .append(attributes.getValue(idx))
               .append("\"");
        }
        tag.append(">");
        System.out.print(tag.toString());
    }

    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        System.out.print("</" + qName + ">");
    }

    @Override
    public void characters(char[] ch, int start, int length) {
        // Character data (including whitespace between elements) is echoed verbatim.
        System.out.print(String.valueOf(ch, start, length));
    }
}
输出如下:
<?xml version="1.0" encoding="utf-8"
<users>
<user id="1">
<name>Andrea</name>
<age>10</age>
<gender>Male</gender>
</user>
<user id="2">
<name>Bell</name>
<age>20</age>
<gender>Male</gender>
</user>
<user id="3">
<name>Calinda</name>
<age>30</age>
<gender>Female</gender>
</user>
</users>document ended!
第三种方式JDOM方式,JDOM解析方式=DOM可以修改+SAX大文件读取,相对DOM解析方式,更加简单灵活,统一以Element组织。
/**
 * Dom4JHelper, another XmlHelper product, implemented with the dom4j library.
 */
class Dom4JHelper implements XmlHelper {
    /**
     * Reads the given XML file and, for every child element of the root,
     * prints each attribute and each child element as {@code name:value}
     * on its own line, followed by a blank line.
     *
     * @param fileName path of the XML file to read
     * @throws Exception if the file cannot be read or parsed
     */
    public void parseXml(String fileName) throws Exception {
        SAXReader reader = new SAXReader();
        org.dom4j.Document document = reader.read(new File(fileName));
        org.dom4j.Element users = document.getRootElement();
        Iterator<org.dom4j.Element> userIter = users.elementIterator();
        while (userIter.hasNext()) {
            org.dom4j.Element user = userIter.next();
            List<org.dom4j.Attribute> userAttr = user.attributes();
            for (org.dom4j.Attribute attr : userAttr) {
                System.out.println(attr.getName() + ":" + attr.getValue());
            }
            Iterator<org.dom4j.Element> infoIter = user.elementIterator();
            while (infoIter.hasNext()) {
                org.dom4j.Element info = infoIter.next();
                System.out.println(info.getName() + ":" + info.getText());
            }
            System.out.println();
        }
    }

    /**
     * Builds a small sample document (users/user[id=1]/name, age, gender)
     * and writes it, pretty-printed and utf-8 encoded, to the given file.
     *
     * @param fileName path of the file to write
     * @throws Exception if the file cannot be opened or written
     */
    public void writeXml(String fileName) throws Exception {
        org.dom4j.Document document = DocumentHelper.createDocument();
        org.dom4j.Element users = document.addElement("users");
        org.dom4j.Element user = users.addElement("user");
        org.dom4j.Element name = user.addElement("name");
        org.dom4j.Element age = user.addElement("age");
        org.dom4j.Element gender = user.addElement("gender");
        user.addAttribute("id", "1");
        name.setText("zhangsan");
        age.setText("20");
        gender.setText("Male");

        OutputFormat format = OutputFormat.createPrettyPrint();
        format.setEncoding("utf-8");
        // Pass the format to the writer; previously it was built but never used,
        // so the pretty-print and encoding settings had no effect.
        XMLWriter writer = new XMLWriter(new FileOutputStream(new File(fileName)), format);
        try {
            writer.write(document);
        } finally {
            // Always release the file handle, even when writing fails.
            writer.close();
        }
    }
}
第四种方式为DOM4J方式,Hibernate和Spring等框架都采用DOM4J方式支持XML文件的解析。生成xml文件时,Document对象是依靠DocumentHelper的createDocument方法创建的。
/**
 * Dom4JParser, an XmlParser implementation based on the dom4j library.
 */
class Dom4JParser implements XmlParser {
    /**
     * Reads the given XML file and, for every child element of the root,
     * prints each attribute and each child element as {@code name:value}
     * on its own line, followed by a blank line.
     *
     * @param fileName path of the XML file to read
     * @throws Exception if the file cannot be read or parsed
     */
    public void parseXml(String fileName) throws Exception {
        SAXReader reader = new SAXReader();
        org.dom4j.Document document = reader.read(new File(fileName));
        org.dom4j.Element users = document.getRootElement();
        Iterator<org.dom4j.Element> userIter = users.elementIterator();
        while (userIter.hasNext()) {
            org.dom4j.Element user = userIter.next();
            List<org.dom4j.Attribute> userAttr = user.attributes();
            for (org.dom4j.Attribute attr : userAttr) {
                System.out.println(attr.getName() + ":" + attr.getValue());
            }
            Iterator<org.dom4j.Element> infoIter = user.elementIterator();
            while (infoIter.hasNext()) {
                org.dom4j.Element info = infoIter.next();
                System.out.println(info.getName() + ":" + info.getText());
            }
            System.out.println();
        }
    }

    /**
     * Builds a small sample document (users/user[id=1]/name, age, gender)
     * and writes it, pretty-printed and utf-8 encoded, to the given file.
     *
     * @param fileName path of the file to write
     * @throws Exception if the file cannot be opened or written
     */
    public void transformXml(String fileName) throws Exception {
        org.dom4j.Document document = DocumentHelper.createDocument();
        org.dom4j.Element users = document.addElement("users");
        org.dom4j.Element user = users.addElement("user");
        org.dom4j.Element name = user.addElement("name");
        org.dom4j.Element age = user.addElement("age");
        org.dom4j.Element gender = user.addElement("gender");
        user.addAttribute("id", "1");
        name.setText("zhangsan");
        age.setText("20");
        gender.setText("Male");

        OutputFormat format = OutputFormat.createPrettyPrint();
        format.setEncoding("utf-8");
        // Pass the format to the writer; previously it was built but never used,
        // so the pretty-print and encoding settings had no effect.
        XMLWriter writer = new XMLWriter(new FileOutputStream(new File(fileName)), format);
        try {
            writer.write(document);
        } finally {
            // Always release the file handle, even when writing fails.
            writer.close();
        }
    }
}