处理XML一般有两种方式:基于DOM和基于流。相关的工具有很多,比如SAX、StAX、DOM、JDOM、DOM4J等。SAX和StAX都是基于流的,前者属于推模型,后者属于拉模型。StAX(Streaming API for XML,JSR 173)最初由BEA公司(后被Oracle收购)主导提出,是一种基于流(stream)的处理方式,在Java中封装成了StAX API(javax.xml.stream包),用法和SAX很像。在WebService中一般使用基于流的工具;基于DOM的工具需要先把整个文档加载到内存中,或多或少会影响一些效率。WS中还要涉及到Java对象和XML之间的转换,可以直接使用JDK提供的JAXB。类似的还有XStream、Jackson、json-lib,这些框架提供了XML与JSON、JSON与Java对象之间的转换,可根据具体的需求选择不同的框架。
JAXB的用法很简单:把Java对象转换为XML叫编排(marshal),把XML转换为Java对象叫反编排(unmarshal),示例如下:
package com.tgb.xml;

import java.io.StringReader;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import javax.xml.bind.Unmarshaller;

import org.junit.Test;

/**
 * Minimal JAXB demonstration: one test marshals a {@code Student} to XML,
 * the other unmarshals an XML string back into a {@code Student}.
 */
public class TestJaxb {

    /**
     * Marshals a {@code Student} (with a nested {@code Classroom}) and writes
     * the resulting XML to standard output.
     */
    @Test
    public void test01() {
        try {
            Marshaller toXml = JAXBContext.newInstance(Student.class).createMarshaller();
            Classroom room = new Classroom(1, "计算机", "2012");
            Student student = new Student(1, "这是", "32", room);
            toXml.marshal(student, System.out);
        } catch (JAXBException ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Unmarshals a fixed XML document into a {@code Student} and prints the
     * student's name followed by the name of its classroom.
     */
    @Test
    public void test02() {
        try {
            // Split only for readability; the concatenation is a compile-time constant.
            String source =
                    "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>"
                    + "<student><age>32</age>"
                    + "<classroom><grade>2012</grade><id>1</id><name>计算机</name></classroom>"
                    + "<id>1</id><name>这是</name></student>";
            Unmarshaller fromXml = JAXBContext.newInstance(Student.class).createUnmarshaller();
            Student student = (Student) fromXml.unmarshal(new StringReader(source));
            System.out.println(student.getName() + "," + student.getClassroom().getName());
        } catch (JAXBException ex) {
            ex.printStackTrace();
        }
    }
}
下面主要讲StAX操作XML的示例。首先创建一个XML文档(即后面代码中通过类加载器从classpath读取的books.xml):
<?xml version="1.0" encoding="ISO-8859-1"?> <bookstore> <book category="COOKING"> <title lang="en"> Everyday Italian </title> <author>Giada De Laurentiis</author> <year>2005</year> <price>30.00</price> </book> <book category="CHILDREN"> <title lang="en">Harry Potter</title> <author>J K. Rowling</author> <year>2005</year> <price>29.99</price> </book> <book category="WEB"> <title lang="en">XQuery Kick Start</title> <author>James McGovern</author> <author>Per Bothner</author> <author>Kurt Cagle</author> <author>James Linn</author> <author>Vaidyanathan Nagarajan</author> <year>2003</year> <price>49.99</price> </book> <book category="WEB"> <title lang="en">Learning XML</title> <author>Erik T. Ray</author> <year>2003</year> <price>39.95</price> </book> </bookstore>
基于光标的查找:
@Test public void test01(){ XMLInputFactory factory = XMLInputFactory.newInstance(); InputStream is = null; is = TestStax.class.getClassLoader().getResourceAsStream("books.xml"); XMLStreamReader reader; try { reader = factory.createXMLStreamReader(is); while(reader.hasNext()){ int type = reader.next(); if(type == XMLStreamConstants.START_ELEMENT){ //起始节点 System.out.println(reader.getName()); }else if(type == XMLStreamConstants.CHARACTERS){ //文本节点 System.out.println(reader.getText()); }else if(type == XMLStreamConstants.END_ELEMENT){ //结束节点 System.out.println("/" + reader.getName()); } } } catch (XMLStreamException e) { e.printStackTrace(); }finally{ if(is != null){ try { is.close(); } catch (IOException e) { e.printStackTrace(); } } } } @Test public void test02(){ XMLInputFactory factory = XMLInputFactory.newInstance(); InputStream is = null; is = TestStax.class.getClassLoader().getResourceAsStream("books.xml"); XMLStreamReader reader; try { reader = factory.createXMLStreamReader(is); while(reader.hasNext()){ int type = reader.next(); if(type == XMLStreamConstants.START_ELEMENT){ String name = reader.getName().toString(); if("book".equals(name)){ //读取属性名和值 System.out.println(reader.getAttributeName(0) + ":" + reader.getAttributeValue(0)); } //获取元素内容 if("price".equals(name)){ System.out.println(reader.getElementText() + "\n"); } } } } catch (XMLStreamException e) { e.printStackTrace(); }finally{ if(is != null){ try { is.close(); } catch (IOException e) { e.printStackTrace(); } } } }
基于迭代模型查找:
@Test public void test03(){ XMLInputFactory factory = XMLInputFactory.newInstance(); InputStream is = null; is = TestStax.class.getClassLoader().getResourceAsStream("books.xml"); try { //基于迭代模型的操作方式 XMLEventReader reader = factory.createXMLEventReader(is); int num = 0; while(reader.hasNext()){ //通过XMLEvent来获取是否是某种节点类型 XMLEvent event = reader.nextEvent(); if(event.isStartElement()){ //通过event.asxxx转换节点 String name = event.asStartElement().getName().toString(); if("title".equals(name)){ System.out.println(reader.getElementText() + ":"); } if("price".equals(name)){ System.out.println(reader.getElementText() + "\n"); } } num++; } System.out.println(num); } catch (XMLStreamException e) { e.printStackTrace(); }finally{ if(is != null){ try { is.close(); } catch (IOException e) { e.printStackTrace(); } } } }
过滤器的使用:
@Test public void test04(){ XMLInputFactory factory = XMLInputFactory.newInstance(); InputStream is = null; is = TestStax.class.getClassLoader().getResourceAsStream("books.xml"); try { //基于Filter的过滤方式,可以有效的过滤不用进行操作的节点,效率高 XMLEventReader reader = factory.createFilteredReader(factory.createXMLEventReader(is), new EventFilter() { @Override public boolean accept(XMLEvent event) { if(event.isStartElement()){ String name = event.asStartElement().getName().toString(); if(name.equals("title") || name.equals("price")){ return true; } } return false; } }); int num = 0; while(reader.hasNext()){ XMLEvent event = reader.nextEvent(); if(event.isStartElement()){ String name = event.asStartElement().getName().toString(); if("title".equals(name)){ System.out.println(reader.getElementText() + ":"); } if("price".equals(name)){ System.out.println(reader.getElementText() + "\n"); } } num++; } System.out.println(num); } catch (XMLStreamException e) { e.printStackTrace(); }finally{ if(is != null){ try { is.close(); } catch (IOException e) { e.printStackTrace(); } } } }
XPath的使用:
@Test public void test05(){ XMLInputFactory factory = XMLInputFactory.newInstance(); InputStream is = null; try{ is = TestStax.class.getClassLoader().getResourceAsStream("books.xml"); //创建文档处理对象 DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder(); //通过DocumentBuilder创建doc的文档对象 Document doc = db.parse(is); //创建xpath XPath xpath = XPathFactory.newInstance().newXPath(); //第一个参数就是xpath,第二个参数就是文档 NodeList list = (NodeList)xpath.evaluate("//book[@category='WEB']", doc, XPathConstants.NODESET); for(int i=0;i<list.getLength();i++){ //遍历输出相应的结果 Element e = (Element)list.item(i); System.out.println(e.getElementsByTagName("title").item(0).getTextContent()); } } catch (Exception e) { e.printStackTrace(); }finally{ if(is != null){ try { is.close(); } catch (IOException e) { e.printStackTrace(); } } } }
使用XMLStreamWriter创建xml:
/**
 * Writes a small namespaced XML document to standard output with
 * {@code XMLStreamWriter}: a {@code prefix:person} element in namespace
 * {@code http://www.tgb.com} containing an {@code id} element (same
 * namespace) with the text "1".
 */
@Test
public void test06() {
    try {
        // State the encoding when creating the writer so the bytes written
        // agree with the encoding announced in the XML declaration below.
        XMLStreamWriter xsw = XMLOutputFactory.newInstance().createXMLStreamWriter(System.out, "utf-8");
        String ns = "http://www.tgb.com";
        xsw.writeStartDocument("utf-8", "1.0");
        xsw.writeStartElement("prefix", "person", ns);
        // A non-repairing writer does not emit xmlns declarations on its own;
        // without this call the output uses an undeclared prefix.
        xsw.writeNamespace("prefix", ns);
        xsw.writeStartElement(ns, "id");
        xsw.writeCharacters("1");
        xsw.writeEndElement();
        xsw.writeEndElement();
        // The document is ended only after all elements have been written.
        xsw.writeEndDocument();
        xsw.flush();
        xsw.close();
    } catch (XMLStreamException e) {
        e.printStackTrace();
    } catch (FactoryConfigurationError e) {
        e.printStackTrace();
    }
}
使用Transformer更新节点信息:
@Test public void test07(){ try { InputStream is = TestStax.class.getClassLoader().getResourceAsStream("books.xml"); //创建文档处理对象 DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder(); //通过DocumentBuilder创建doc文档对象 Document doc = db.parse(is); //创建xpath XPath xpath = XPathFactory.newInstance().newXPath(); Transformer tran = TransformerFactory.newInstance().newTransformer(); tran.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); tran.setOutputProperty(OutputKeys.INDENT, "yes"); //第一个参数就是xpath,第二个参数就是文档 NodeList list = (NodeList)xpath.evaluate("//book[title='Learning XML']",doc,XPathConstants.NODESET); //获取price节点 Element be = (Element)list.item(0); Element e = (Element)(be.getElementsByTagName("price").item(0)); e.setTextContent("232323"); Result result = new StreamResult(System.out); //修改源 tran.transform(new DOMSource(doc), result); } catch (Exception e) { e.printStackTrace(); } }
以上都是StAX以及相关JAXP API(XPath、Transformer)的简单应用,具体可参考JDK文档。结合本文,还可参考《XML解析技术研究(一)》、《XML解析技术研究(二)》、《Java6.0新特性之StAX--全面解析Java XML分析技术》和《使用JAXP进行SAX解析(XMLReaderFactory、XMLReader 、SAXParserFactory与SAXParser)》,写得很棒!
关于DOM4J解析XML,可参考《使用Dom4j解析XML》一文;总体来说还是DOM4J好用些。