(翻译自http://www.ibm.com/developerworks/opensource/library/x-android/)
写代码迟早都得跟xml打交道,java有两种常见的xml处理方式:SAX和DOM,在android平台上都可以用。另外,StAX是一种新的xml处理方式,android还没有提供原生支持,但是提供了一个功能类似的库 - XML pull parser,三种方式的使用分别如下。
- 首先,我们要处理的xml长得像这样(rss feed):
<?xml version="1.0" encoding="UTF-8"?> <rss version="2.0" > <channel> <title> android_news </title> <description> android_news </description> <link> http://www.androidster.com/android_news.php </link> <item> <title> Samsung S8000 to Run Android, Play DivX, Take Over the World </title> <link> http://www.androidster.com/android_news/</link> <description>A yet-to-be announced phone called the S8000 is being reported ... </description> <pubDate> Thu, 16 Apr 2009 07:18:51 +0100 </pubDate> </item> <item> <title> Android Cupcake Update on the Horizon </title> <link> http://www.androidster.com/android_news/android-cupcake-update-on-the-horizon </link> <description> all been waiting for is about to finally make it out ... </description> <pubDate> Tue, 14 Apr 2009 04:13:21 +0100 </pubDate> </item> </channel> </rss>
/**
 * One entry of the RSS feed: title, link, description and publication date.
 */
public class Message {

    /**
     * Parser for feed dates such as "Thu, 16 Apr 2009 07:18:51 +0100".
     * Pinned to Locale.US because the day and month names in the feed are
     * English; with the device's default locale, parsing fails on
     * non-English devices.
     */
    private static final SimpleDateFormat FORMATTER =
            new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss Z", java.util.Locale.US);

    private String title;
    private URL link;
    private String description;
    private Date date;

    /**
     * Sets the link from its textual form.
     *
     * @param link the URL text
     * @throws RuntimeException wrapping MalformedURLException if the text is not a valid URL
     */
    public void setLink(String link) {
        try {
            this.link = new URL(link);
        } catch (MalformedURLException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Sets the publication date from its textual form; surrounding whitespace is ignored.
     *
     * @param date the date text, e.g. "Thu, 16 Apr 2009 07:18:51 +0100"
     * @throws RuntimeException wrapping ParseException if the text does not match the format
     */
    public void setDate(String date) {
        try {
            // SimpleDateFormat is not thread-safe, so guard the shared instance.
            synchronized (FORMATTER) {
                this.date = FORMATTER.parse(date.trim());
            }
        } catch (ParseException e) {
            throw new RuntimeException(e);
        }
    }

    // Other getters and setters omitted.
}
/**
 * Contract implemented by each of the feed parsers shown below.
 */
public interface FeedParser {

    /**
     * Parses the feed.
     *
     * @return the messages read from the feed
     */
    List<Message> parse();
}
public abstract class BaseFeedParser implements FeedParser { // XML tags static final String PUB_DATE = "pubDate"; static final String DESCRIPTION = "description"; static final String LINK = "link"; static final String TITLE = "title"; static final String ITEM = "item"; final URL feedUrl; protected BaseFeedParser(String feedUrl){ try { this.feedUrl = new URL(feedUrl); } catch (MalformedURLException e) { throw new RuntimeException(e); } } protected InputStream getInputStream() { try { return feedUrl.openConnection().getInputStream(); } catch (IOException e) { throw new RuntimeException(e); } } }
/**
 * Feed parser built on the standard javax.xml.parsers SAX API, delegating
 * the element handling to {@code RssHandler}.
 */
public class SaxFeedParser extends BaseFeedParser {

    protected SaxFeedParser(String feedUrl) {
        super(feedUrl);
    }

    /**
     * Parses the feed with a SAX parser.
     *
     * @return the messages collected by the handler
     * @throws RuntimeException wrapping any failure while creating the parser, reading or parsing
     */
    public List<Message> parse() {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        try {
            SAXParser parser = factory.newSAXParser();
            RssHandler handler = new RssHandler();
            java.io.InputStream in = this.getInputStream();
            try {
                parser.parse(in, handler);
            } finally {
                // Always release the network stream, even when parsing fails.
                in.close();
            }
            return handler.getMessages();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}
或者,用android提供的xml工具类更简单:
import android.util.Xml;

/**
 * Feed parser that uses the android.util.Xml helper to drive {@code RssHandler},
 * reading the feed as UTF-8.
 */
public class AndroidSaxFeedParser extends BaseFeedParser {

    public AndroidSaxFeedParser(String feedUrl) {
        super(feedUrl);
    }

    /**
     * Parses the feed through {@code Xml.parse}.
     *
     * @return the messages collected by the handler
     * @throws RuntimeException wrapping any failure while reading or parsing
     */
    public List<Message> parse() {
        RssHandler handler = new RssHandler();
        try {
            java.io.InputStream in = this.getInputStream();
            try {
                Xml.parse(in, Xml.Encoding.UTF_8, handler);
            } finally {
                // Always release the network stream, even when parsing fails.
                in.close();
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return handler.getMessages();
    }
}
org.xml.sax.helpers.DefaultHandler public class RssHandler extends DefaultHandler { private List<Message> messages; private Message currentMessage; private StringBuilder builder; public List<Message> getMessages(){ return this.messages; } @Override public void characters(char[] ch, int start, int length) throws SAXException { super.characters(ch, start, length); builder.append(ch, start, length); } @Override public void endElement(String uri, String localName, String name) throws SAXException { super.endElement(uri, localName, name); if (this.currentMessage != null){ if (localName.equalsIgnoreCase(TITLE)){ currentMessage.setTitle(builder.toString()); } else if (localName.equalsIgnoreCase(LINK)){ currentMessage.setLink(builder.toString()); } else if (localName.equalsIgnoreCase(DESCRIPTION)){ currentMessage.setDescription(builder.toString()); } else if (localName.equalsIgnoreCase(PUB_DATE)){ currentMessage.setDate(builder.toString()); } else if (localName.equalsIgnoreCase(ITEM)){ messages.add(currentMessage); } builder.setLength(0); } } @Override public void startDocument() throws SAXException { super.startDocument(); messages = new ArrayList<Message>(); builder = new StringBuilder(); } @Override public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { super.startElement(uri, localName, name, attributes); if (localName.equalsIgnoreCase(ITEM)){ this.currentMessage = new Message(); } } }
public class AndroidSaxFeedParser extends BaseFeedParser { public AndroidSaxFeedParser(String feedUrl) { super(feedUrl); } public List<Message> parse() { final Message currentMessage = new Message(); RootElement root = new RootElement("rss"); final List<Message> messages = new ArrayList<Message>(); Element channel = root.getChild("channel"); Element item = channel.getChild(ITEM); item.setEndElementListener(new EndElementListener(){ public void end() { messages.add(currentMessage.copy()); } }); item.getChild(TITLE).setEndTextElementListener(new EndTextElementListener(){ public void end(String body) { currentMessage.setTitle(body); } }); item.getChild(LINK).setEndTextElementListener(new EndTextElementListener(){ public void end(String body) { currentMessage.setLink(body); } }); // 其他字段省略 try { Xml.parse(this.getInputStream(), Xml.Encoding.UTF_8, root.getContentHandler()); } catch (Exception e) { throw new RuntimeException(e); } return messages; } }这种方式的特点是,你先手动建立xml的结构模型,然后在需要处理的节点上注册一个 EndTextElementListener。
/**
 * Feed parser that loads the whole document into a DOM tree and then walks
 * the item elements.
 */
public class DomFeedParser extends BaseFeedParser {

    protected DomFeedParser(String feedUrl) {
        super(feedUrl);
    }

    /**
     * Parses the feed into a DOM and converts every item element to a message.
     *
     * @return one message per item element
     * @throws RuntimeException wrapping any failure while reading, parsing or converting
     */
    public List<Message> parse() {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        List<Message> messages = new ArrayList<Message>();
        try {
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document dom;
            java.io.InputStream in = this.getInputStream();
            try {
                dom = builder.parse(in);
            } finally {
                // The tree is fully built at this point; release the stream.
                in.close();
            }
            Element root = dom.getDocumentElement();
            NodeList items = root.getElementsByTagName(ITEM);
            for (int i = 0; i < items.getLength(); i++) {
                Message message = new Message();
                NodeList properties = items.item(i).getChildNodes();
                for (int j = 0; j < properties.getLength(); j++) {
                    Node property = properties.item(j);
                    String name = property.getNodeName();
                    if (name.equalsIgnoreCase(TITLE)) {
                        message.setTitle(textOf(property));
                    } else if (name.equalsIgnoreCase(LINK)) {
                        message.setLink(textOf(property));
                    } else if (name.equalsIgnoreCase(DESCRIPTION)) {
                        message.setDescription(textOf(property));
                    } else if (name.equalsIgnoreCase(PUB_DATE)) {
                        message.setDate(textOf(property));
                    }
                }
                messages.add(message);
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return messages;
    }

    /**
     * Concatenates the values of all direct children of the given node.
     * Returns an empty string for an empty element (instead of failing on a
     * missing first child) and skips children that carry no value.
     */
    private static String textOf(Node element) {
        StringBuilder text = new StringBuilder();
        NodeList children = element.getChildNodes();
        for (int k = 0; k < children.getLength(); k++) {
            String value = children.item(k).getNodeValue();
            if (value != null) {
                text.append(value);
            }
        }
        return text.toString();
    }
}
public XmlPullFeedParser(String feedUrl) { super(feedUrl); } public List<Message> parse() { List<Message> messages = null; XmlPullParser parser = Xml.newPullParser(); try { // auto-detect the encoding from the stream parser.setInput(this.getInputStream(), null); int eventType = parser.getEventType(); Message currentMessage = null; boolean done = false; while (eventType != XmlPullParser.END_DOCUMENT && !done){ String name = null; switch (eventType){ case XmlPullParser.START_DOCUMENT: messages = new ArrayList<Message>(); break; case XmlPullParser.START_TAG: name = parser.getName(); if (name.equalsIgnoreCase(ITEM)){ currentMessage = new Message(); } else if (currentMessage != null){ if (name.equalsIgnoreCase(LINK)){ currentMessage.setLink(parser.nextText()); } else if (name.equalsIgnoreCase(DESCRIPTION)){ currentMessage.setDescription(parser.nextText()); } else if (name.equalsIgnoreCase(PUB_DATE)){ currentMessage.setDate(parser.nextText()); } else if (name.equalsIgnoreCase(TITLE)){ currentMessage.setTitle(parser.nextText()); } } break; case XmlPullParser.END_TAG: name = parser.getName(); if (name.equalsIgnoreCase(ITEM) && currentMessage != null){ messages.add(currentMessage); } else if (name.equalsIgnoreCase(CHANNEL)){ done = true; } break; } eventType = parser.next(); } } catch (Exception e) { throw new RuntimeException(e); } return messages; } }
xml pull parser 比SAX多了一个循环,它不断调用parser.next()读取事件。值得注意的一点是,你可以随时终止while循环,比如你只需要读取到某一个节点时。
总结:
- android平台上,大多时候SAX是一个安全的选择
- 如果文档很小,DOM用起来更简单。
- 如果文档很大,并且你只需要文档的一部分,那么xml pull parser更高效。