在讲这次错误之前,先看一下下面这段代码。 【◆以下解析方法是错误的×】
import java.util.ArrayList;import java.util.List;import org.xml.sax.Attributes;import org.xml.sax.SAXException;import org.xml.sax.helpers.DefaultHandler;import android.util.Log;public class XmlHandler extends DefaultHandler{ private final String TAG = this.getClass().getSimpleName(); /**XML文件中标签定义*/ private final String TAG_Article = "Article"; private final String TAG_ArticleID = "ArticleID"; private final String TAG_Title = "Title"; private final String TAG_Date = "Date"; private final String TAG_SmallPictures = "SmallPictures"; private final String TAG_LargePictures = "LargePictures"; private final String TAG_Category = "Category"; private static final String TAG_HeadNote = "HeadNote"; private static final String TAG_SubTitle = "SubTitle"; private static final String TAG_Source = "Source"; //当前正在解析的TAG private String currentName; //单个文章 private News news = null; //文章列表 private List newsList = null; //解析开始时间 private long start_time; private boolean flag = false; @Override public void characters(char[] ch, int start, int length) throws SAXException { super.characters(ch, start, length); if(!flag) { return; } // 取值 String value = new String(ch, start, length); Log.d(TAG, "Element: " + currentName + " Element Value: " + value); if(value != null) { if(TAG_ArticleID.equals(currentName)) { news.setArticleId(value); } else if(TAG_Title.equals(currentName)) { news.setTitle(value); } else if(TAG_Date.equals(currentName)) { news.setDate(value); } else if(TAG_Category.equals(currentName)) { news.setCategory(value); } else if(TAG_SmallPictures.equals(currentName)) { news.setSmallPicture(value); } else if(TAG_LargePictures.equals(currentName)) { news.setLargePicture(value); } else if(TAG_HeadNote.equals(currentName)) { news.setHeadNote(value); } else if(TAG_SubTitle.equals(currentName)) { news.setSubTitle(value); } else if(TAG_Source.equals(currentName)) { news.setSource(value); } } } @Override public void startDocument() throws SAXException { super.startDocument(); 
start_time = System.currentTimeMillis(); newsList = new ArrayList(); } @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { super.startElement(uri, localName, qName, attributes); this.currentName = localName; flag = true; if(TAG_Article.equals(localName)) { news = new News(); } } @Override public void endElement(String uri, String localName, String qName) throws SAXException { super.endElement(uri, localName, qName); flag = false; if(TAG_Article.equals(localName)) { newsList.add(news); } } @Override public void endDocument() throws SAXException { super.endDocument(); long end = System.currentTimeMillis(); Log.d(TAG, "Parse List's Xml Cost: " + (end - start_time) + " !!"); }}
没错,一般情况下,这么写是可以的,而且在大多数情况下解析出来也是正确的。但是偶尔就是会出错,这个时候通常你都摸不着头脑:怎么回事?数据没错啊,解析部分的代码貌似也没问题……真是奇了怪了。其实问题就出在上面那段代码上!!
大家都认为 SAX 解析过程大致如下:
startDocument -> startElement -> characters -> endElement -> endDocument
没错,就是这样:startElement 读取起始标签,endElement 读取结束标签,characters 呢?当然是读取其值。这没错,但是大家都天真地以为 characters 只执行一次,并且一次就读取了全部内容。错就错在这!
其实 characters 是很有可能会执行多次的:当内容中有回车、\t 等字符时,它很有可能就被调用多次。SAX 规范本身也允许解析器把一段连续的文本拆成若干块、分多次回调(例如遇到实体引用,或文本跨越解析器内部缓冲区边界时)。有的人可能会说,那我的内容里没有这些字符,是不是就只执行一次了?看下我的实测结果:
测试用XML如下:
<News>
  <Article>
    <ArticleID>1000555</ArticleID>
    <Title></Title>
    <Date>2011-11-25 14:23:52</Date>
    <SmallPictures>livenews/images/s20.png</SmallPictures>
    <LargePictures>livenews/images/l20.png</LargePictures>
    <Category>闻天下</Category>
    <HeadNote></HeadNote>
    <SubTitle></SubTitle>
    <Author></Author>
    <Source>人民日报</Source>
    <Abstract></Abstract>
  </Article>
  <Article>
    <ArticleID>1000554</ArticleID>
    <Title></Title>
    <Date>2011-11-25 14:22:33</Date>
    <Category></Category>
    <HeadNote></HeadNote>
    <SubTitle></SubTitle>
    <Author></Author>
    <Source></Source>
    <Abstract></Abstract>
  </Article>
  <Article>
    <ArticleID>1000553</ArticleID>
    <Title></Title>
    <Date>2011-11-25 14:21:23</Date>
    <SmallPictures>livenews/images/s21.png</SmallPictures>
    <LargePictures>livenews/images/l21.png</LargePictures>
    <Category></Category>
    <HeadNote></HeadNote>
    <SubTitle></SubTitle>
    <Author></Author>
    <Source></Source>
    <Abstract></Abstract>
  </Article>
</News>
可以很明显的看到,在解析
好了,废话不多说了,看下正确的写法! 【★以下解析方法才是正确的 √ 】
import java.util.ArrayList;import java.util.List;import org.xml.sax.Attributes;import org.xml.sax.SAXException;import org.xml.sax.helpers.DefaultHandler;import android.util.Log;public class XmlHandler extends DefaultHandler{ private final String TAG = this.getClass().getSimpleName(); /**XML文件中标签定义*/ private final String TAG_Article = "Article"; private final String TAG_ArticleID = "ArticleID"; private final String TAG_Title = "Title"; private final String TAG_Date = "Date"; private final String TAG_SmallPictures = "SmallPictures"; private final String TAG_LargePictures = "LargePictures"; private final String TAG_Category = "Category"; private static final String TAG_HeadNote = "HeadNote"; private static final String TAG_SubTitle = "SubTitle"; private static final String TAG_Source = "Source"; //单个文章 private News news = null; //文章列表 private List newsList = null; //解析开始时间 private long start_time; //(1) private StringBuilder sb = new StringBuilder(); @Override public void characters(char[] ch, int start, int length) throws SAXException { super.characters(ch, start, length); //(2)不管在startElement到endElement的过程中,执行了多少次characters, 都会将内容添加到StringBuilder中,不会丢失内容 sb.append(ch, start, length); } @Override public void startDocument() throws SAXException { super.startDocument(); start_time = System.currentTimeMillis(); newsList = new ArrayList(); } @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { super.startElement(uri, localName, qName, attributes); //(3) 开始收集新的标签的数据时,先清空历史数据 sb.setLength(0); if(TAG_Article.equals(localName)) { news = new News(); } } @Override public void endElement(String uri, String localName, String qName) throws SAXException { super.endElement(uri, localName, qName); //(4)原来在characters中取值,现改在此取值 String value = sb.toString(); if(TAG_ArticleID.equals(localName)) { news.setArticleId(value); } else if(TAG_Title.equals(localName)) { news.setTitle(value); } else 
if(TAG_Date.equals(localName)) { news.setDate(value); } else if(TAG_Category.equals(localName)) { news.setCategory(value); } else if(TAG_SmallPictures.equals(localName)) { news.setSmallPicture(value); } else if(TAG_LargePictures.equals(localName)) { news.setLargePicture(value); } else if(TAG_HeadNote.equals(localName)) { news.setHeadNote(value); } else if(TAG_SubTitle.equals(localName)) { news.setSubTitle(value); } else if(TAG_Source.equals(localName)) { news.setSource(value); } if(TAG_Article.equals(localName)) { newsList.add(news); } } @Override public void endDocument() throws SAXException { super.endDocument(); long end = System.currentTimeMillis(); Log.d(TAG, "Parse List's Xml Cost: " + (end - start_time) + " !!"); }}
1.startElement的时候, new StringBuilder(); 或者 sb.setLength(0); (我建议后者)
2.characters的时候,sb.append(ch, start, length);
3.endElement的时候,sb.toString(); 此时StringBuilder中的内容才是解析的结果
通过这种方法就不会再有数据离奇丢失的情况了(同时也不需要像错误写法那样再维护 currentName、flag 之类的状态变量了——逻辑更繁杂,还容易出错)!
希望大家可以尽早看到这篇文章,不要继续被坑了!!!