android dom4j sax 兼容问题

1.在Android2.1中使用dom4j出现的问题

测试:

package com.example.dom4jlibs;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;

import android.os.Bundle;
import android.app.Activity;
import android.util.Log;
import android.view.Menu;

/**
 * Demo activity for the dom4j round-trip test: it builds a small XML
 * document, serializes it to a string, parses that string back with
 * {@link DocumentHelper#parseText(String)} and logs both serializations so
 * that they can be compared in LogCat.
 */
public class MainActivity extends Activity {

    /** Tag attached to every LogCat message written by this activity. */
    private static final String TAG = "MainActivity";

    @Override
    public void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        testDom4j();
    }

    @Override
    public boolean onCreateOptionsMenu(Menu menu) {
        getMenuInflater().inflate(R.menu.activity_main, menu);
        return true;
    }

    /**
     * Builds a two-author document, logs its XML form, then re-parses that
     * XML string and logs the re-parsed document. A parse failure is logged
     * as an error under {@link #TAG} instead of being propagated.
     */
    public void testDom4j() {
        // Create the XML document.
        Document document = DocumentHelper.createDocument();

        Element root = document.addElement("root");

        // The returned elements are not needed afterwards, so they are not kept.
        root.addElement("author")
            .addAttribute("name", "James")
            .addAttribute("location", "UK")
            .addText("James Strachan");

        root.addElement("author")
            .addAttribute("name", "Bob")
            .addAttribute("location", "US")
            .addText("Bob McWhirter");

        // Serialize the document to a string.
        String xmlString = document.asXML();
        // Show it in the LogCat window.
        Log.d(TAG, xmlString);

        // Parse the string back into a document and show what came out.
        try {
            Document xmlDoc = DocumentHelper.parseText(xmlString);
            Log.d(TAG, xmlDoc.asXML());
        } catch (DocumentException e) {
            // Report through the same log channel (with the stack trace)
            // rather than dumping to System.err.
            Log.e(TAG, "Failed to parse XML string", e);
        }
    }
}

结果:

11-30 13:18:34.944: D/MainActivity(27701): <?xml version="1.0" encoding="UTF-8"?>
11-30 13:18:34.944: D/MainActivity(27701): <root><author name="James" location="UK">James Strachan</author><author name="Bob" location="US">Bob McWhirter</author></root>
11-30 13:18:34.975: D/MainActivity(27701): <?xml version="1.0" encoding="UTF-8"?>
11-30 13:18:34.975: D/MainActivity(27701): <root><author name="James" name="UK">James Strachan</author><author name="Bob" name="US">Bob McWhirter</author></root>

可以看到,所有的属性名称都变成了第一个属性的名称(location变成了name)。


2.分析

出现错误的代码为:

DocumentHelper.parseText(xmlString)
parseText函数在org.dom4j.DocumentHelper.java 的第269行
    /**
     * <p>
     * Turns XML text into a Document by feeding it to a freshly created
     * <code>SAXReader</code>.
     * </p>
     * <p>
     * The encoding obtained from <code>getEncoding(text)</code> is set on the
     * input source and, when the parsed document reports no XML encoding of
     * its own, on the document as well.
     * </p>
     * 
     * @param text
     *            the XML text to be parsed
     * 
     * @return a newly parsed Document
     * 
     * @throws DocumentException
     *             if the document could not be parsed
     */
    public static Document parseText(String text) throws DocumentException {
        SAXReader saxReader = new SAXReader();
        String declaredEncoding = getEncoding(text);

        InputSource input = new InputSource(new StringReader(text));
        input.setEncoding(declaredEncoding);

        Document parsed = saxReader.read(input);

        // Fall back to the encoding found in the text when the parser did
        // not record one on the document.
        if (parsed.getXMLEncoding() == null) {
            parsed.setXMLEncoding(declaredEncoding);
        }

        return parsed;
    }

注意上面代码中与SAXReader相关的部分(原文中以红色标出):dom4j解析字符串的时候是通过创建一个SAXReader来解析的。
reader.read(source)
    /**
     * <p>
     * Parses the given <code>InputSource</code> with SAX and returns the
     * Document built by the content handler.
     * </p>
     * <p>
     * If no entity resolver has been set on this reader yet, a default one
     * is created from the source's system id and stored for later use.
     * </p>
     * 
     * @param in
     *            <code>InputSource</code> to read from.
     * 
     * @return the newly created Document instance
     * 
     * @throws DocumentException
     *             if an error occurs during parsing; a
     *             <code>SAXParseException</code> is reported with its line
     *             number and system id in the message.
     */
    public Document read(InputSource in) throws DocumentException {
        try {
            XMLReader xmlReader = installXMLFilter(getXMLReader());

            // Create the default resolver on first use and remember it.
            EntityResolver resolver = this.entityResolver;
            if (resolver == null) {
                resolver = createDefaultEntityResolver(in.getSystemId());
                this.entityResolver = resolver;
            }
            xmlReader.setEntityResolver(resolver);

            SAXContentHandler handler = createContentHandler(xmlReader);
            handler.setEntityResolver(resolver);
            handler.setInputSource(in);

            // Copy this reader's options onto the content handler.
            boolean includeInternal = isIncludeInternalDTDDeclarations();
            boolean includeExternal = isIncludeExternalDTDDeclarations();
            handler.setIncludeInternalDTDDeclarations(includeInternal);
            handler.setIncludeExternalDTDDeclarations(includeExternal);
            handler.setMergeAdjacentText(isMergeAdjacentText());
            handler.setStripWhitespaceText(isStripWhitespaceText());
            handler.setIgnoreComments(isIgnoreComments());

            xmlReader.setContentHandler(handler);
            configureReader(xmlReader, handler);

            xmlReader.parse(in);

            return handler.getDocument();
        } catch (SAXParseException parseFailure) {
            // Parse errors carry position information worth surfacing.
            String systemId = parseFailure.getSystemId();
            String documentName = (systemId == null) ? "" : systemId;

            throw new DocumentException("Error on line "
                    + parseFailure.getLineNumber() + " of document "
                    + documentName + " : " + parseFailure.getMessage(),
                    parseFailure);
        } catch (Exception e) {
            throw new DocumentException(e.getMessage(), e);
        }
    }
注意
XMLReader reader = getXMLReader();
关于XMLReader的注释是:
 org.xml.sax.XMLReader


Interface for reading an XML document using callbacks. 

This module, both source code and documentation, is in the Public Domain, and comes with NO WARRANTY. See http://www.saxproject.org for further information. 
Note: despite its name, this interface does not extend the standard Java Reader interface, because reading XML is a fundamentally different activity than reading character data.

XMLReader is the interface that an XML parser's SAX2 driver must implement. This interface allows an application to set and query features and properties in the parser, to register event handlers for document processing, and to initiate a document parse.

All SAX interfaces are assumed to be synchronous: the parse methods must not return until parsing is complete, and readers must wait for an event-handler callback to return before reporting the next event.

This interface replaces the (now deprecated) SAX 1.0 Parser interface. The XMLReader interface contains two important enhancements over the old Parser interface (as well as some minor ones):

it adds a standard way to query and set features and properties; and 
it adds Namespace support, which is required for many higher-level XML standards. 
There are adapters available to convert a SAX1 Parser to a SAX2 XMLReader and vice-versa.

Since:
SAX 2.0
Version:
2.0.1+ (sax2r3pre1)
Author:
David Megginson
See Also:
org.xml.sax.XMLFilter
org.xml.sax.helpers.ParserAdapter
org.xml.sax.helpers.XMLReaderAdapter
也就是说必须有一个SAX2的Driver(sax parser解析器)来解析XML文档。sax2与sax1的区别主要是sax2支持namespace(更详细的资料参见http://sax.sourceforge.net/sax2-history.html)。
dom4j最终调用了org.dom4j.io.JAXPHelper.java 第46行,这个函数确定了解析xml所用的SAX reader:
    /**
     * Obtains a SAX <code>XMLReader</code> through JAXP: a
     * <code>SAXParserFactory</code> is looked up with
     * <code>SAXParserFactory.newInstance()</code>, configured with the two
     * flags, and the reader of a newly created parser is returned.
     * 
     * @param validating
     *            passed to <code>SAXParserFactory.setValidating</code>
     * @param namespaceAware
     *            passed to <code>SAXParserFactory.setNamespaceAware</code>
     * 
     * @return the XMLReader of the newly created SAX parser
     * 
     * @throws Exception
     *             if the factory, the parser or its reader cannot be
     *             created
     */
    public static XMLReader createXMLReader(boolean validating,
            boolean namespaceAware) throws Exception {
        SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        parserFactory.setNamespaceAware(namespaceAware);
        parserFactory.setValidating(validating);

        return parserFactory.newSAXParser().getXMLReader();
    }
android上的sax reader(也称为parser或driver)是org.apache.harmony.xml.ExpatReader,PC上的driver是com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser。
它们对inputsource的解析
reader.parse(in);
Android上的实现:
    /**
     * Parses the document described by <code>input</code>, taking the first
     * source that is available: the character stream, then the byte stream,
     * then a stream opened from the system id. The stream that was used is
     * closed quietly once parsing has finished or failed.
     * 
     * @param input
     *            the source to parse
     * 
     * @throws IOException
     *             declared for I/O failures while reading the input
     * @throws SAXException
     *             if no input is specified at all, or (as
     *             <code>SAXNotSupportedException</code>) if both the
     *             namespaces and namespace-prefixes features are enabled
     */
    public void parse(InputSource input) throws IOException, SAXException {
        if (processNamespaces && processNamespacePrefixes) {
            /*
             * Expat has XML_SetReturnNSTriplet, but that still doesn't
             * include xmlns attributes like this feature requires. We may
             * have to implement namespace processing ourselves if we want
             * this (not too difficult). We obviously "support" namespace
             * prefixes if namespaces are disabled.
             */
            throw new SAXNotSupportedException("The 'namespace-prefix' " +
                    "feature is not supported while the 'namespaces' " +
                    "feature is enabled.");
        }

        // First choice: a character stream supplied by the caller.
        Reader characters = input.getCharacterStream();
        if (characters != null) {
            try {
                parse(characters, input.getPublicId(), input.getSystemId());
            } finally {
                IoUtils.closeQuietly(characters);
            }
            return;
        }

        // Second choice: a byte stream, decoded with the source's encoding.
        InputStream bytes = input.getByteStream();
        String encoding = input.getEncoding();
        if (bytes != null) {
            try {
                parse(bytes, encoding, input.getPublicId(), input.getSystemId());
            } finally {
                IoUtils.closeQuietly(bytes);
            }
            return;
        }

        // Last resort: open the system id as a URL.
        String systemId = input.getSystemId();
        if (systemId == null) {
            throw new SAXException("No input specified.");
        }

        InputStream opened = ExpatParser.openUrl(systemId);
        try {
            parse(opened, encoding, input.getPublicId(), systemId);
        } finally {
            IoUtils.closeQuietly(opened);
        }
    }
上面代码开头的那段注释(原文中以红色标出)说明:Expat虽然提供了XML_SetReturnNSTriplet,但仍然不会像该特性要求的那样包含xmlns属性(命名空间声明属性),如果要用这个特性就需要自己实现命名空间处理(不难)。也就是说,这个ExpatReader并没有完整实现SAX2,所以导致dom4j在解析的时候出现了问题(主要涉及QName、LocalName、uri、namespace)。

3.解决问题

怎么解决这个问题呢,我使用的方法是用另外的sax driver。在sax官网的quickstart页面(http://sax.sourceforge.net/quickstart.html)上,提供了几个sax driver:

Class Name Notes
gnu.xml.aelfred2.SAXDriver Lightweight non-validating parser; Free Software
gnu.xml.aelfred2.XmlReader Optionally validates; Free Software
oracle.xml.parser.v2.SAXParser Optionally validates; proprietary
org.apache.crimson.parser.XMLReaderImpl Optionally validates; used in JDK 1.4; Open Source
org.apache.xerces.parsers.SAXParser Optionally validates; Open Source

我使用的是org.apache.crimson.parser.XMLReaderImpl,下载:http://xml.apache.org/dist/crimson/crimson-1.1.3-src.zip
将org.apache.crimson和所有的子目录都添加进工程。
修改dom4j的源文件 JAXPHelper.java。下面的diff中,以“-”开头的行表示被删除,以“+”开头的行表示新增,例如:
-        SAXParserFactory factory = SAXParserFactory.newInstance();
表示这行被删除;
+import org.apache.crimson.jaxp.DocumentBuilderFactoryImpl;
表示添加这一行。

Index: JAXPHelper.java
===================================================================
--- JAXPHelper.java	(revision 13)
+++ JAXPHelper.java	(revision 14)
@@ -12,6 +12,8 @@
 import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;
 
+import org.apache.crimson.jaxp.DocumentBuilderFactoryImpl;
+import org.apache.crimson.jaxp.SAXParserFactoryImpl;
 import org.xml.sax.XMLReader;
 
 /**
@@ -43,7 +45,8 @@
      */
     public static XMLReader createXMLReader(boolean validating,
             boolean namespaceAware) throws Exception {
-        SAXParserFactory factory = SAXParserFactory.newInstance();
+        SAXParserFactory factory = new SAXParserFactoryImpl();
+        		//SAXParserFactory.newInstance();
         factory.setValidating(validating);
         factory.setNamespaceAware(namespaceAware);
 
@@ -54,7 +57,8 @@
 
     public static org.w3c.dom.Document createDocument(boolean validating,
             boolean namespaceAware) throws Exception {
-        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+        DocumentBuilderFactory factory = new DocumentBuilderFactoryImpl();
+        		//DocumentBuilderFactory.newInstance();
         factory.setValidating(validating);
         factory.setNamespaceAware(namespaceAware);

当然,除了上面的修改,还有其他的修改,主要是因为android没有完整实现JDK的类库,需要另外添加一些源文件和library。

jar文件可以在 http://code.google.com/p/dom4j-android/downloads/list下载。里面包含了源文件和class文件。

注意,以上修改只经过简单的测试(在android2.1上),如果遇到了问题希望能够发送问题和重现步骤给我,我的邮箱是[email protected]


你可能感兴趣的:(android,android,dom4j,xml解析,xml解析)