通过mycat来学习java了^^。
接前一篇:http://blog.csdn.net/john_chang11/article/details/78715228
前一篇讲到了load方法:
/**
 * Loads the configuration described by a DTD and an XML classpath resource.
 *
 * <p>Both resources are opened relative to {@code XMLSchemaLoader}, the XML is parsed through
 * {@code ConfigUtil.getDocument}, and the data hosts, data nodes and schemas are then loaded
 * from the root element, in that order.
 *
 * @param dtdFile classpath location of the DTD
 * @param xmlFile classpath location of the XML document
 * @throws ConfigException rethrown unchanged, or wrapping any other exception raised while loading
 */
private void load(String dtdFile, String xmlFile) {
    InputStream dtd = null;
    InputStream xml = null;
    try {
        dtd = XMLSchemaLoader.class.getResourceAsStream(dtdFile);
        xml = XMLSchemaLoader.class.getResourceAsStream(xmlFile);
        Element root = ConfigUtil.getDocument(dtd, xml).getDocumentElement();
        // Load every DataHost first.
        loadDataHosts(root);
        // Then load every DataNode.
        loadDataNodes(root);
        // Finally load every Schema.
        loadSchemas(root);
    } catch (ConfigException e) {
        throw e;
    } catch (Exception e) {
        throw new ConfigException(e);
    } finally {
        // Close both streams, DTD first; a failing close is deliberately ignored.
        for (InputStream stream : new InputStream[] { dtd, xml }) {
            if (stream == null) {
                continue;
            }
            try {
                stream.close();
            } catch (IOException ignored) {
                // best-effort close
            }
        }
    }
}
// Parse the XML through ConfigUtil.getDocument and take the root element of the result.
Element root = ConfigUtil.getDocument(dtd, xml).getDocumentElement();
看看ConfigUtil.getDocument(dtd, xml)方法的内容,此时传入的dtd和xml两个参数,分别是由/rule.dtd和/rule.xml这两个资源文件打开的输入流:
/**
 * Parses {@code xml} into a W3C DOM {@link Document} with validation switched on.
 *
 * <p>Every entity lookup made by the parser is answered with the supplied {@code dtd} stream.
 * Warnings are ignored; errors and fatal errors are rethrown to the caller.
 *
 * @param dtd stream handed back to the parser whenever it asks to resolve an entity
 * @param xml stream containing the XML document to parse
 * @return the parsed DOM document
 * @throws ParserConfigurationException if the {@code DocumentBuilder} cannot be created
 * @throws SAXException if the parser reports an error or a fatal error
 * @throws IOException if reading the input fails
 */
public static Document getDocument(final InputStream dtd, InputStream xml) throws ParserConfigurationException,
SAXException, IOException {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
// Validating, non-namespace-aware parser.
factory.setValidating(true);
factory.setNamespaceAware(false);
DocumentBuilder builder = factory.newDocumentBuilder();
// Ignore publicId/systemId and always answer with the supplied DTD stream.
builder.setEntityResolver(new EntityResolver() {
@Override
public InputSource resolveEntity(String publicId, String systemId) {
return new InputSource(dtd);
}
});
// Warnings are dropped; errors and fatal errors are rethrown.
builder.setErrorHandler(new ErrorHandler() {
@Override
public void warning(SAXParseException e) {
// warnings are deliberately ignored
}
@Override
public void error(SAXParseException e) throws SAXException {
throw e;
}
@Override
public void fatalError(SAXParseException e) throws SAXException {
throw e;
}
});
return builder.parse(xml);
}
该方法就是通过对rule.xml的解析,返回一个w3c的Document类型,用来存放xml文件内容。这又是一个知识点:
XML文档解析:四种方式的简单总结
对XML的解析网上有许多文章,有兴趣可以搜一下,一大把,这里不做过多重复,只对重点列举一下:
以如下XML文档进行展示:
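<mycat:rule xmlns:mycat="http://io.mycat/">
    <tableRule name="mod-long">
        <rule>
            <columns>user_id</columns>
            <algorithm>mod-long</algorithm>
        </rule>
    </tableRule>
    <tableRule name="mod-long-from">
        <rule>
            <columns>from_user_id</columns>
            <algorithm>mod-long</algorithm>
        </rule>
    </tableRule>
    <tableRule name="mod-long-to">
        <rule>
            <columns>to_user_id</columns>
            <algorithm>mod-long</algorithm>
        </rule>
    </tableRule>
    <function name="mod-long" class="io.mycat.route.function.PartitionByMod">
        <property name="count">2</property>
        <property name="defaultNode">0</property>
    </function>
</mycat:rule>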
1.DOM解析,适合XML文件小的场景
最原始的解析方式,将整个XML文档读入内存,按层次解析成一棵树,容易理解,可以随机访问树的任意部分,但如果XML文档比较大,会占用大量内存。这种解析方式适用于XML文件比较小的场景。
package io.mycat.test;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;
/**
 * DOM parsing demo: loads {@code /rule.xml} from the classpath into an in-memory tree and prints
 * every element (with its attributes) and every non-blank text node to standard output.
 */
public class DOMrw {

    /** Classpath location of the document to print. */
    private static final String RULE_XML = "/rule.xml";

    public static void main(String[] args) {
        parse();
    }

    /**
     * Parses the document and prints its tree.
     *
     * <p>A missing resource is reported on standard error. Parser, SAX and I/O failures are
     * printed as stack traces; nothing is propagated to the caller.
     */
    public static void parse() {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        // try-with-resources guarantees the stream is closed on every path.
        try (InputStream in = DOMrw.class.getResourceAsStream(RULE_XML)) {
            if (in == null) {
                // getResourceAsStream returns null for a missing resource, and
                // DocumentBuilder.parse(null) would throw an uncaught IllegalArgumentException.
                System.err.println("Resource not found on classpath: " + RULE_XML);
                return;
            }
            DocumentBuilder builder = dbf.newDocumentBuilder();
            Document doc = builder.parse(in);
            // root
            Element root = doc.getDocumentElement();
            System.out.print(root.getNodeName());
            printAttributes(root);
            System.out.println();
            // all tableRule, function node
            print(root.getChildNodes(), 1);
        } catch (ParserConfigurationException | SAXException | IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Recursively prints the given nodes, one per line.
     *
     * @param nodes sibling nodes to print; only text and element nodes produce output
     * @param ind nesting depth, printed as that many leading spaces
     */
    private static void print(NodeList nodes, int ind) {
        for (int i = 0; i < nodes.getLength(); i++) {
            Node node = nodes.item(i);
            if (node.getNodeType() == Node.TEXT_NODE) {
                String txt = node.getTextContent();
                // trim() also treats '\r' as whitespace, so CRLF-formatted files do not
                // produce spurious "[...]" lines for pure layout text.
                if (!txt.trim().isEmpty()) {
                    indent(ind);
                    System.out.println("[" + txt + "]");
                }
            } else if (node.getNodeType() == Node.ELEMENT_NODE) {
                indent(ind);
                System.out.print(node.getNodeName());
                printAttributes(node);
                System.out.println();
                // An element without children simply yields an empty list here.
                print(node.getChildNodes(), ind + 1);
            }
        }
    }

    /** Prints {@code name=value} for every attribute of {@code node}. */
    private static void printAttributes(Node node) {
        NamedNodeMap nnm = node.getAttributes();
        for (int i = 0; i < nnm.getLength(); i++) {
            Node n = nnm.item(i);
            System.out.print(" " + n.getNodeName() + "=" + n.getNodeValue() + " ");
        }
    }

    /** Prints {@code ind} leading spaces. */
    private static void indent(int ind) {
        for (int j = 0; j < ind; j++) {
            System.out.print(" ");
        }
    }
}
执行结果:
mycat:rule xmlns:mycat=http://io.mycat/
tableRule name=mod-long
rule
columns
[user_id]
algorithm
[mod-long]
tableRule name=mod-long-from
rule
columns
[from_user_id]
algorithm
[mod-long]
tableRule name=mod-long-to
rule
columns
[to_user_id]
algorithm
[mod-long]
function class=io.mycat.route.function.PartitionByMod name=mod-long
property name=count
[2]
property name=defaultNode
[0]
2.SAX解析,适合XML文件大的场景
SAX与DOM正好相反,占用内存少,但只能顺序访问,不能随机访问。SAX在解析XML文档时,按顺序会遇到各种文档元素,遇到时就触发相应的事件,例如startDocument()文档开始事件,endDocument()文档结束事件,startElement()元素开始事件等等,我们重写这些事件方法,就可以完成我们解析的目的,所以SAX也称为事件驱动解析。
SAX不会将整个XML文档都事先读入内存,读过的部分也不保证缓存在内存里,所以适合XML大文档的读取,但只能顺序读取。由于是边读边触发事件边处理,所以SAX的响应速度很快。
XML文档里的元素种类很多,所以对应的事件也很多,但最常用的是startDocument()、endDocument()、startElement()、endElement()、characters()几个。
package io.mycat.test;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
public class SAXr {
public static void main(String[] args) {
parse();
// write();
}
public static void parse() {
SAXParserFactory saxf = SAXParserFactory.newInstance();
try {
SAXParser saxparser = saxf.newSAXParser();
InputStream in = DOMrw.class.getResourceAsStream("/rule.xml");
saxparser.parse(in, new MySAXHandler());
} catch (ParserConfigurationException e) {
e.printStackTrace();
} catch (SAXException e) {
e.printStackTrace();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
/**
 * SAX event handler that echoes what it receives to standard output: document start/end markers,
 * the non-empty names of each opening element together with its attributes, and all character data.
 */
class MySAXHandler extends DefaultHandler {
    boolean hasAttribute = false;
    Attributes attributes = null;

    @Override
    public void startDocument() throws SAXException {
        System.out.println("文档开始");
    }

    @Override
    public void endDocument() throws SAXException {
        System.out.println("文档结束");
    }

    /**
     * Prints {@code uri}, {@code localName} and {@code qName} (each only when non-empty),
     * followed by every attribute as {@code qName:value}.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        printLabeled("uri=", uri);
        printLabeled("localName=", localName);
        printLabeled("qName=", qName);
        final int attrCount = attributes.getLength();
        int idx = 0;
        while (idx < attrCount) {
            System.out.print(attributes.getQName(idx) + ":" + attributes.getValue(idx) + " ");
            idx++;
        }
    }

    /** Closing tags produce no output. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
    }

    /**
     * Receives text content; this is the SAX counterpart of a DOM TEXT_NODE.
     * The reported slice of the buffer is printed as-is.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        System.out.print(String.valueOf(ch, start, length));
    }

    /** Prints {@code label}, {@code value} and a trailing space, unless {@code value} is empty. */
    private static void printLabeled(String label, String value) {
        if (value.isEmpty()) {
            return;
        }
        System.out.print(label + value + " ");
    }
}
执行结果:
文档开始
qName=mycat:rule xmlns:mycat:http://io.mycat/
qName=tableRule name:mod-long
qName=rule
qName=columns user_id
qName=algorithm mod-long
qName=tableRule name:mod-long-from
qName=rule
qName=columns from_user_id
qName=algorithm mod-long
qName=tableRule name:mod-long-to
qName=rule
qName=columns to_user_id
qName=algorithm mod-long
qName=function name:mod-long class:io.mycat.route.function.PartitionByMod
qName=property name:count 2
qName=property name:defaultNode 0
文档结束
3.JDOM解析,使用简单
前面的DOM方式和SAX方式解析XML代码编写比较复杂,为了简化代码可以使用JDOM,而且完成相同的功能JDOM的代码更高效,但是JDOM不能解决所有的问题。JDOM的目的就是为了解决80%最常见的问题,使这些问题的解决得到极大简化,而另外20%不常见问题,则需要自己编码解决,这些问题很少遇到,可以忽略。所以JDOM还是很值得考虑使用的。
JDOM没有自己的解析器,所以使用的是SAX解析器,但在使用上更类似于DOM,更容易理解。
package io.mycat.test;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import org.jdom2.Attribute;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;
/**
 * JDOM parsing demo: builds a JDOM2 document from {@code /rule.xml} on the classpath and prints
 * every element with its attributes and trimmed text.
 */
public class JDOMr {

    /** Classpath location of the document to print. */
    private static final String RULE_XML = "/rule.xml";

    public static void main(String[] args) {
        parse();
    }

    /**
     * Parses the document and prints its tree to standard output.
     *
     * <p>A missing resource is reported on standard error. JDOM and I/O failures are printed as
     * stack traces; nothing is propagated to the caller.
     */
    public static void parse() {
        SAXBuilder saxBuilder = new SAXBuilder();
        // try-with-resources closes the stream on every path.
        try (InputStream in = JDOMr.class.getResourceAsStream(RULE_XML)) {
            if (in == null) {
                // getResourceAsStream returns null when the resource does not exist.
                System.err.println("Resource not found on classpath: " + RULE_XML);
                return;
            }
            Document document = saxBuilder.build(in); // org.jdom2.Document
            Element root = document.getRootElement(); // org.jdom2.Element
            System.out.print("name=" + root.getName() + " ");
            System.out.print("NamespacePrefix=" + root.getNamespacePrefix() + " ");
            System.out.print("NamespaceURI=" + root.getNamespaceURI() + " ");
            printAttributes(root);
            System.out.println();
            // all tableRule, function node
            print(root.getChildren(), 1);
        } catch (JDOMException | IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Recursively prints the given elements, one per line.
     *
     * @param children sibling elements to print
     * @param ind nesting depth, printed as that many leading spaces
     */
    private static void print(List<Element> children, int ind) {
        for (Element elt : children) {
            for (int j = 0; j < ind; j++) {
                System.out.print(" ");
            }
            System.out.print(elt.getName());
            printAttributes(elt);
            String txt = elt.getTextTrim();
            if (txt.length() > 0) {
                System.out.print("[" + txt + "]");
            }
            System.out.println();
            print(elt.getChildren(), ind + 1);
        }
    }

    /** Prints {@code name=value} for every attribute of {@code elt}. */
    private static void printAttributes(Element elt) {
        // Typed list: iterating a raw List as Attribute does not compile.
        List<Attribute> attrs = elt.getAttributes();
        for (Attribute attr : attrs) {
            System.out.print(" " + attr.getName() + "=" + attr.getValue() + " ");
        }
    }
}
执行结果:
name=rule NamespacePrefix=mycat NamespaceURI=http://io.mycat/
tableRule name=mod-long
rule
columns[user_id]
algorithm[mod-long]
tableRule name=mod-long-from
rule
columns[from_user_id]
algorithm[mod-long]
tableRule name=mod-long-to
rule
columns[to_user_id]
algorithm[mod-long]
function name=mod-long class=io.mycat.route.function.PartitionByMod
property name=count [2]
property name=defaultNode [0]
4.DOM4J解析、最好的解析方式
DOM4J是JDOM的改进,一开始DOM4J只是JDOM的一个智能分支,后来发展成一款独立的开源软件,DOM4J不但可以解决所有的XML解析问题,还包括集成的 XPath 支持、XML Schema 支持以及用于大文档或流化文档的基于事件的处理。总之,DOM4J性能优异、功能强大、极其易用。
越来越多的软件在使用DOM4J,连 Sun 的 JAXM 也在用 DOM4J。所以使用DOM4J处理XML是不错的选择。
package io.mycat.test.xml;
import java.io.InputStream;
import java.util.List;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
/**
 * DOM4J parsing demo: reads {@code /rule.xml} from the classpath into a dom4j document and prints
 * every element with its attributes and trimmed text.
 */
public class DOM4Jr {

    /** Classpath location of the document to print. */
    private static final String RULE_XML = "/rule.xml";

    public static void main(String[] args) {
        parse();
    }

    /**
     * Parses the document and prints its tree to standard output.
     *
     * <p>A missing resource is reported on standard error. dom4j and I/O failures are printed as
     * stack traces; nothing is propagated to the caller.
     */
    public static void parse() {
        SAXReader reader = new SAXReader();
        // Resolve the resource through this class: JDOMr lives in another package and is not
        // imported here. try-with-resources closes the stream on every path.
        try (InputStream in = DOM4Jr.class.getResourceAsStream(RULE_XML)) {
            if (in == null) {
                // getResourceAsStream returns null when the resource does not exist.
                System.err.println("Resource not found on classpath: " + RULE_XML);
                return;
            }
            Document document = reader.read(in); // org.dom4j.Document
            Element root = document.getRootElement(); // org.dom4j.Element
            System.out.print("name=" + root.getName() + " ");
            System.out.print("NamespacePrefix=" + root.getNamespacePrefix() + " ");
            System.out.print("NamespaceURI=" + root.getNamespaceURI() + " ");
            printAttributes(root);
            System.out.println();
            // all tableRule, function node
            @SuppressWarnings("unchecked") // dom4j 1.x declares a raw List here
            List<Element> children = root.elements();
            print(children, 1);
        } catch (DocumentException | java.io.IOException e) {
            // IOException can only come from closing the stream.
            e.printStackTrace();
        }
    }

    /**
     * Recursively prints the given elements, one per line.
     *
     * @param children sibling elements to print
     * @param ind nesting depth, printed as that many leading spaces
     */
    private static void print(List<Element> children, int ind) {
        for (Element elt : children) {
            for (int j = 0; j < ind; j++) {
                System.out.print(" ");
            }
            System.out.print(elt.getName());
            printAttributes(elt);
            String txt = elt.getTextTrim();
            if (txt.length() > 0) {
                System.out.print("[" + txt + "]");
            }
            System.out.println();
            @SuppressWarnings("unchecked") // dom4j 1.x declares a raw List here
            List<Element> ch = elt.elements();
            print(ch, ind + 1);
        }
    }

    /** Prints {@code name=value} for every attribute of {@code elt}. */
    private static void printAttributes(Element elt) {
        // Typed list: iterating a raw List as Attribute does not compile.
        @SuppressWarnings("unchecked") // dom4j 1.x declares a raw List here
        List<Attribute> attrs = elt.attributes();
        for (Attribute attr : attrs) {
            System.out.print(" " + attr.getName() + "=" + attr.getValue() + " ");
        }
    }
}
执行结果:
name=rule NamespacePrefix=mycat NamespaceURI=http://io.mycat/
tableRule name=mod-long
rule
columns[user_id]
algorithm[mod-long]
tableRule name=mod-long-from
rule
columns[from_user_id]
algorithm[mod-long]
tableRule name=mod-long-to
rule
columns[to_user_id]
algorithm[mod-long]
function name=mod-long class=io.mycat.route.function.PartitionByMod
property name=count [2]
property name=defaultNode [0]
上面就是JAVA的XML的四种解析方式,总的来说,优先使用DOM4J的方式。
现在回过头来再看一下MyCat的XML解析:
/**
 * Parses {@code xml} into a W3C DOM {@link Document} with validation switched on.
 *
 * <p>Every entity lookup made by the parser is answered with the supplied {@code dtd} stream.
 * Warnings are ignored; errors and fatal errors are rethrown to the caller.
 *
 * @param dtd stream handed back to the parser whenever it asks to resolve an entity
 * @param xml stream containing the XML document to parse
 * @return the parsed DOM document
 * @throws ParserConfigurationException if the {@code DocumentBuilder} cannot be created
 * @throws SAXException if the parser reports an error or a fatal error
 * @throws IOException if reading the input fails
 */
public static Document getDocument(final InputStream dtd, InputStream xml) throws ParserConfigurationException,
SAXException, IOException {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
// Validating, non-namespace-aware parser.
factory.setValidating(true);
factory.setNamespaceAware(false);
DocumentBuilder builder = factory.newDocumentBuilder();
// Ignore publicId/systemId and always answer with the supplied DTD stream.
builder.setEntityResolver(new EntityResolver() {
@Override
public InputSource resolveEntity(String publicId, String systemId) {
return new InputSource(dtd);
}
});
// Warnings are dropped; errors and fatal errors are rethrown.
builder.setErrorHandler(new ErrorHandler() {
@Override
public void warning(SAXParseException e) {
// warnings are deliberately ignored
}
@Override
public void error(SAXParseException e) throws SAXException {
throw e;
}
@Override
public void fatalError(SAXParseException e) throws SAXException {
throw e;
}
});
return builder.parse(xml);
}
首先:
// Create a validating, non-namespace-aware DocumentBuilder through the JAXP factory.
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setValidating(true);
factory.setNamespaceAware(false);
DocumentBuilder builder = factory.newDocumentBuilder();
这一段用的是javax.xml.parsers包里的JAVA自带的基本类。接下来:
// Ignore publicId/systemId and always answer entity lookups with the supplied DTD stream.
builder.setEntityResolver(new EntityResolver() {
@Override
public InputSource resolveEntity(String publicId, String systemId) {
return new InputSource(dtd);
}
});
// Warnings are dropped; errors and fatal errors are rethrown.
builder.setErrorHandler(new ErrorHandler() {
@Override
public void warning(SAXParseException e) {
// warnings are deliberately ignored
}
@Override
public void error(SAXParseException e) throws SAXException {
throw e;
}
@Override
public void fatalError(SAXParseException e) throws SAXException {
throw e;
}
});
这一段用的又是SAX的接口(org.xml.sax包里的EntityResolver和ErrorHandler),最后:
// Parse the XML stream and return the resulting document.
return builder.parse(xml);
返回的又是org.w3c.dom包的Document类。几种风格混在一起,不够统一。如果改用DOM4J来实现同样的功能,代码如下:
/**
 * Reads {@code xml} into a dom4j {@code Document} with validation enabled, answering every
 * entity lookup with the supplied {@code dtd} stream.
 *
 * @param dtd stream handed back to the reader whenever it asks to resolve an entity
 * @param xml stream containing the XML document to read
 * @return the dom4j document read from {@code xml}
 * @throws DocumentException if the reader fails to read the document
 */
public static Document getDocument(final InputStream dtd, InputStream xml) throws DocumentException {
    // publicId/systemId are ignored: the same DTD stream is returned for every lookup.
    final EntityResolver dtdResolver = new EntityResolver() {
        @Override
        public InputSource resolveEntity(String publicId, String systemId) {
            return new InputSource(dtd);
        }
    };
    final SAXReader saxReader = new SAXReader();
    saxReader.setValidation(true);
    saxReader.setEntityResolver(dtdResolver);
    return saxReader.read(xml);
}
可以看到,代码相当简洁,而完成的功能又完全一样。这里的Document是DOM4J里的Document类,不再引用W3C的Document类,风格相当统一。本人已将MyCat里XML解析全部修改成DOM4J的方式,代码简洁不少。