解析XML之DOM4J

概述

DOM4J是dom4j.org出品的一个开源XML解析包,是一个易用的开源库,
	用于处理XML,XPath和XSLT。它应用于Java平台,采用了Java集合框架并完全支持DOM,SAX和JAXP
DOM4J的jar包下载
	http://downloads.sourceforge.net/dom4j/dom4j-1.6.1.jar
DOM4J在线API
	http://www.javasoft.ch/dom4j-2.0.0-beta/apidocs/org/dom4j/package-summary.html
jaxen(对XPath的支持)
	http://dist.codehaus.org/jaxen/distributions/jaxen-1.1.1.zip

Dom4j的主要接口

解析XML之DOM4J_第1张图片
Node
	为所有的dom4j中XML节点定义了多态行为
Branch
	为能够包含子节点的节点,如XML元素(Element)和文档(Document)定义了一个公共的行为
	Element定义XML 元素
	Document定义了XML文档
DocumentType
	定义XML DOCTYPE声明
Entity
	定义 XML entity
Attribute
	定义了XML的属性
ProcessingInstruction
	定义 XML 处理指令
CharacterData
	是一个标识接口,标识基于字符的节点,如CDATA,Comment,Text
	CDATA定义了XML CDATA区域
	Text定义XML文本节点
	Comment定义了XML注释的行为

创建XML文件的两种方法

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE student SYSTEM "dom4j.dtd">
<students>
	<student>
		<name>lwc</name>
		<sex>boy</sex>
		<age>20</age>
	</student>
	<student>
		<name>nxj</name>
		<sex>girl</sex>
		<age>18</age>
	</student>
</students>

转换字符串创建

package com.itlwc;

import java.io.File;
import java.io.FileWriter;

import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;

/**
 * Parses an XML string into a dom4j document and saves it, pretty-printed,
 * to {@code output.xml} in the working directory.
 */
public class Dom4jTest {
	/**
	 * Entry point: builds the document from a hard-coded XML string and writes it out.
	 * Any failure is reported by printing the stack trace.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"+
					"<!DOCTYPE student SYSTEM \"dom4j.dtd\">"+
					"<students>" +
						"<student>" +
							"<name>lwc</name>" +
							"<sex>boy</sex>" +
							"<age>20</age>" +
						"</student>" +
						"<student>" +
							"<name>nxj</name>" +
							"<sex>girl</sex>" +
							"<age>18</age>" +
						"</student>" +
					"</students>";
		java.io.OutputStream stream = null;
		try {
			// Parse the XML text into a document
			Document document = DocumentHelper.parseText(xml);
			// Default pretty-print output format
			OutputFormat format = OutputFormat.createPrettyPrint();
			File file = new File("output.xml");
			// Hand XMLWriter a byte stream rather than a FileWriter: a FileWriter
			// encodes with the platform default charset, which may disagree with
			// the encoding announced in the XML declaration.
			stream = new java.io.FileOutputStream(file);
			XMLWriter output = new XMLWriter(stream, format);
			output.write(document);
			output.close();
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// Safety net: release the file handle even when parsing or writing failed
			if (stream != null) {
				try {
					stream.close();
				} catch (java.io.IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
}

Dom4j的方法创建

package com.itlwc;

import java.io.File;
import java.io.FileWriter;

import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;

/**
 * Builds the students document element by element with the dom4j API and
 * saves it, pretty-printed, to {@code output.xml} in the working directory.
 */
public class Dom4jTest {
	/**
	 * Entry point: creates the document, adds two student records and writes the file.
	 * Any failure is reported by printing the stack trace.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		java.io.OutputStream stream = null;
		try {
			// Create an empty document and attach the DOCTYPE declaration
			Document document = DocumentHelper.createDocument();
			document.addDocType("student", "", "dom4j.dtd");
			// Root element followed by one <student> per record
			Element studentsElement = document.addElement("students");
			addStudent(studentsElement, "lwc", "boy", "20");
			addStudent(studentsElement, "nxj", "girl", "18");

			// Default pretty-print output format
			OutputFormat format = OutputFormat.createPrettyPrint();
			File file = new File("output.xml");
			// Hand XMLWriter a byte stream rather than a FileWriter: a FileWriter
			// encodes with the platform default charset, which may disagree with
			// the encoding announced in the XML declaration.
			stream = new java.io.FileOutputStream(file);
			XMLWriter output = new XMLWriter(stream, format);
			output.write(document);
			output.close();
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// Safety net: release the file handle even when building or writing failed
			if (stream != null) {
				try {
					stream.close();
				} catch (java.io.IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

	/**
	 * Appends a {@code <student>} element with {@code name}, {@code sex} and
	 * {@code age} children (in that order) to the given parent.
	 *
	 * @param parent element that receives the new student element
	 * @param name   text of the name child
	 * @param sex    text of the sex child
	 * @param age    text of the age child
	 */
	private static void addStudent(Element parent, String name, String sex, String age) {
		Element student = parent.addElement("student");
		student.addElement("name").setText(name);
		student.addElement("sex").setText(sex);
		student.addElement("age").setText(age);
	}
}

修改XML文件

package com.itlwc;

import java.io.File;
import java.io.FileWriter;
import java.util.Iterator;
import java.util.List;

import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;

/**
 * Reads {@code output.xml}, renames the student called "lwc" to "itlwc" and
 * saves the result to {@code students-modified.xml}.
 */
public class Dom4jTest {
	/**
	 * Entry point: loads the document, edits matching name elements and writes a new file.
	 * Any failure is reported by printing the stack trace.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		java.io.OutputStream stream = null;
		try {
			SAXReader saxReader = new SAXReader();
			File file = new File("output.xml");
			// Load the file into a document instance
			Document document = saxReader.read(file);
			// selectNodes accepts an XPath expression and returns the matching nodes
			for (Object studentNode : document.selectNodes("//students/student")) {
				Element element = (Element) studentNode;
				for (Object nameNode : element.elements("name")) {
					Element nameElement = (Element) nameNode;
					if ("lwc".equals(nameElement.getText())) {
						// Replace the text content
						nameElement.setText("itlwc");
					}
				}
			}
			// Save under a new name. A byte stream is used instead of a FileWriter
			// because a FileWriter encodes with the platform default charset, which
			// may disagree with the encoding announced in the XML declaration.
			stream = new java.io.FileOutputStream(new File(
					"students-modified.xml"));
			XMLWriter output = new XMLWriter(stream);
			output.write(document);
			output.close();
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// Safety net: release the file handle even when reading or writing failed
			if (stream != null) {
				try {
					stream.close();
				} catch (java.io.IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
}
/*
output.xml
	<?xml version="1.0" encoding="UTF-8"?>
	<students>
		<student no="1001">
			<name>lwc</name>
			<sex>boy</sex>
			<age>20</age>
		</student>
		<student no="1002">
			<name>nxj</name>
			<sex>girl</sex>
			<age>18</age>
		</student>
	</students>
students-modified.xml
	<?xml version="1.0" encoding="UTF-8"?>
	<students>
		<student no="1001">
			<name>itlwc</name>
			<sex>boy</sex>
			<age>20</age>
		</student>
		<student no="1002">
			<name>nxj</name>
			<sex>girl</sex>
			<age>18</age>
		</student>
	</students>
*/

遍历XML文件

<?xml version="1.0" encoding="UTF-8"?>
<students>
	<student no="1001">
		<name>lwc</name>
		<sex>boy</sex>
		<age>20</age>
	</student>
	<student no="1002">
		<name>nxj</name>
		<sex>girl</sex>
		<age>18</age>
	</student>
</students> 

使用迭代遍历

package com.itlwc;

import java.io.File;
import java.util.Iterator;

import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

/**
 * Walks {@code output.xml} with dom4j iterators, printing the root name and, for
 * every child of the root, its name, its child elements and its attributes.
 */
public class Dom4jTest {
	/**
	 * Entry point: loads the document and prints its structure to standard output.
	 * Any failure is reported by printing the stack trace.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		try {
			// Load the file into a document instance
			Document doc = new SAXReader().read(new File("output.xml"));
			// Root element of the document
			Element rootElement = doc.getRootElement();
			System.out.println(rootElement.getName());
			// Visit every direct child element of the root
			for (Iterator<?> children = rootElement.elementIterator(); children.hasNext();) {
				Element child = (Element) children.next();
				System.out.println(child.getName());
				printChildElements(child);
				printAttributes(child);
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/** Prints {@code name:text} for each direct child element of {@code parent}. */
	private static void printChildElements(Element parent) {
		for (Iterator<?> it = parent.elementIterator(); it.hasNext();) {
			Element item = (Element) it.next();
			String line = item.getName() + ":" + item.getText();
			System.out.println(line);
		}
	}

	/** Prints {@code name:data} for each attribute of {@code owner}. */
	private static void printAttributes(Element owner) {
		for (Iterator<?> it = owner.attributeIterator(); it.hasNext();) {
			Attribute attr = (Attribute) it.next();
			String line = attr.getName() + ":" + attr.getData();
			System.out.println(line);
		}
	}
}
/*
打印结果:
	students
	student
	name:lwc
	sex:boy
	age:20
	no:1001
	student
	name:nxj
	sex:girl
	age:18
	no:1002
*/

使用访问者模式遍历

Visitor是GOF设计模式之一,其主要原理就是两种类互相保有对方的引用,
	并且一种作为Visitor去访问许多Visitable
	DOM4J中的Visitor模式只需要自定义一个类实现Visitor接口(或继承VisitorSupport)即可
	public class MyVisitor extends VisitorSupport {
	    public void visit(Element element) {
	       System.out.println(element.getName());
	    }
	 
	    public void visit(Attribute attr) {
	       System.out.println(attr.getName());
	    }
	}
调用:root.accept(new MyVisitor())
Visitor接口提供多种Visit()的重载,根据XML不同的对象,将采用不同的方式来访问
	上面是给出的Element和Attribute的简单实现,一般比较常用的就是这两个
	VisitorSupport是DOM4J提供的默认适配器,Visitor接口的Default Adapter模式,
	这个模式给出了各种visit(*)的空实现,以便简化代码
注意,这个Visitor是自动遍历所有子节点的,
	例如调用root.accept(new MyVisitor()),将依次访问root及其所有后代节点
案例
package com.itlwc;

import java.io.File;

import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.ProcessingInstruction;
import org.dom4j.VisitorSupport;
import org.dom4j.io.SAXReader;

/**
 * 定义自己的访问者类
 */
class MyVisitor extends VisitorSupport {
	// 对于属性节点,打印属性的名字和值
	public void visit(Attribute node) {
		System.out.println("attribute : " + node.getName() + " = "
				+ node.getValue());
	}

	// 对于处理指令节点,打印处理指令目标和数据
	public void visit(ProcessingInstruction node) {
		System.out.println("PI : " + node.getTarget() + " " + node.getText());
	}

	// 对于元素节点,判断是否只包含文本内容,如是,则打印标记的名字和 元素的内容,如果不是,则只打印标记的名字
	public void visit(Element node) {
		if (node.isTextOnly()) {
			System.out.println("element : " + node.getName() + " = "
					+ node.getText());
		} else {
			System.out.println("-----" + node.getName() + "-----");
		}
	}
}

/**
 * Loads {@code output.xml} and lets a {@code MyVisitor} walk the whole document.
 */
public class Dom4jTest {
	/**
	 * Entry point: reads the file and hands the document to the visitor.
	 * Any failure is reported by printing the stack trace.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		try {
			// Load the file into a document instance
			Document doc = new SAXReader().read(new File("output.xml"));
			MyVisitor visitor = new MyVisitor();
			doc.accept(visitor);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}
/* 
打印结果: 
    -----students-----
	-----student-----
	attribute : no = 1001
	element : name = lwc
	element : sex = boy
	element : age = 20
	-----student-----
	attribute : no = 1002
	element : name = nxj
	element : sex = girl
	element : age = 18
*/

你可能感兴趣的:(解析XML之DOM4J)