Dom4j 操作XML

Dom4j也可以很方便地完成XML文档的创建、元素的修改、文档的查询遍历等,但dom4j比jdom稍微复杂一点,不过在处理大型文档的情况下dom4j的性能要比jdom好。

 

准备

首先,提供相关的jar包

Dom4j jar包下载:

http://sourceforge.net/projects/dom4j/files/dom4j-2.0.0-ALPHA-2/

jaxen jar包下载:

http://repo1.maven.org/maven2/jaxen/jaxen/1.1.1/jaxen-1.1.1.jar

和dom4j依赖或相关的jar:

http://dom4j.sourceforge.net/dependencies.html

Junit-jar下载:

http://ebr.springsource.com/repository/app/bundle/version/download?name=com.springsource.org.junit&version=4.8.1&type=binary

 

其次,准备测试案例的部分代码:

package com.hoo.test;

import java.io.File;
import java.util.Iterator;
import java.util.List;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.QName;
import org.dom4j.dom.DOMAttribute;
import org.dom4j.io.SAXReader;
import org.dom4j.tree.BaseElement;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

/**
 * <b>function:</b> Examples of manipulating XML with dom4j.
 * <p>
 * This class holds the shared fixture: a {@link SAXReader} that is created before
 * each test and released afterwards, plus a small console-printing helper.
 *
 * @author hoojo
 * @createDate 2011-8-5 06:15:40 PM
 * @file DocumentTest.java
 * @package com.hoo.test
 * @project Dom4jTest
 * @blog http://blog.csdn.net/IBM_hoojo
 * @email [email protected]
 * @version 1.0
 */
public class DocumentTest {
	
	/** Reader used to parse XML files; a fresh instance is assigned before every test. */
	private SAXReader reader;
	
	/** Creates a new {@link SAXReader} before each test. */
	@Before
	public void init() {
		reader = new SAXReader();
	}
	
	/**
	 * Releases the reader after each test.
	 * <p>
	 * No explicit {@code System.gc()} call is made here: it is only a hint to the JVM,
	 * has no functional effect on the tests, and slows down every test run.
	 */
	@After
	public void destory() {
		reader = null;
	}
	
	/**
	 * Prints the given object to standard output; despite its name it never fails a test.
	 *
	 * @param o the value to print; {@code null} is silently ignored
	 */
	public void fail(Object o) {
		if (o != null) {
			System.out.println(o);
		}
	}
}

 

创建一篇XML文档

文档格式如下:

<?xml version="1.0" encoding="UTF-8"?> 
<catalog> 
	<!--An XML Catalog--> 
	<?target instruction?>
	<journal title="XML Zone" publisher="IBM developerWorks"> 
		 <article level="Intermediate" date="December-2001">
			 <title>Java configuration with XML Schema</title> 
			 <author> 
			     <firstname>Marcello</firstname> 
			     <lastname>Vitaletti</lastname> 
			 </author>
	  	 </article>
	</journal> 
</catalog>

 

创建文档代码如下:

/**
 * <b>function:</b> Builds the sample catalog document in memory and prints
 * its root element name followed by its XML text.
 * @author hoojo
 * @createDate 2011-8-5 06:18:18 PM
 */
@Test
public void createDocument() {
	Document document = DocumentHelper.createDocument();
	
	// Root element, then the comment and the processing instruction inside it.
	Element catalog = document.addElement("catalog");
	catalog.addComment("An XML Catalog");
	catalog.addProcessingInstruction("target", "instruction");
	
	// A detached element: configured first, attached to the root afterwards.
	Element journal = new BaseElement("journal");
	journal.addAttribute("title", "XML Zone")
			.addAttribute("publisher", "IBM developerWorks");
	catalog.add(journal);
	
	// addElement creates the child and attaches it in one step.
	Element article = journal.addElement("article")
			.addAttribute("level", "Intermediate")
			.addAttribute("date", "December-2001");
	
	// setText and addText both assign the text of a freshly created element.
	article.addElement("title").setText("Java configuration with XML Schema");
	
	Element author = article.addElement("author");
	author.addElement("firstname").setText("Marcello");
	author.addElement("lastname").addText("Vitaletti");
	
	// Declare the document type.
	document.addDocType("catalog", null, "file://c:/Dtds/catalog.dtd");
	
	fail(document.getRootElement().getName());
	
	// Serialize the whole document to an XML string.
	fail(document.asXML());
	
	// To persist the document instead of printing it, enable the following
	// (requires XMLWriter, FileWriter and IOException to be imported):
	//
	// try {
	//     XMLWriter output = new XMLWriter(new FileWriter(new File("file/catalog.xml")));
	//     output.write(document);
	//     output.close();
	// } catch (IOException e) {
	//     e.printStackTrace();
	// }
}
*  DocumentHelper是一个文档助手类(工具类),它可以完成文档、元素、文本、属性、注释、CDATA、Namespace、XPath的创建,以及利用XPath完成文档的遍历和将文本转换成Document;

parseText完成将xml字符串转换成Doc的功能

Document doc = DocumentHelper.parseText("<root></root>");

 

createDocument创建一个文档

Document doc = DocumentHelper.createDocument();

如果带参数就会创建一个带有根元素的文档

createElement创建一个元素

Element el = DocumentHelper.createElement("el");

 

*  Document的addElement方法可以给当前文档添加一个子元素

Element root = doc.addElement("catalog");

 

*  addComment方法可以添加一段注释

root.addComment("An XML Catalog");

为root元素添加一段注释

 

*  addProcessingInstruction添加一条处理指令(Processing Instruction)

root.addProcessingInstruction("target", "instruction");

为root元素添加一条处理指令,即 <?target instruction?>

 

*  new BaseElement可以创建一个元素

Element journalEl = new BaseElement("journal");

 

*  addAttribute添加属性

journalEl.addAttribute("title", "XML Zone");

 

*  add添加一个元素

root.add(journalEl);

将journalEl元素添加到root元素中

 

*  addElement添加一个子元素,并返回新添加的这个子元素

Element articleEl = journalEl.addElement("article");

给journalEl元素添加一个子元素article

 

*  setText、addText可以设置元素的文本

authorEl.addElement("firstname").setText("Marcello");

authorEl.addElement("lastname").addText("Vitaletti");

 

*  addDocType可以设置文档的DOCTYPE

doc.addDocType("catalog", null,"file://c:/Dtds/catalog.dtd");

 

*  asXML可以将文档或元素转换成一段xml字符串

doc.asXML();

root.asXML();

 

*  XMLWriter类可以把文档写入到文件中

output = new XMLWriter(new FileWriter(new File("file/catalog.xml")));

output.write(doc);

output.close();

 

修改XML文档内容

/**
 * <b>function:</b> Reads {@code file/catalog.xml}, changes selected attribute
 * values and element texts in memory, and prints every value before and after
 * the change. Any exception is printed instead of being rethrown.
 * @author hoojo
 * @createDate 2011-8-9 03:37:04 PM
 */
@SuppressWarnings("unchecked")
@Test
public void modifyDoc() {
	try {
		Document document = reader.read(new File("file/catalog.xml"));
		
		// Attribute values, selected directly through XPath attribute steps.
		replaceAttributeValue(document, "//article/@level", "Intermediate", "Introductory");
		replaceAttributeValue(document, "//article/@date", "December-2001", "December-2011");
		
		// Text of the title element inside every article.
		for (Object found : document.selectNodes("//article")) {
			Element article = (Element) found;
			fail(article.getName() + "#" + article.getText() + "#" + article.getStringValue());
			for (Iterator<Element> titles = article.elementIterator("title"); titles.hasNext();) {
				Element title = titles.next();
				fail(title.getName() + "#" + title.getText() + "#" + title.getStringValue());
				if (!"Java configuration with XML Schema".equals(title.getTextTrim())) {
					continue;
				}
				title.setText("Modify the Java configuration with XML Schema");
				fail(title.getName() + "#" + title.getText() + "#" + title.getStringValue());
			}
		}
		
		// Text of the child elements of every author.
		for (Object found : document.selectNodes("//article/author")) {
			Element author = (Element) found;
			fail(author.getName() + "#" + author.getText() + "#" + author.getStringValue());
			for (Object child : author.elements()) {
				Element namePart = (Element) child;
				fail(namePart.getName() + "#" + namePart.getText() + "#" + namePart.getStringValue());
				String trimmed = namePart.getTextTrim();
				if ("Marcello".equals(trimmed)) {
					namePart.setText("Ayesha");
				} else if ("Vitaletti".equals(trimmed)) {
					namePart.setText("Malik");
				}
				fail(namePart.getName() + "#" + namePart.getText() + "#" + namePart.getStringValue());
			}
		}
		
		// To persist the result, enable the following
		// (requires XMLWriter and FileWriter to be imported):
		//
		// XMLWriter output = new XMLWriter(new FileWriter(new File("file/catalog-modified.xml")));
		// output.write(document);
		// output.close();
	} catch (Exception e) {
		// Covers DocumentException from reading as well as any runtime failure.
		e.printStackTrace();
	}
}

/**
 * Prints every attribute selected by {@code xpath} and replaces the value of
 * those whose current value equals {@code expected}, printing them again afterwards.
 *
 * @param document the document to query
 * @param xpath an XPath expression that selects attributes
 * @param expected the attribute value to look for
 * @param replacement the value written in place of {@code expected}
 */
private void replaceAttributeValue(Document document, String xpath, String expected, String replacement) {
	for (Object found : document.selectNodes(xpath)) {
		Attribute attribute = (Attribute) found;
		fail(attribute.getName() + "#" + attribute.getValue() + "#" + attribute.getText());
		if (expected.equals(attribute.getValue())) {
			attribute.setValue(replacement);
			fail(attribute.getName() + "#" + attribute.getValue() + "#" + attribute.getText());
		}
	}
}
*  reader.read(new File("file/catalog.xml"));读取指定xml文件内容到文档中;

*  selectNodes是XPath的查询方法,完成xml文档的查询,传递xpath路径。其使用方法可以参考jdom的xpath的使用方法:http://www.cnblogs.com/hoojo/archive/2011/08/11/2134638.html

*  getName获取元素标签名称、getValue、getText获取值、文本内容;

*  elementIterator("title");获取当前节点下所有的title元素,返回Iterator;

*  elements获取下面所有的子元素,返回的是一个集合List;

 

显示文档相关信息

/**
 * Builds the indentation prefix used when printing nested elements.
 *
 * @param i nesting depth; zero or a negative value yields an empty string
 * @return the string {@code "--"} repeated {@code i} times
 */
private String format(int i) {
	// StringBuilder avoids re-copying the whole prefix on every iteration.
	StringBuilder prefix = new StringBuilder();
	for (int level = 0; level < i; level++) {
		prefix.append("--");
	}
	return prefix.toString();
}

/**
 * <b>function:</b> Recursively prints the name and trimmed text of each element,
 * prefixed with an indentation marker that grows by one step per nesting level.
 * @author hoojo
 * @createDate 2011-8-9 03:43:45 PM
 * @param i depth of the enclosing level; the given elements are printed at depth {@code i + 1}
 * @param els the elements to print
 */
private void print(int i, List<Element> els) {
	final int depth = i + 1;
	final String prefix = format(depth) + "##";
	for (Element current : els) {
		fail(prefix + current.getName() + "#" + current.getTextTrim());
		if (!current.hasContent()) {
			continue;
		}
		// Descend into the child elements one level deeper.
		print(depth, current.elements());
	}
}

/**
 * <b>function:</b> Reads {@code file/catalog.xml} and prints information about
 * the document, followed by the current settings of the reader. The reader
 * settings are printed even when reading the document fails.
 * @author hoojo
 * @createDate 2011-8-9 03:44:10 PM
 */
@Test
public void printInfo() {
	try {
		Document document = reader.read(new File("file/catalog.xml"));
		fail("asXML: " + document.asXML());
		
		fail(document.asXPathResult(new BaseElement("article")));
		
		// Nodes that sit directly under the document itself.
		List<Node> topLevelNodes = document.content();
		for (Node topLevel : topLevelNodes) {
			fail("Node: " + topLevel.getName() + "#" + topLevel.getText() + "#" + topLevel.getStringValue());
		}
		
		fail("-----------------------------");
		
		// Element tree below the root, printed recursively.
		Element root = document.getRootElement();
		print(0, root.elements());
		
		fail("getDocType: " + document.getDocType());
		fail("getNodeTypeName: " + document.getNodeTypeName());
		fail("getPath: " + root.getPath());
		fail("getPath: " + root.getPath(new BaseElement("journal")));
		fail("getUniquePath: " + root.getUniquePath());
		fail("getXMLEncoding: " + document.getXMLEncoding());
		fail("hasContent: " + document.hasContent());
		fail("isReadOnly: " + document.isReadOnly());
		fail("nodeCount: " + document.nodeCount());
		fail("supportsParent: " + document.supportsParent());
	} catch (DocumentException e) {
		e.printStackTrace();
	}
	
	// Reader configuration.
	fail("getEncoding: " + reader.getEncoding());
	fail("isIgnoreComments: " + reader.isIgnoreComments());
	fail("isMergeAdjacentText: " + reader.isMergeAdjacentText());
	fail("isStringInternEnabled: " + reader.isStringInternEnabled());
	fail("isStripWhitespaceText: " + reader.isStripWhitespaceText());
	fail("isValidating: " + reader.isValidating());
}

 

删除文档内容

/**
 * <b>function:</b> Reads {@code file/catalog-modified.xml}, removes a comment,
 * an attribute, two elements and a text node, then prints the resulting XML.
 * Any exception is printed instead of being rethrown.
 * @author hoojo
 * @createDate 2011-8-9 03:47:44 PM
 */
@Test
public void removeNode() {
	try {
		Document document = reader.read(new File("file/catalog-modified.xml"));
		
		// Comment: looked up once, printed, then removed from the root element.
		Node comment = document.selectSingleNode("//comment()");
		fail("comment: " + comment);
		document.getRootElement().remove(comment);
		
		Element article = (Element) document.selectSingleNode("//article");
		// Attribute of the article.
		article.remove(new DOMAttribute(QName.get("level"), "Introductory"));
		// Child element of the article.
		article.remove(document.selectSingleNode("//title"));
		
		// remove() only works on direct children, so a deeper node has to be
		// removed through its own parent.
		Node lastName = article.selectSingleNode("//lastname");
		lastName.getParent().remove(lastName);
		
		fail("Text: " + document.selectObject("//*[text()='Ayesha']"));
		Element firstName = (Element) document.selectObject("//firstname");
		fail("Text: " + firstName.selectSingleNode("text()"));
		
		// Text node of firstname. The tutorial lists these as equivalent lookups:
		//   firstName.selectSingleNode("text()")
		//   document.selectSingleNode("//firstname/text()")
		firstName.remove(document.selectSingleNode("//*[text()='Ayesha']/text()"));
		
		// The author child could be removed the same way:
		//   article.remove(article.selectSingleNode("//author"));
		
		fail(document.asXML());
	} catch (Exception e) {
		e.printStackTrace();
	}
}

*  删除注释

doc.getRootElement().remove(doc.selectSingleNode("//comment()"));

删除root元素下面的注释

 

*  删除属性

node.remove(new DOMAttribute(QName.get("level"), "Introductory"));

删除node节点中的名称为level,其值为Introductory的属性

 

*  删除元素

node.remove(doc.selectSingleNode("//title"));

删除node节点下的title元素

 

*  删除文本

firstNameEl.remove(firstNameEl.selectSingleNode("text()"));

firstNameEl.remove(doc.selectSingleNode("//firstname/text()"));

firstNameEl.remove(doc.selectSingleNode("//*[text()='Ayesha']/text()"));

删除firstNameEl的文本内容


你可能感兴趣的:(xml,exception,schema,list,File,文档)