1. XML解析方式分为两种:DOM方式和SAX方式
2. XML解析开发包
1. JAXP:(Java API for XML Processing)开发包是JavaSE的一部分,它由以下几个包及其子包组成:
2. javax.xml.parsers包中,定义了几个工厂类。我们可以通过调用这些工厂类,得到对XML文档进行解析的DOM和SAX解析器对象。
javax.xml.parsers 包中的DocumentBuilderFactory用于创建DOM模式的解析器对象 , DocumentBuilderFactory是一个抽象工厂类,它不能直接实例化,但该类提供了一个newInstance方法 ,这个方法会根据本地平台默认安装的解析器,自动创建一个工厂的对象并返回。
src/books.xml
<书架>
<书>
<书名>你所谓的稳定,不过是在浪费生命</书名>
<作者>李尚龙</作者>
<售价>32</售价>
</书>
<书>
<书名>你要么出众,要么出局</书名>
<作者>李尚龙2</作者>
<售价>27</售价>
<批发价>12</批发价></书>
</书架>
com.it.utils.DemoUtil.java
package com.it.utils;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
/**
 * Small helpers for loading an XML file into a DOM {@link Document} and for
 * writing a DOM {@link Document} back to a file using the JAXP APIs.
 */
public class DemoUtil {
    /**
     * Parses the XML resource at the given location into a DOM document.
     *
     * @param path location of the XML to read (file path or URI), e.g. {@code "src/books.xml"}
     * @return the parsed document
     * @throws Exception if the parser cannot be created or the XML cannot be read or parsed
     */
    public static Document getDocument(String path) throws Exception {
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = dbFactory.newDocumentBuilder();
        // Parse the location supplied by the caller instead of a hard-coded file.
        return builder.parse(path);
    }

    /**
     * Serializes an in-memory DOM document to the given file.
     *
     * @param document the document to write
     * @param path     destination of the serialized XML
     * @throws Exception if the transformer cannot be created or the output cannot be written
     */
    public static void writeXmlToFile(Document document, String path) throws Exception {
        Transformer tf = TransformerFactory.newInstance().newTransformer();
        tf.transform(new DOMSource(document), new StreamResult(path));
    }
}
com.it.test.DomTest.java
package com.it.test;
import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import com.it.utils.DemoUtil;
/**
 * DOM parsing demo: reads the first author from the books document and appends
 * a wholesale-price element to the second book.
 */
public class DomTest {

    /** Entry point; runs the "append a child element" demo. */
    public static void main(String[] args) throws Exception {
        test2();
    }

    /**
     * Prints the text content of the first author element in the document.
     *
     * @throws Exception if the document cannot be loaded
     */
    public static void test1() throws Exception {
        Document doc = DemoUtil.getDocument("src/books.xml");
        // Locate the first matching element and print its text.
        Node firstAuthor = doc.getElementsByTagName("作者").item(0);
        String authorText = firstAuthor.getTextContent();
        System.out.println(authorText);
    }

    /**
     * Appends a new child element with text "12" to the second book element
     * and writes the document back to the same file.
     *
     * @throws Exception if the document cannot be loaded or written
     */
    public static void test2() throws Exception {
        // 1. Load the document.
        Document doc = DemoUtil.getDocument("src/books.xml");

        // 2. Find the second book element.
        NodeList allBooks = doc.getElementsByTagName("书");
        Node targetBook = allBooks.item(1);

        // 3. Build the new element with its text value and attach it.
        Element wholesalePrice = doc.createElement("批发价");
        wholesalePrice.setTextContent("12");
        targetBook.appendChild(wholesalePrice);

        // 4. Write the modified document back to disk.
        DemoUtil.writeXmlToFile(doc, "src/books.xml");
    }
}
DOM模型(Document Object Model)
1. DOM解析器在解析XML文档时,会把文档中的所有元素,按照其出现的层次关系,解析成一个个Node对象(节点)。
2. 在DOM中,节点之间关系如下:
3. 节点类型
DOM解析编程
方式一:
package www.it.sax;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Minimal SAX demo: parses the books file and prints a line for every
 * document/element event and every chunk of character data.
 */
public class SaxTest {

    /**
     * Parses {@code src/books.xml} with an event-printing handler.
     *
     * @throws Exception if the parser cannot be created or parsing fails
     */
    public static void main(String[] args) throws Exception {
        // Obtain a SAX parser from the platform's default factory.
        SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();

        // Handler that reports each SAX callback on standard output.
        DefaultHandler printingHandler = new DefaultHandler() {
            @Override
            public void startDocument() throws SAXException {
                System.out.println("文档开始了");
            }

            @Override
            public void endDocument() throws SAXException {
                System.out.println("文档结束了");
            }

            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes)
                    throws SAXException {
                System.out.println("元素开始了");
            }

            @Override
            public void endElement(String uri, String localName, String qName) throws SAXException {
                System.out.println("元素结束了");
            }

            @Override
            public void characters(char[] ch, int start, int length) throws SAXException {
                String chunk = String.valueOf(ch, start, length);
                System.out.println(chunk);
            }
        };

        saxParser.parse("src/books.xml", printingHandler);
    }
}
方式二:封装数据到Javabean中
Book.java
package www.it.domain;
/**
 * Mutable bean describing a book: its title, author and price.
 */
public class Book {
    private String bookName;
    private String author;
    private double price;

    /** Creates a book with no title, no author and a price of zero. */
    public Book() {
    }

    /**
     * Creates a fully populated book.
     *
     * @param bookName title of the book
     * @param author   author of the book
     * @param price    price of the book
     */
    public Book(String bookName, String author, double price) {
        this.bookName = bookName;
        this.author = author;
        this.price = price;
    }

    /** @return the title of the book */
    public String getBookName() {
        return this.bookName;
    }

    /** @param bookName the new title */
    public void setBookName(String bookName) {
        this.bookName = bookName;
    }

    /** @return the author of the book */
    public String getAuthor() {
        return this.author;
    }

    /** @param author the new author */
    public void setAuthor(String author) {
        this.author = author;
    }

    /** @return the price of the book */
    public double getPrice() {
        return this.price;
    }

    /** @param price the new price */
    public void setPrice(double price) {
        this.price = price;
    }

    /** @return a one-line description listing every field */
    @Override
    public String toString() {
        StringBuilder description = new StringBuilder("Book [bookName=");
        description.append(bookName);
        description.append(", author=").append(author);
        description.append(", price=").append(price);
        description.append("]");
        return description.toString();
    }
}
SaxTest2.java
package www.it.sax;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import www.it.domain.Book;
/**
 * SAX demo that maps every book element of the books file onto a {@link Book}
 * bean and prints the resulting list.
 */
public class SaxTest2 {

    /**
     * Parses {@code src/books.xml}, collecting one {@link Book} per book element.
     *
     * @throws Exception if the parser cannot be created or parsing fails
     */
    public static void main(String[] args) throws Exception {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser parser = factory.newSAXParser();
        // Obtain the underlying XMLReader so a content handler can be installed.
        XMLReader reader = parser.getXMLReader();
        // Typed list: a raw List cannot be iterated as Book in the for-each below.
        final List<Book> books = new ArrayList<Book>();

        reader.setContentHandler(new DefaultHandler() {
            /** Book currently being populated, or null outside a book element. */
            private Book book = null;
            /** Name of the element whose text is being collected, or null. */
            private String node = null;
            /** Text gathered for the current element; SAX may deliver it in several chunks. */
            private final StringBuilder text = new StringBuilder();

            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes)
                    throws SAXException {
                if ("书".equals(qName)) {
                    book = new Book();
                }
                node = qName;
                text.setLength(0);
            }

            @Override
            public void endElement(String uri, String localName, String qName) throws SAXException {
                // Only assign when this is the element whose text was being collected
                // and a book is actually open.
                if (book != null && qName.equals(node)) {
                    String value = text.toString();
                    if ("书名".equals(qName)) {
                        book.setBookName(value);
                    } else if ("作者".equals(qName)) {
                        book.setAuthor(value);
                    } else if ("售价".equals(qName)) {
                        book.setPrice(Double.parseDouble(value));
                    }
                }
                if ("书".equals(qName)) {
                    books.add(book);
                    book = null;
                }
                node = null;
                text.setLength(0);
            }

            @Override
            public void characters(char[] ch, int start, int length) throws SAXException {
                // Accumulate instead of overwriting: one text node can arrive as multiple calls.
                if (node != null) {
                    text.append(ch, start, length);
                }
            }
        });

        // Load and parse the XML file.
        reader.parse("src/books.xml");

        // Print every collected book.
        for (Book b : books) {
            System.out.println(b);
        }
    }
}
在DOM4J中,获得Document对象的方式有三种:
1. 读取XML文件,获得document对象
SAXReader reader = new SAXReader();
Document document = reader.read(new File("input.xml"));
2. 解析XML形式的文本,得到document对象
String text = "<members></members>";
Document document = DocumentHelper.parseText(text);
3. 主动创建document对象
Document document = DocumentHelper.createDocument();
//创建根节点
Element root = document.addElement("members");
1. 获取文档根节点
Element root = document.getRootElement();
2. 取得某个节点的子节点
Element element = node.element("书名");
3. 取得节点的文字
String text = node.getText();
4. 取得某个节点下所有名为“member”的子节点,并进行遍历
List nodes = rootElement.elements("member");
for(Iterator it = nodes.iterator();it.hasNext();){
Element ele = (Element)it.next();
//do something
}
5. 对某节点下的所有子节点进行遍历
for(Iterator it = root.elementIterator();it.hasNext();){
Element element = (Element)it.next();
}
6. 在某节点下添加子节点
Element ageElm = newMemberElm.addElement("age");
7. 设置节点文字
element.setText("29");
8. 删除某节点
//childElm是待删除的节点,parentElm是其父节点
parentElm.remove(childElm);
9. 添加一个CDATA节点
Element contentElm = infoElm.addElement("content");
contentElm.addCDATA(diary.getContent());
1. 取得某节点下的某属性
Element root = document.getRootElement();
//属性名name
Attribute attribute = root.attribute("size");
2. 取得属性的文字
String text = attribute.getText();
3. 删除某属性
Attribute attribute = root.attribute("size");
root.remove(attribute);
4. 遍历某节点的所有属性
Element root = document.getRootElement();
for(Iterator it = root.attributeIterator();it.hasNext();){
Attribute attribute = (Attribute)it.next();
String text = attribute.getText();
System.out.println(text);
}
5. 设置某节点的属性和文字
newMemberElm.addAttribute("name","sitinspring");
6. 设置属性的文字
Attribute attribute = root.attribute("name");
attribute.setText("sitinspring");
1. 文档中全为英文,不设置编码,直接写入的形式
XMLWriter writer = new XMLWriter(new FileWriter("output.xml"));
writer.write(document);
writer.close();
2. 文档中含有中文,设置编码格式写入的形式
OutputFormat format = OutputFormat.createPrettyPrint();
//指定XML编码
format.setEncoding("GBK");
XMLWriter writer = new XMLWriter(new FileOutputStream("output.xml"), format);
writer.write(document);
writer.close();
1. 得到插入位置的节点列表(list)
2. 调用list.add(index,element),由index决定element的插入位置。
3. Element元素可以通过DocumentHelper对象得到,示例代码:
Element aaa = DocumentHelper.createElement("aaa");
aaa.setText("aaa");
List list = root.element("书").elements();
list.add(1,aaa);
//更新document
1. 将字符串转换为XML
String text = "<members></members>";
Document document = DocumentHelper.parseText(text);
2. 将文档或节点的XML转换为字符串
SAXReader reader = new SAXReader();
Document document = reader.read(new File("input.xml"));
Element root = document.getRootElement();
String docXmlText = document.asXML();
String rootXmlText = root.asXML();
Element memberElm = root.element("member");
String memberXmlText = memberElm.asXML();
Dom4JUtil
package com.it.utils;
import java.io.FileOutputStream;
import java.io.IOException;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;
/**
 * dom4j helpers for reading the books XML file into a {@link Document} and
 * writing a {@link Document} back to that file.
 */
public class Dom4JUtil {
    /**
     * Parses {@code src/books.xml} into a dom4j document.
     *
     * @return the parsed document
     * @throws DocumentException if the file cannot be read or parsed
     */
    public static Document getDocument() throws DocumentException {
        SAXReader reader = new SAXReader();
        return reader.read("src/books.xml");
    }

    /**
     * Writes the given document to {@code src/books.xml} using pretty-print formatting.
     *
     * @param document the document to serialize
     * @throws IOException if the file cannot be opened or written
     */
    public static void writeDocumentToXml(Document document) throws IOException {
        FileOutputStream out = new FileOutputStream("src/books.xml");
        try {
            XMLWriter writer = new XMLWriter(out, OutputFormat.createPrettyPrint());
            writer.write(document);
            // Push any buffered characters to the stream before it is closed.
            writer.flush();
        } finally {
            // Always release the file handle, even when writing fails.
            out.close();
        }
    }
}
Dom4JTest
package com.it.dom4j.test;
import java.io.IOException;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Node;
import org.junit.Test;
import com.it.utils.Dom4JUtil;
/**
 * JUnit exercises for basic dom4j operations (read, traverse, modify, insert,
 * delete, attributes) on the document supplied by {@code Dom4JUtil}.
 *
 * <p>Load and save failures are rethrown as unchecked exceptions so that a
 * failing operation fails the test instead of only printing a stack trace.
 */
public class Dom4jTest {

    /**
     * 1. Reads a specific node's content: prints the author of the second book.
     */
    @Test
    public void test1() {
        // 1. Load the document and get its root element.
        Element root = loadDocument().getRootElement();
        // 2. Locate the second book.
        Element node = (Element) root.elements().get(1);
        // 3. Locate its author element and print the text.
        Element author = node.element("作者");
        String authorText = author.getText();
        System.out.println(authorText);
    }

    /**
     * 2. Walks all element nodes and prints their names.
     */
    @Test
    public void test2() {
        Element root = loadDocument().getRootElement();
        treeWalk(root);
    }

    /**
     * Prints the name of the given element, then recurses into each child
     * node that is itself an element.
     *
     * @param ele element to start from
     */
    public void treeWalk(Element ele) {
        System.out.println(ele.getName());
        for (int i = 0; i < ele.nodeCount(); i++) {
            Node node = ele.node(i);
            if (node.getNodeType() == Node.ELEMENT_NODE) {
                treeWalk((Element) node);
            }
        }
    }

    /**
     * 3. Changes an element's text: sets the price of the first book to 35.
     */
    @Test
    public void test3() {
        Document document = loadDocument();
        Element root = document.getRootElement();
        // First book element, then its price child.
        Element bookEle = root.element("书");
        Element saleEle = bookEle.element("售价");
        saleEle.setText("35");
        // Write the change back to the XML file.
        saveDocument(document);
    }

    /**
     * 4. Adds a child element: appends a wholesale-price element to the second book.
     */
    @Test
    public void test4() {
        Document document = loadDocument();
        Element root = document.getRootElement();
        Element bookEle = (Element) root.elements().get(1);
        // Create the child and set its text in one step.
        bookEle.addElement("批发价").setText("12");
        saveDocument(document);
    }

    /**
     * 5. Inserts a sibling element at a position: adds an internal-price element
     * at index 2 of the first book's child elements.
     */
    @Test
    public void test5() {
        Document document = loadDocument();
        Element root = document.getRootElement();
        Element bookEle = root.element("书");
        // Build a detached element, then insert it into the live child list.
        Element innerPriceElement = DocumentHelper.createElement("内部价");
        innerPriceElement.setText("15");
        bookEle.elements().add(2, innerPriceElement);
        saveDocument(document);
    }

    /**
     * 6. Deletes an element: removes the internal-price element of the first book.
     */
    @Test
    public void test6() {
        Document document = loadDocument();
        Element root = document.getRootElement();
        Element bookEle = root.element("书");
        Element innerPriceEle = bookEle.element("内部价");
        // Removal is performed through the parent element.
        bookEle.remove(innerPriceEle);
        saveDocument(document);
    }

    /**
     * 7. Works with attributes: adds a bookID attribute to the second book,
     * prints its value and saves the document.
     */
    @Test
    public void test7() {
        Document document = loadDocument();
        Element root = document.getRootElement();
        Element bookEle = (Element) root.elements().get(1);
        bookEle.addAttribute("bookID", "isb-111");
        // Read the attribute back to show the lookup API.
        String str = bookEle.attributeValue("bookID");
        System.out.println(str);
        saveDocument(document);
    }

    /** Loads the document, converting a parse failure into an unchecked exception. */
    private static Document loadDocument() {
        try {
            return Dom4JUtil.getDocument();
        } catch (DocumentException e) {
            throw new IllegalStateException("Failed to load the XML document", e);
        }
    }

    /** Saves the document, converting a write failure into an unchecked exception. */
    private static void saveDocument(Document document) {
        try {
            Dom4JUtil.writeDocumentToXml(document);
        } catch (IOException e) {
            throw new IllegalStateException("Failed to write the XML document", e);
        }
    }
}
参考学习文档:XpathTutorial。功能强大,找节点非常快。
使用前需要导包。