用 XPath 表达式从 XML 文档中提取信息

package test;

import java.io.IOException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Small demonstration of selecting nodes from an XML document with XPath.
 *
 * <p>{@link #main(String[])} evaluates one sample expression against
 * {@code src/news.xml} and prints the name of every node it selects.
 */
public class News {
	/**
	 * Runs the sample query against {@code src/news.xml} and prints the matches.
	 *
	 * @param args ignored
	 * @throws XPathExpressionException if the XPath expression cannot be compiled or evaluated
	 * @throws ParserConfigurationException if the XML parser cannot be configured
	 * @throws SAXException if the document is not well-formed
	 * @throws IOException if the document cannot be read
	 */
	public static void main(String[] args) throws XPathExpressionException, ParserConfigurationException, SAXException, IOException {
		String uri = "src/news.xml";
		// XPath selecting every div node:
		// String expression="//div";
		// XPath selecting the id attribute of the div whose newname is "军事新闻":
		// String expression="//div[@newname='军事新闻']/@id";
		// XPath selecting the second-to-last div node:
		// String expression="root/p/div[last()-1]";
		// XPath selecting the div nodes whose orders is greater than 3 and whose newtype is "0":
		String expression = "//div[@orders>3 and @newtype=0]";
		print(getNodeList(uri, expression));
	}

	/**
	 * Parses the XML document at {@code uri} and returns the nodes selected by
	 * {@code expression}.
	 *
	 * <p>The parser is namespace-aware and is configured not to resolve external
	 * general entities, external parameter entities, or external DTDs, so a document
	 * cannot make the parser read local files or open network connections (XXE).
	 * Documents with an internal DOCTYPE and internal entities still parse.
	 *
	 * @param uri location of the XML document, as accepted by {@link DocumentBuilder#parse(String)}
	 * @param expression XPath expression that evaluates to a node set
	 * @return the selected nodes; empty when nothing matches
	 * @throws ParserConfigurationException if the parser cannot be created or does not
	 *         support one of the hardening features
	 * @throws SAXException if the document is not well-formed
	 * @throws IOException if the document cannot be read
	 * @throws XPathExpressionException if the expression cannot be compiled or evaluated
	 */
	static NodeList getNodeList(String uri,String expression)  throws ParserConfigurationException, SAXException,  IOException, XPathExpressionException {
		DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
		// Without namespace awareness, XPath matching on namespaced documents misbehaves.
		domFactory.setNamespaceAware(true);
		// XXE hardening: never fetch anything the document itself points at.
		domFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
		domFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
		domFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

		DocumentBuilder builder = domFactory.newDocumentBuilder();
		Document doc = builder.parse(uri);

		XPath xpath = XPathFactory.newInstance().newXPath();
		XPathExpression expr = xpath.compile(expression);
		return (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
	}

	/**
	 * Prints the node name of each entry in {@code nodes} on its own line.
	 *
	 * @param nodes nodes to print
	 */
	static void print(NodeList nodes){
		for (int i = 0; i < nodes.getLength(); i++) {
			System.out.println(nodes.item(i).getNodeName());
		}
	}
}

 

news.xml:

 

<?xml version="1.0" encoding="utf-8"?>
<root>
	<p>
		<div id="1" newname="业界新闻" newtype="1" orders="1" comment="" />
		<div id="2" newname="汽车新闻" newtype="0" orders="2" comment="" />
		<div id="3" newname="军事新闻" newtype="1" orders="3" comment="">
			军事新闻备注</div>
		<div id="4" newname="法制新闻" newtype="1" orders="4" />
		<div id="5" newname="房产新闻" newtype="0" orders="5" />
		<div id="6" newname="其他新闻" newtype="0" orders="6" comment="" />
	</p>
</root>

 

Java 语言的 XPath API:http://www.ibm.com/developerworks/cn/xml/x-javaxpathapi.html

你可能感兴趣的:(html,xml,IBM)