java jsoupxpath_使用Jsoup的XPath表达式

这个需求既可以用 XPath 实现,也可以用 Jsoup 实现。请看下面的例子。

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

public class SibilingParse {

public static void main(String[] args) {

try {

String html = "

+ "

"

+ "

10110"

+ "

IIND1000"

+ "

1
"

+ "

3
"

+ "

INTROD. INGEN. INDUSTRIAL"

+ "

100"

+ "

100"

+ "

0"

+ "

"

+ "

";

//Xpath way

System.out.println("XPATH");

InputStream xmlStream = new ByteArrayInputStream(html.getBytes());

DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();

DocumentBuilder builder = builderFactory.newDocumentBuilder();

Document xmlDocument = builder.parse(xmlStream);

XPath xPath = XPathFactory.newInstance().newXPath();

String expression = "/table/tr/td//*[text()='10110']//../following-sibling::td";

NodeList nodeList = (NodeList) xPath.compile(expression).evaluate(xmlDocument, XPathConstants.NODESET);

for (int i = 0; i < nodeList.getLength(); i++) {

System.out.println(nodeList.item(i).getFirstChild().getTextContent());

}

System.out.println();

// Jsoup way

org.jsoup.nodes.Document doc = Jsoup.parse(html);

Elements tds = doc.select("td:contains(10110)");

if(tds != null && tds.size() > 0){

for(Element td : tds.first().siblingElements()){

System.out.println(td.text());

}

}

} catch (ParserConfigurationException e) {

e.printStackTrace();

} catch (SAXException e) {

e.printStackTrace();

} catch (IOException e) {

e.printStackTrace();

} catch (XPathExpressionException e) {

e.printStackTrace();

}

}

}基于Url

import java.io.IOException;

import org.jsoup.Jsoup;

import org.jsoup.nodes.Document;

import org.jsoup.nodes.Element;

import org.jsoup.select.Elements;

/**
 * Fetches the course listing page with jsoup and prints the text of the
 * cells that sit beside the one containing {@code 10110}.
 */
public class SiblingJsoup {

    /**
     * Downloads the page, locates the {@code font} element whose own text
     * contains 10110, and prints each sibling of its first ancestor entry.
     * Prints {@code Done} when the fetch succeeded.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Document page;
        try {
            page = Jsoup
                    .connect("http://registroapps.uniandes.edu.co/scripts/adm_con_horario1_joomla.php?depto=IIND")
                    .timeout(20000)
                    .get();
        } catch (IOException e) {
            // Fetch failed: report it and stop without printing "Done".
            e.printStackTrace();
            return;
        }

        Elements matches = page.select("font:containsOwn(10110)");
        boolean found = matches != null && matches.size() > 0;
        if (found) {
            Elements neighbours = matches.parents().first().siblingElements();
            for (int i = 0; i < neighbours.size(); i++) {
                Element cell = neighbours.get(i);
                System.out.println(cell.text());
            }
        }

        System.out.println("Done");
    }
}

你可能感兴趣的:(java,jsoupxpath)