这可以通过 XPath 和 jsoup 两种方式完成。请看下面的例子。
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class SibilingParse {
public static void main(String[] args) {
try {
String html = "
+ "
"+ "
10110"+ "
IIND1000"+ "
+ "
+ "
INTROD. INGEN. INDUSTRIAL"+ "
100"+ "
100"+ "
0"+ "
"+ "
";//Xpath way
System.out.println("XPATH");
InputStream xmlStream = new ByteArrayInputStream(html.getBytes());
DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = builderFactory.newDocumentBuilder();
Document xmlDocument = builder.parse(xmlStream);
XPath xPath = XPathFactory.newInstance().newXPath();
String expression = "/table/tr/td//*[text()='10110']//../following-sibling::td";
NodeList nodeList = (NodeList) xPath.compile(expression).evaluate(xmlDocument, XPathConstants.NODESET);
for (int i = 0; i < nodeList.getLength(); i++) {
System.out.println(nodeList.item(i).getFirstChild().getTextContent());
}
System.out.println();
// Jsoup way
org.jsoup.nodes.Document doc = Jsoup.parse(html);
Elements tds = doc.select("td:contains(10110)");
if(tds != null && tds.size() > 0){
for(Element td : tds.first().siblingElements()){
System.out.println(td.text());
}
}
} catch (ParserConfigurationException e) {
e.printStackTrace();
} catch (SAXException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (XPathExpressionException e) {
e.printStackTrace();
}
}
}
基于 URL 的版本：
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Downloads the course-schedule page with jsoup, looks for {@code font} elements whose own
 * text contains {@code 10110}, and prints the text of the sibling elements around the first
 * element returned by {@code parents()} (presumably the enclosing table cell).
 */
public class SiblingJsoup {
    public static void main(String[] args) {
        final String pageUrl =
                "http://registroapps.uniandes.edu.co/scripts/adm_con_horario1_joomla.php?depto=IIND";
        try {
            Document page = Jsoup.connect(pageUrl).timeout(20000).get();
            Elements matches = page.select("font:containsOwn(10110)");
            boolean found = matches != null && !matches.isEmpty();
            if (found) {
                Elements neighbours = matches.parents().first().siblingElements();
                for (int i = 0; i < neighbours.size(); i++) {
                    Element cell = neighbours.get(i);
                    System.out.println(cell.text());
                }
            }
            System.out.println("Done");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}