NekoHTML and Dom4j

http://pro.ctlok.com/2010/07/java-read-html-dom4j-nekohtml.html

 

package com.ctlok.pro;

 

import java.io.IOException;

 

import org.cyberneko.html.parsers.DOMParser;

import org.dom4j.Document;

import org.dom4j.Node;

import org.dom4j.io.DOMReader;

import org.xml.sax.SAXException;

 

/**
 * Small demo that parses a remote HTML page with NekoHTML, converts the
 * resulting W3C DOM into a dom4j document and prints three values located
 * with XPath: the quote name, the buy price and the sell price.
 */
public class Main {

    /** Printed in place of a value that could not be found in the page. */
    private static final String MISSING = "N/A";

    /**
     * Number of leading characters dropped from the buy/sell cell text.
     * NOTE(review): presumably skips a fixed prefix in the cell - confirm
     * against the live page markup.
     */
    private static final int PRICE_PREFIX_LENGTH = 2;

    /**
     * Fetches and parses the quote page, then prints the name, buy and sell
     * values to standard output. Values whose XPath matches nothing are
     * printed as {@code N/A} instead of aborting with a
     * {@link NullPointerException}. Parse and I/O failures are reported on
     * standard error.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            String url = "http://hk.finance.yahoo.com/q?s=0005.HK";

            DOMParser parser = new DOMParser();
            parser.parse(url);

            org.w3c.dom.Document document = parser.getDocument();
            DOMReader domReader = new DOMReader();
            Document doc = domReader.read(document);

            // Element names must be written in upper case in the XPath expressions.
            Node name = doc.selectSingleNode("//DIV[@id='quote-bar-latest']/*/H2/node()");
            Node buy = doc.selectSingleNode("//DIV[@id='quote-bar-trade-info']/TABLE/TBODY/TR[1]/TD[2]");
            Node sell = doc.selectSingleNode("//DIV[@id='quote-bar-trade-info']/TABLE/TBODY/TR[2]/TD[2]");

            System.out.println(textOf(name));
            System.out.println("Buy: " + priceOf(buy));
            System.out.println("Sell: " + priceOf(sell));
        } catch (SAXException e) {
            // Diagnostics go to stderr so they do not mix with the quote output.
            System.err.println(e.toString());
        } catch (IOException e) {
            System.err.println(e.toString());
        }
    }

    /**
     * Returns the text of {@code node}, or {@link #MISSING} when the XPath
     * lookup matched nothing or the node carries no text.
     */
    private static String textOf(Node node) {
        if (node == null) {
            return MISSING;
        }
        String text = node.getText();
        return text != null ? text : MISSING;
    }

    /**
     * Returns the text of a buy/sell cell with its first
     * {@link #PRICE_PREFIX_LENGTH} characters removed, or {@link #MISSING}
     * when the cell is absent or its text is too short to strip.
     */
    private static String priceOf(Node cell) {
        if (cell == null) {
            return MISSING;
        }
        String text = cell.getText();
        if (text == null || text.length() < PRICE_PREFIX_LENGTH) {
            return MISSING;
        }
        return text.substring(PRICE_PREFIX_LENGTH);
    }
}

 

你可能感兴趣的:(dom4j)