NekoHTML 用法

// Using NekoHTML together with XPath: download a page, parse it into a DOM
// with NekoHTML's DOMParser, then print the text of every node matched by
// an XPath expression.
DOMParser parser = new DOMParser();
try {
    // Charset of the target page; used both as the parser default and to
    // decode the downloaded bytes.
    String encoding = "gb2312";
    // Set the default encoding of the web page.
    // NOTE(review): the parser is handed a Reader below, so the characters
    // are already decoded by the time it sees them; this property presumably
    // only matters for byte-stream input. Kept for parity with the original.
    parser.setProperty("http://cyberneko.org/html/properties/default-encoding", encoding);
    /* The Xerces HTML DOM implementation does not support namespaces
       and cannot represent XHTML documents with namespace information.
       Therefore, in order to use the default HTML DOM implementation with NekoHTML's
       DOMParser to parse XHTML documents, you must turn off namespace processing. */
    parser.setFeature("http://xml.org/sax/features/namespaces", false);

    String strURL = "http://product.dangdang.com/product.aspx?product_id=9317290";
    // Decode with the page's charset explicitly (the one-argument
    // InputStreamReader constructor would use the platform default), and let
    // try-with-resources close the stream even when parsing throws.
    try (BufferedReader in = new BufferedReader(
            new InputStreamReader(new URL(strURL).openStream(), encoding))) {
        parser.parse(new InputSource(in));
    }
} catch (Exception e) {
    e.printStackTrace();
}
Document doc = parser.getDocument();
// Tags should be in upper case.
String productsXpath = "/HTML/BODY/DIV[2]/DIV[4]/DIV[2]/DIV/DIV[3]/UL[@class]/LI[9]";
NodeList products;
// If fetching or parsing failed there may be no document; the failure has
// already been reported above, so skip the query instead of evaluating an
// XPath against null.
if (doc != null) {
    try {
        products = XPathAPI.selectNodeList(doc, productsXpath);
        System.out.println("found: " + products.getLength());
        for (int i = 0; i < products.getLength(); i++) {
            Node node = products.item(i);
            System.out.println(i + ":\n" + node.getTextContent());
        }
    } catch (TransformerException e) {
        e.printStackTrace();
    }
}

你可能感兴趣的:(html,xml,XHTML)