使用 NekoHTML 解析 HTML

依赖的 jar 包:

nekohtml-1.9.15.jar

xercesImpl-2.9.1.jar

/**
  * 转dom树
  * @param html
  * @return
  */

private Document getDocument(String html) {
  InputSource inputSource = new InputSource(new StringReader(html));
  Document document = null;
  DOMParser parser = new DOMParser();
  try {
   parser.setFeature("http://xml.org/sax/features/namespaces", true);
  } catch (Exception e) {
   logger.error("failed to set feature!" + " - " + e.getMessage());
  }
  try {
   parser.parse(inputSource);
  } catch (Exception e) {
   logger.error("failed to parse html!" + " - " + e.getMessage());
  }
  document = parser.getDocument();
  return document;
 }

你可能感兴趣的:(html)