HTMLParser解析HTML标签的实例

/**
 * Example of extracting HTML tags with the HTMLParser library.
 */
public class HtmlParserTest {

	/**
	 * Parses the page at a hard-coded URL, narrows the result down to
	 * {@code <a href=...>} tags found inside {@code <dt>} inside {@code <dl>}
	 * inside {@code <div class="category_nav">}, and prints the text and
	 * address of the first matching link, followed by the elapsed time in
	 * seconds.
	 *
	 * <p>If nothing matches, a message is printed instead of indexing into an
	 * empty array.
	 *
	 * @param args command-line arguments (unused)
	 * @throws ParserException propagated from the HTMLParser calls
	 */
	public static void main(String[] args) throws ParserException {
		long first = System.currentTimeMillis();

		// Create the parser for the target page.
		String path_url = "http://mobile.zol.com.cn/";
		Parser myParser = new Parser(path_url);
		// Set the character encoding used for the page.
		myParser.setEncoding("GBK");

		// Tag and attribute names used by the filters below.
		String div = "div";
		String className = "class";
		String classValue = "category_nav";
		String href = "href";

		// Match <div> tags whose "class" attribute equals "category_nav".
		// (A plain TagNameFilter(div) would match on tag name only.)
		NodeFilter nodeFilter = new AndFilter(new TagNameFilter(div), new HasAttributeFilter(className, classValue));

		// Run the filter against the parser.
		NodeList nodeList = myParser.extractAllNodesThatMatch(nodeFilter);

		// Filters can be applied repeatedly to narrow the result. When filtering
		// a NodeList, the second argument is passed as true (presumably the
		// "recursive" flag - confirm against the NodeList API docs).
		nodeList = nodeList.extractAllNodesThatMatch(new TagNameFilter("dl"), true);
		nodeList = nodeList.extractAllNodesThatMatch(new TagNameFilter("dt"), true);
		nodeList = nodeList.extractAllNodesThatMatch(new AndFilter(new TagNameFilter("a"), new HasAttributeFilter(href)), true);

		// Convert the result to an array and report the first link, if any.
		Node[] node = nodeList.toNodeArray();
		if (node.length == 0) {
			// Guard: the filter chain may match nothing (e.g. the page layout
			// changed); node[0] would throw ArrayIndexOutOfBoundsException.
			System.out.println("未找到匹配的链接");
		} else {
			LinkTag firstLink = (LinkTag) node[0];
			System.out.println("链接名称:" + firstLink.getLinkText());
			System.out.println("链接地址:" + firstLink.getLink());
		}

		// Compute and print the elapsed time in seconds.
		long now = System.currentTimeMillis();
		double time = (double) (now - first) / 1000;
		System.out.println("消耗时间:" + time);
	}

}

你可能感兴趣的:(HTMLParser解析HTML标签的实例)