jsoup 读取解析html,jsoup的maven依赖及jsoup解析html获取Element的数据(demo)

jsoup的maven依赖:

jar包下载地址:http://note.youdao.com/noteshare?id=c2444dc21b286006fb9027683f2a5053

<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.7.3</version>
</dependency>

package com.success.project;

import java.io.BufferedReader;

import java.io.IOException;

import java.io.InputStreamReader;

import java.net.MalformedURLException;

import java.net.URL;

import java.net.URLConnection;

import java.util.ArrayList;

import java.util.HashMap;

import java.util.List;

import java.util.Map;

import org.jsoup.Jsoup;

import org.jsoup.nodes.Document;

import org.jsoup.nodes.Element;

import org.jsoup.select.Elements;

import com.alibaba.druid.support.logging.Log;

import com.alibaba.druid.support.logging.LogFactory;

public class TestJsoupParseHtml {

private static final Log log = LogFactory.getLog(Test.class);

public static void main(String[] args) {

System.out.println("helloworld");

String url = "http://hotels.ctrip.com/hotel/beijing1/location94#ctm_ref=hod_hp_sb_lst";

String encoding ="utf-8";

String html = getHTMLResourceByUrl(url,encoding);

System.out.println(html);//输出html

String fengzhuang = Fengzhuang(html,encoding);

System.out.println(fengzhuang);

}

public static String Fengzhuang(String html,String encoding ){

Document parse = null;

List> list = new ArrayList>();

//解析html,按照什么编码进行解析html

parse = Jsoup.parse(html,encoding);

Element elementById = parse.getElementById("hotel_list");

Elements elementsByClass = elementById.getElementsByClass("searchresult_list");

for (Element element : elementsByClass) {

Map map = new HashMap();

//获取酒店的图片

String imgSrc = element.getElementsByTag("img").attr("src");

//获取酒店title

String title = element.getElementsByTag("ima").attr("alt");

//获取酒店的描述信息

String desc = element.getElementsByClass("searchresult_htladdress").text();

map.put("imgSrc", imgSrc);

map.put("title",title);

map.put("desc",desc);

list.add(map);

}

return list.toString();

}

//获取html

public static String getHTMLResourceByUrl(String url,String encoding){

StringBuffer sb = new StringBuffer();

URL urlObj =null;

URLConnection openConnection =null;

InputStreamReader isr = null;

BufferedReader br = null;

try {

urlObj = new URL(url);

openConnection = urlObj.openConnection();

isr = new InputStreamReader(openConnection.getInputStream(),encoding);

//建立文件缓冲流

br = new BufferedReader(isr);

//建立临时文件

String temp = null;

while((temp=br.readLine())!=null){

sb.append(temp+"\n");

}

} catch (MalformedURLException e) {

// TODO Auto-generated catch block

log.error("error message", e);

} catch (IOException e) {

// TODO Auto-generated catch block

log.error("error message", e);

}finally{

try {

if(isr !=null){

isr.close();

}

} catch (IOException e) {

// TODO Auto-generated catch block

log.error("error message", e);

}

}

return sb.toString();

}

}

测试结果:

你可能感兴趣的:(jsoup,读取解析html)