jsoup的maven依赖:
jar包下载地址:http://note.youdao.com/noteshare?id=c2444dc21b286006fb9027683f2a5053
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.7.3</version>
</dependency>
package com.success.project;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.alibaba.druid.support.logging.Log;
import com.alibaba.druid.support.logging.LogFactory;
public class TestJsoupParseHtml {
private static final Log log = LogFactory.getLog(Test.class);
public static void main(String[] args) {
System.out.println("helloworld");
String url = "http://hotels.ctrip.com/hotel/beijing1/location94#ctm_ref=hod_hp_sb_lst";
String encoding ="utf-8";
String html = getHTMLResourceByUrl(url,encoding);
System.out.println(html);//输出html
String fengzhuang = Fengzhuang(html,encoding);
System.out.println(fengzhuang);
}
public static String Fengzhuang(String html,String encoding ){
Document parse = null;
List> list = new ArrayList>();
//解析html,按照什么编码进行解析html
parse = Jsoup.parse(html,encoding);
Element elementById = parse.getElementById("hotel_list");
Elements elementsByClass = elementById.getElementsByClass("searchresult_list");
for (Element element : elementsByClass) {
Map map = new HashMap();
//获取酒店的图片
String imgSrc = element.getElementsByTag("img").attr("src");
//获取酒店title
String title = element.getElementsByTag("ima").attr("alt");
//获取酒店的描述信息
String desc = element.getElementsByClass("searchresult_htladdress").text();
map.put("imgSrc", imgSrc);
map.put("title",title);
map.put("desc",desc);
list.add(map);
}
return list.toString();
}
//获取html
public static String getHTMLResourceByUrl(String url,String encoding){
StringBuffer sb = new StringBuffer();
URL urlObj =null;
URLConnection openConnection =null;
InputStreamReader isr = null;
BufferedReader br = null;
try {
urlObj = new URL(url);
openConnection = urlObj.openConnection();
isr = new InputStreamReader(openConnection.getInputStream(),encoding);
//建立文件缓冲流
br = new BufferedReader(isr);
//建立临时文件
String temp = null;
while((temp=br.readLine())!=null){
sb.append(temp+"\n");
}
} catch (MalformedURLException e) {
// TODO Auto-generated catch block
log.error("error message", e);
} catch (IOException e) {
// TODO Auto-generated catch block
log.error("error message", e);
}finally{
try {
if(isr !=null){
isr.close();
}
} catch (IOException e) {
// TODO Auto-generated catch block
log.error("error message", e);
}
}
return sb.toString();
}
}
测试结果: