Jsoup爬取网页内容

	/**
	 * Crawls a book index page with Jsoup, then downloads every chapter page it links to.
	 *
	 * <p>The index page supplies the book title (the {@code h1} inside {@code #info}), the cover
	 * image URL (the {@code img} inside {@code #fmimg}) and the chapter links (every {@code a}
	 * inside a {@code dd}). For each chapter link the target page is fetched and the text of its
	 * {@code #content} element is stored in an {@code EbookChapter} together with the chapter
	 * name and the source URL.
	 *
	 * <p>Elements that are missing from a page are treated as empty text instead of aborting the
	 * whole crawl, and links without a resolvable absolute URL are skipped.
	 *
	 * @param args command-line arguments; not used
	 * @throws Exception if any page cannot be fetched or parsed
	 */
	public static void main(String [] args) throws Exception{
		// One user agent for every request, so the index and chapter fetches cannot drift apart.
		final String userAgent="Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0)";

		// Fetch the book's index page.
		Document document=Jsoup.connect("http://www.biquge5200.com/14_14620/").userAgent(userAgent).get();
		// Chapter links: every <a> nested in a <dd>.
		Elements chapters=document.getElementsByTag("dd").select("a");

		// getElementById yields null when the id is absent, so guard before dereferencing.
		Element info=document.getElementById("info");
		// Book title.
		String bookname= info==null ? "" : info.getElementsByTag("h1").text();
		Element coverBox=document.getElementById("fmimg");
		// Absolute URL of the book cover image.
		String cover= coverBox==null ? "" : coverBox.select("img").attr("abs:src");

		// NOTE(review): bookname, cover and list are collected but not consumed in the visible
		// code; presumably they are persisted by code that follows this snippet.
		List<EbookChapter> list=new ArrayList<EbookChapter>();
		for (Element e:chapters) {
			// Absolute URL of the chapter page; empty when the href cannot be resolved.
			String second_url=e.attr("abs:href");
			if (second_url.isEmpty()) {
				// Jsoup.connect rejects an empty URL, so skip anchors that are not real links.
				continue;
			}
			Document contentDocument=Jsoup.connect(second_url).userAgent(userAgent).get();
			// Chapter body text; empty when the page has no #content element.
			Element contentElement=contentDocument.getElementById("content");
			String content= contentElement==null ? "" : contentElement.text();
			// Chapter name is the link text on the index page.
			String chaptername=e.text();
			EbookChapter chapter=new EbookChapter();
			chapter.setChapter(chaptername);
			chapter.setContent(content);
			chapter.setCopyurl(second_url);
			list.add(chapter);
		}

	}

你可能感兴趣的:(Jsoup爬取网页内容)