Java Jsoup 网络爬虫学习例子(三):抓取豆瓣电影海报图片并下载到本地

Java Jsoup 网络爬虫学习例子(三):抓取豆瓣电影海报图片并下载到本地

 

 

package com.iteye.injavawetrust.img;

/**
 * Shared constants used by the Douban poster crawler.
 *
 * @author InJavaWeTrust
 */
public class Constants {

	/** Directory in which downloaded poster images are stored. */
	public static final String IMGPATH = "E:\\InJavaWeTrust\\jsoup\\img\\";

	/** File extension appended to every saved poster (JPG format). */
	public static final String EXTJPG = ".jpg";

	/** URL of Douban's most popular movie reviews listing. */
	public static final String URL = "http://movie.douban.com/review/best/";

	/** Number of records shown on each listing page. */
	public static final int NUM = 10;

	/** Query-string prefix appended to {@link #URL} to build a paginated URL. */
	public static final String START = "?start=";

}


package com.iteye.injavawetrust.img;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Singleton helper that crawls the Douban review listing pages and saves the
 * movie poster images it finds to the local directory
 * {@link Constants#IMGPATH}.
 *
 * @author InJavaWeTrust
 */
public class JsoupUtil {

	/** Number of listing pages visited by {@link #getDoubanFilmImg()}. */
	private static final int PAGE_COUNT = 5;

	/** Timeout, in milliseconds, for fetching a single listing page. */
	private static final int PAGE_TIMEOUT_MILLIS = 5000;

	/** Size, in bytes, of the buffer used when copying an image to disk. */
	private static final int BUFFER_SIZE = 8192;

	/**
	 * Regular expression matching characters that are not allowed in Windows
	 * file names. Both path separators are included, so a sanitized name can
	 * never point outside the image directory.
	 */
	private static final String ILLEGAL_FILE_NAME_CHARS = "[\\\\/:*?\"<>|]";

	private static final JsoupUtil instance = new JsoupUtil();

	private JsoupUtil() {
	}

	/**
	 * Returns the shared instance of this helper.
	 *
	 * @return the single {@code JsoupUtil} instance
	 */
	public static JsoupUtil getInstance() {
		return instance;
	}

	/**
	 * Downloads one poster image and stores it as
	 * {@code Constants.IMGPATH + <sanitized imgAlt> + Constants.EXTJPG}.
	 * <p>
	 * The image directory is created when it does not exist yet. Characters
	 * that are illegal in file names are replaced with {@code '_'}. I/O
	 * failures are printed to standard error and never propagated to the
	 * caller.
	 *
	 * @param imgSrc URL of the poster image; a {@code null} or empty value is
	 *               ignored
	 * @param imgAlt poster name, used as the base name of the saved file
	 */
	public void getImg(String imgSrc, String imgAlt) {
		if (imgSrc == null || imgSrc.isEmpty()) {
			// Nothing to download, e.g. the list item contained no <img> element.
			return;
		}
		String fileName = String.valueOf(imgAlt).replaceAll(ILLEGAL_FILE_NAME_CHARS, "_");
		InputStream is = null;
		OutputStream os = null;
		try {
			File dir = new File(Constants.IMGPATH);
			if (!dir.isDirectory() && !dir.mkdirs()) {
				throw new IOException("Cannot create image directory: " + dir);
			}
			is = new URL(imgSrc).openStream();
			os = new FileOutputStream(new File(dir, fileName + Constants.EXTJPG));
			// Copy in chunks; reading and flushing byte by byte is needlessly slow.
			byte[] buffer = new byte[BUFFER_SIZE];
			int read;
			while ((read = is.read(buffer)) != -1) {
				os.write(buffer, 0, read);
			}
		} catch (IOException e) {
			// Also covers MalformedURLException, which is an IOException.
			e.printStackTrace();
		} finally {
			if (null != is) {
				try {
					is.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (null != os) {
				try {
					os.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

	/**
	 * Crawls the first {@value #PAGE_COUNT} Douban review listing pages and
	 * downloads the poster of every {@code li.ilst} item found inside a
	 * {@code ul} element.
	 * <p>
	 * The {@code src} and {@code alt} attributes of each poster are printed to
	 * standard output before the download starts. A failure on one page is
	 * printed to standard error and does not prevent the remaining pages from
	 * being processed.
	 */
	public void getDoubanFilmImg() {
		for (int page = 0; page < PAGE_COUNT; page++) {
			String url = Constants.URL + Constants.START + (page * Constants.NUM);
			try {
				Document document = Jsoup.connect(url).timeout(PAGE_TIMEOUT_MILLIS).get();
				for (Element ul : document.select("ul")) {
					for (Element item : ul.select("li.ilst")) {
						Elements img = item.select("img");
						String imgSrc = img.attr("src");
						System.out.println(imgSrc);
						String imgAlt = img.attr("alt");
						System.out.println(imgAlt);
						getImg(imgSrc, imgAlt);
					}
				}
			} catch (Exception e) {
				// Kept broad on purpose: this method has never thrown to its caller.
				e.printStackTrace();
			}
		}
	}

}


package com.iteye.injavawetrust.img;

/**
 * 
 * @author InJavaWeTrust
 *
 */
public class GetDoubanFilmImg {
	
	public static void main(String[] args) {
		JsoupUtil ju = JsoupUtil.getInstance();
		ju.getDoubanFilmImg();
	}

}

 

 

运行结果:

http://img3.douban.com/view/movie_poster_cover/ipst/public/p2315652554.jpg
太阳的后裔
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2204200412.jpg
女医明妃传
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2319378505.jpg
上瘾
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2226342851.jpg
维京传奇 第三季
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2231323410.jpg
少帅
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2315652554.jpg
太阳的后裔
http://img3.douban.com/view/movie_poster_cover/ipst/public/p868550285.jpg
银翼杀手
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2315652554.jpg
太阳的后裔
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2315805100.jpg
西游记之孙悟空三打白骨精
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2309810802.jpg
45周年
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2315805100.jpg
西游记之孙悟空三打白骨精
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2315277870.jpg
中国故事
http://img3.douban.com/view/movie_poster_cover/ipst/public/p2322058074.jpg
因为爱情有幸福

。。。。。。
。。。。。。
。。。。。。


文件截图如下:

 

你可能感兴趣的:(java,JSoup,网络爬虫,下载到本地,学习例子(三),抓取豆瓣电影海报图片)