Java爬取网站图片保存到本地的实例

爬取的是 http://www.netbian.com 网站的图片。若要爬取某个分类下的图片,只需在网址后面加上"/分类地址",例如:http://www.netbian.com/weimei/
图片保存地址:E:\DeskTop\image\background

首先创建Maven工程
引入依赖:

<dependency>
	<groupId>org.jsoup</groupId>
	<artifactId>jsoup</artifactId>
	<version>1.11.3</version>
</dependency>

完整代码块:

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.*;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class ImgUtil02 {
    // Address of the first page to crawl; main passes it to upload.
    private static String url = "http://www.netbian.com";
    // Site prefix that upload puts in front of the href returned by getPrev
    // to build the address of the following page.
    private static String urlll = "http://www.netbian.com";
    /**
     * Program entry point: hands the configured start address to {@code upload}.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        final String startPage = url;
        upload(startPage);
    }
    /**
     * Downloads every titled image of the page at {@code url}, then follows the
     * pager link returned by {@code getPrev} page after page until no such link
     * is found.
     *
     * <p>The pages are walked with a plain loop. The previous implementation
     * recursed inside a {@code while (true)} loop, so once the last page was
     * reached the outer loops kept re-crawling the same pages forever. Its
     * pause condition ({@code 1 % s == 1}) was true for every {@code s >= 2}
     * and was only reached after the recursion returned, so requests were never
     * actually throttled; the pause now happens between two consecutive pages.
     *
     * @param url address of the first page to crawl
     */
    public static void upload(String url) {
        // Remember crawled addresses so a pager that links back cannot loop forever.
        Set<String> visited = new HashSet<String>();
        String pageUrl = url;
        while (visited.add(pageUrl)) {
            String resouce = getResource(pageUrl);
            Map<String, String> src = getUrlFromCode(resouce);
            for (Map.Entry<String, String> entry : src.entrySet()) {
                String key = entry.getKey();
                String value = entry.getValue();
                try {
                    uploadByURL(fileName(key, value), value);
                } catch (RuntimeException e) {
                    // A single bad image must not abort the rest of the page.
                    e.printStackTrace();
                }
            }

            String next;
            try {
                next = getPrev(resouce);
            } catch (IndexOutOfBoundsException e) {
                // getPrev indexes the last pager link; with no link present the
                // lookup fails. IndexOutOfBoundsException also covers its subclass
                // ArrayIndexOutOfBoundsException, whichever the runtime throws.
                System.out.println("数组越界");
                return;
            }
            pageUrl = urlll + next;
            System.out.print("地址:" + pageUrl + "  ");

            try {
                System.out.println("睡眠5秒");
                Thread.sleep(5000L);
            } catch (InterruptedException e) {
                // Restore the interrupt flag and stop crawling.
                Thread.currentThread().interrupt();
                return;
            }
        }
    }
    /**
     * Reads the {@code href} of the last {@code a[class=prev]} anchor found inside
     * a {@code div[class=page]} element of the given HTML.
     *
     * @param resouce HTML source of a page
     * @return the value of the anchor's {@code href} attribute
     * @throws IndexOutOfBoundsException if the HTML contains no such anchor
     *         (the lookup then asks for index {@code -1})
     */
    public static String getPrev(String resouce)
    {
        final Elements pagerLinks = Jsoup.parse(resouce)
                .select("div[class=page]")
                .select("a[class=prev]");
        // Take the last match; an empty result makes get() fail on purpose.
        final int lastIndex = pagerLinks.size() - 1;
        final Element lastLink = pagerLinks.get(lastIndex);
        return lastLink.attr("href");
    }
    /**
     * Builds the local file name under which an image is stored.
     *
     * <p>Normally the name is the image title followed by {@code ".jpg"}. When the
     * title is {@code null}, empty, or too long to fit a 255-character file name
     * together with the extension, the last path segment of {@code fileUrl} is used
     * instead (without the leading {@code "/"}, and unchanged if the URL has no
     * slash at all). In both cases characters that are not allowed in Windows file
     * names ({@code / \ : * ? " < > |}) are replaced by {@code '#'}.
     *
     * @param fileName image title, may be {@code null}
     * @param fileUrl  address of the image; only read when the title cannot be used
     * @return a file name without any directory part
     */
    public static String fileName(String fileName,String fileUrl){
        final String extension = ".jpg";
        final int maxLength = 255;
        final String illegal = "/\\:*?\"<>|";

        String base;
        String suffix;
        if (fileName == null || fileName.isEmpty()
                || fileName.length() + extension.length() > maxLength) {
            // +1 skips the slash itself; lastIndexOf == -1 yields the whole string.
            base = fileUrl.substring(fileUrl.lastIndexOf("/") + 1);
            suffix = "";
        } else {
            base = fileName;
            suffix = extension;
        }

        StringBuilder safe = new StringBuilder(base.length() + suffix.length());
        for (int i = 0; i < base.length(); i++) {
            char c = base.charAt(i);
            safe.append(illegal.indexOf(c) >= 0 ? '#' : c);
        }
        return safe.append(suffix).toString();
    }
    /**
     * Downloads the resource at {@code fileUrl} and stores it as {@code fileName}
     * in the fixed directory {@code E:\DeskTop\image\background\}, creating the
     * directory when it does not exist yet.
     *
     * <p>Both streams are closed and the connection is released on every path,
     * including failures.
     *
     * @param fileName name of the file to create inside the save directory
     * @param fileUrl  HTTP address of the image
     * @return the full path of the written file, or an empty string if the
     *         download or the write failed with an {@link IOException}
     */
    public static String uploadByURL(String fileName,String fileUrl) {
        System.out.println(fileName);
        String savePath = "E:\\DeskTop\\image\\background\\";
        HttpURLConnection connection = null;
        try {
            File directory = new File(savePath);
            if (!directory.exists()) {
                System.out.println("创建文件夹路径" + savePath);
                // mkdirs may lose a race with another creator; only fail if the
                // directory is really missing afterwards.
                if (!directory.mkdirs() && !directory.isDirectory()) {
                    throw new IOException("cannot create directory " + savePath);
                }
            }

            connection = (HttpURLConnection) new URL(fileUrl).openConnection();
            try (BufferedInputStream in = new BufferedInputStream(connection.getInputStream());
                 BufferedOutputStream out = new BufferedOutputStream(
                         new FileOutputStream(savePath + fileName))) {
                byte[] buffer = new byte[2048];
                int count;
                // read() signals end of stream with -1.
                while ((count = in.read(buffer)) != -1) {
                    out.write(buffer, 0, count);
                }
            }
            return savePath + fileName;
        } catch (IOException e) {
            System.out.println("图片下载失败:" + fileUrl);
            // Keep the cause visible instead of silently dropping it.
            e.printStackTrace();
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
        return "";
    }
    /**
     * Collects the images of an HTML page that carry a non-empty {@code alt} text.
     *
     * <p>The {@code alt} text is the map key, so when several images share the same
     * text only the last one is kept.
     *
     * @param resouce HTML source of a page
     * @return a {@link HashMap} from {@code alt} text to the image's {@code src}
     *         attribute value; empty when the page has no titled image
     */
    public static Map<String, String> getUrlFromCode(String resouce)
    {
        Document document = Jsoup.parse(resouce);
        Elements elements = document.select("img");
        Map<String,String> map=new HashMap<String,String>();
        for (Element image : elements) {
            String alt = image.attr("alt");
            // Compare by content: "!=" on Strings only compares references.
            if (!alt.isEmpty()) {
                map.put(alt, image.attr("src"));
            }
        }
        System.out.println("大小:"+map.size());
        return map;
    }
    /**
     * Fetches the content behind {@code url} and returns it as text, decoded as GBK,
     * with every line terminated by {@code '\n'}.
     *
     * <p>The reader (and with it the underlying stream) is always closed. Errors are
     * printed and not rethrown: whatever was read before the failure is returned,
     * which is an empty string when nothing could be read.
     *
     * @param url address to read from
     * @return the decoded content, possibly empty or partial after an error
     */
    public static String getResource(String url)
    {
        StringBuilder code = new StringBuilder();
        try {
            URLConnection connection = new URL(url).openConnection();
            try (BufferedReader bReader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), "gbk"))) {
                String temp;
                while ((temp = bReader.readLine()) != null) {
                    code.append(temp).append('\n');
                }
            }
        }
        catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return code.toString();
    }

你可能感兴趣的:(Java爬取网站图片保存到本地的实例)