本程序爬取 http://www.netbian.com 网站的图片。如果要爬取某个分类下的图片,只需在网址后面加上"/分类地址",例如:http://www.netbian.com/weimei/
保存地址E:\DeskTop\image\background
首先创建Maven工程
引入依赖
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
完整代码块:
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.*;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class ImgUtil02 {
// Start page address that main() passes to upload().
private static String url = "http://www.netbian.com";
// Site root that upload() prepends to the link returned by getPrev() to build the next page address.
private static String urlll = "http://www.netbian.com";
/**
 * Program entry point: starts the crawl at the configured start address.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    final String startAddress = ImgUtil02.url;
    ImgUtil02.upload(startAddress);
}
/**
 * Crawls the listing page at {@code url} and then every following page reachable through the
 * pager link returned by {@link #getPrev(String)}, downloading each image found on the way.
 *
 * <p>The previous implementation recursed into the next page from inside a
 * {@code while (true)} loop, so the same pages were crawled again and again and the call stack
 * grew without bound. Its throttle condition {@code 1 % s == 1} was also always true. This
 * version walks the pages in a single loop, pauses five seconds between pages, and stops when
 * there is no further pager link, when the link is empty, or when the next address has already
 * been visited.
 *
 * @param url address of the first listing page to crawl
 */
public static void upload(String url) {
    // Addresses already processed; guards against a pager link that leads back to an earlier page.
    Set<String> visited = new HashSet<String>();
    String pageUrl = url;

    while (pageUrl != null && visited.add(pageUrl)) {
        String resouce = getResource(pageUrl);
        Map<String, String> src = getUrlFromCode(resouce);

        for (Map.Entry<String, String> entry : src.entrySet()) {
            String key = entry.getKey();
            String value = entry.getValue();
            try {
                uploadByURL(fileName(key, value), value);
            } catch (Exception e) {
                // A single bad image must not abort the remaining images of the page.
                e.printStackTrace();
            }
        }

        String next;
        try {
            next = getPrev(resouce);
        } catch (IndexOutOfBoundsException e) {
            // getPrev indexes the last pager link; when the page has none, the crawl is finished.
            // IndexOutOfBoundsException also covers ArrayIndexOutOfBoundsException.
            System.out.println("数组越界");
            break;
        }
        if (next == null || next.isEmpty()) {
            break;
        }

        pageUrl = urlll + next;
        System.out.print("地址:" + pageUrl + " ");

        try {
            // Pause between pages so the site is not hit with back-to-back requests.
            System.out.println("睡眠5秒");
            Thread.sleep(5000L);
        } catch (InterruptedException e) {
            // Restore the interrupt flag and stop crawling.
            Thread.currentThread().interrupt();
            return;
        }
    }
}
/**
 * Returns the {@code href} of the last {@code a[class=prev]} link found inside
 * {@code div[class=page]} in the given HTML.
 *
 * <p>The last match is fetched by index ({@code size() - 1}), so HTML without any matching link
 * results in an index-out-of-bounds exception rather than a return value.
 *
 * @param resouce HTML source of a listing page
 * @return the value of the {@code href} attribute of the last matching link
 */
public static String getPrev(String resouce)
{
    Elements pagerLinks = Jsoup.parse(resouce)
            .select("div[class=page]")
            .select("a[class=prev]");
    int lastIndex = pagerLinks.size() - 1;
    Element lastLink = pagerLinks.get(lastIndex);
    return lastLink.attr("href");
}
/**
 * Builds the local file name for a downloaded image.
 *
 * <p>When {@code fileName} is usable, the result is {@code fileName + ".jpg"}. When it is
 * {@code null}, blank, or too long to stay within 255 characters once ".jpg" is appended, the
 * last path segment of {@code fileUrl} is used instead (without the leading "/", which the
 * previous implementation kept). In both cases every character that is not allowed in a Windows
 * file name ({@code \ / : * ? " < > |}) is replaced by {@code #}; the original code only
 * replaced "/".
 *
 * @param fileName preferred base name (for example the image's alt text); may be {@code null}
 * @param fileUrl  address of the image; only read when {@code fileName} cannot be used
 * @return a file name without path separators
 */
public static String fileName(String fileName,String fileUrl){
    final String extension = ".jpg";
    final int maxLength = 255;

    String candidate;
    if (fileName == null
            || fileName.trim().isEmpty()
            || fileName.length() + extension.length() > maxLength) {
        // +1 skips the "/" itself; with no "/" present lastIndexOf yields -1 and the whole string is used.
        candidate = fileUrl.substring(fileUrl.lastIndexOf("/") + 1);
    } else {
        candidate = fileName + extension;
    }
    return candidate.replaceAll("[\\\\/:*?\"<>|]", "#");
}
/**
 * Downloads the resource at {@code fileUrl} and stores it as {@code fileName} inside the fixed
 * save directory {@code E:\DeskTop\image\background\}, creating the directory if necessary.
 *
 * <p>Both streams and the HTTP connection are released in a {@code finally} block, so a failure
 * in the middle of a download no longer leaks them.
 *
 * @param fileName name of the file to create inside the save directory
 * @param fileUrl  address of the image to download
 * @return the full path of the written file, or an empty string when the download failed
 */
public static String uploadByURL(String fileName,String fileUrl) {
    System.out.println(fileName);
    String savePath = "E:\\DeskTop\\image\\background\\";

    HttpURLConnection connection = null;
    BufferedInputStream in = null;
    BufferedOutputStream out = null;
    try {
        connection = (HttpURLConnection) new URL(fileUrl).openConnection();
        in = new BufferedInputStream(connection.getInputStream());

        File directory = new File(savePath);
        if (!directory.exists()) {
            System.out.println("创建文件夹路径" + savePath);
            directory.mkdirs();
        }

        // FileOutputStream creates the file itself, so no separate createNewFile() is needed.
        out = new BufferedOutputStream(new FileOutputStream(savePath + fileName));
        byte[] buffer = new byte[2048];
        int count;
        while ((count = in.read(buffer)) != -1) {
            out.write(buffer, 0, count);
        }

        // Close here (not only in finally) so that a failing final flush is reported as a failed download.
        out.close();
        out = null;
        return savePath + fileName;
    } catch (IOException e) {
        System.out.println("图片下载失败:" + fileUrl + " (" + e + ")");
    } finally {
        if (out != null) {
            try {
                out.close();
            } catch (IOException ignored) {
                // Already on the failure path; nothing more to do.
            }
        }
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // Closing is best-effort.
            }
        }
        if (connection != null) {
            connection.disconnect();
        }
    }
    return "";
}
/**
 * Collects the images of an HTML page as a map from {@code alt} text to {@code src} address.
 *
 * <p>Images whose {@code alt} attribute is empty or missing are skipped. The previous check
 * used {@code != ""}, which compares object references rather than string content. When several
 * images share the same {@code alt} text, the last one wins. The number of collected entries is
 * printed to standard output.
 *
 * @param resouce HTML source of the page
 * @return a {@link HashMap} of alt text to image address; never {@code null}
 */
public static Map<String, String> getUrlFromCode(String resouce)
{
    Document document = Jsoup.parse(resouce);
    Map<String,String> map = new HashMap<String,String>();
    for (Element image : document.select("img")) {
        String alt = image.attr("alt");
        if (!alt.isEmpty()) {
            map.put(alt, image.attr("src"));
        }
    }
    System.out.println("大小:"+map.size());
    return map;
}
/**
 * Fetches the content at {@code url}, decodes it as GBK and returns it as one string in which
 * every line is terminated by {@code "\n"}.
 *
 * <p>On a malformed address or an I/O error the stack trace is printed and whatever was read so
 * far (possibly the empty string) is returned. The reader and its underlying stream are now
 * closed in a {@code finally} block; previously they were never closed.
 *
 * @param url address of the page to read
 * @return the decoded page content, or the part read before a failure occurred
 */
public static String getResource(String url)
{
    StringBuilder code = new StringBuilder();
    InputStream inputStream = null;
    BufferedReader bReader = null;
    try {
        URLConnection connection = new URL(url).openConnection();
        inputStream = connection.getInputStream();
        bReader = new BufferedReader(new InputStreamReader(inputStream, "gbk"));
        String line;
        while ((line = bReader.readLine()) != null) {
            code.append(line).append('\n');
        }
    } catch (MalformedURLException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            if (bReader != null) {
                // Closing the reader also closes the wrapped stream.
                bReader.close();
            } else if (inputStream != null) {
                // The reader was never created (e.g. unsupported charset); close the raw stream.
                inputStream.close();
            }
        } catch (IOException ignored) {
            // Closing is best-effort; the content has already been read.
        }
    }
    return code.toString();
}