java爬虫获取图片

获取每个页面图片链接地址

package com.wxq.pachong;

import com.alibaba.fastjson.JSON;

import java.util.ArrayList;

import java.util.List;

/**

* @title:

* @description:

* @author:

* @date:2019/3/11 15:09

**/

public class JianDanHtmlParserimplements Runnable{

private Stringhtml;

    private int page;

    public JianDanHtmlParser(String html,int page) {

this.html = html;

        this.page = page;

    }

@Override

    public void run() {

System.out.println("==========第"+page+"页============");

        List list =new ArrayList();

        html =html.substring(html.indexOf("list"));

        String[] images =html.split("li>");

        for (String image : images) {

String[] ss = image.split("br");

            for (String s : ss) {

if (s.indexOf("

                        list.add(s.substring(i, s.indexOf("\"", i +1)));

                    }catch (Exception e) {

System.out.println(s);

                    }

}

}

}

for(String imageUrl : list){

System.out.println("图片链接:"+ JSON.toJSONString(imageUrl));

            if(imageUrl.indexOf("uploads")>0){

new Thread(new JianDanImageCreator(imageUrl,page)).start();

            }

}

}

}


将图片链接下载到本地

package com.wxq.pachong;

import java.io.File;

import java.io.FileOutputStream;

import java.io.InputStream;

import java.io.OutputStream;

import java.net.URL;

/**

* @title:

* @description:

* @author:

* @date:2019/3/11 15:09

**/

public class JianDanImageCreator implements Runnable {

    // Shared counter across all downloader threads, used only for log output.
    // NOTE(review): plain int increment is racy with many threads; duplicate
    // numbers in the log are cosmetic only.
    private static int count = 0;
    private String imageUrl;
    private int page;
    // 存储路径,自定义 — local directory where downloaded images are stored.
    private static final String basePath = "E:/jiandan";

    public JianDanImageCreator(String imageUrl, int page) {
        this.imageUrl = imageUrl;
        this.page = page;
    }

    /**
     * Downloads {@code imageUrl} into {@code basePath}, naming the file
     * "{page}--{original file name}". Errors are printed, not rethrown.
     */
    @Override
    public void run() {
        File dir = new File(basePath);
        if (!dir.exists()) {
            dir.mkdirs();
            System.out.println("图片存放于" + basePath + "目录下");
        }
        // File name = everything after the last '/' of the URL.
        String imageName = imageUrl.substring(imageUrl.lastIndexOf("/") + 1);
        File file = new File(basePath + "/" + page + "--" + imageName);
        // try-with-resources closes both streams even on I/O errors
        // (the original leaked them whenever an exception was thrown).
        try (OutputStream os = new FileOutputStream(file);
             InputStream is = new URL(imageUrl).openStream()) {
            byte[] buff = new byte[1024];
            int readed;
            while ((readed = is.read(buff)) != -1) {
                // Write straight from the buffer — the per-chunk temp copy
                // in the original was unnecessary.
                os.write(buff, 0, readed);
            }
            System.out.println("第" + (count++) + "张妹子:" + file.getAbsolutePath());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}


爬虫执行方法


package com.wxq.pachong;

import org.apache.http.client.config.CookieSpecs;

import org.apache.http.client.config.RequestConfig;

import org.apache.http.client.methods.CloseableHttpResponse;

import org.apache.http.client.methods.HttpGet;

import org.apache.http.impl.client.CloseableHttpClient;

import org.apache.http.impl.client.HttpClients;

import java.io.InputStream;

/**

* @title:

* @description:

* @author:

* @date:2019/3/11 15:07

**/

public class SimpleSpider {

//起始页码

    private static final int page =1264;

    public static void main(String[] args) {

//HttpClient 超时配置

        RequestConfig globalConfig = RequestConfig.custom().setCookieSpec(CookieSpecs.STANDARD).setConnectionRequestTimeout(6000).setConnectTimeout(6000).build();

        CloseableHttpClient httpClient = HttpClients.custom().setDefaultRequestConfig(globalConfig).build();

        System.out.println("5秒后开始抓取煎蛋妹子图……");

        for (int i =page; i >0; i--) {

//创建一个GET请求

            HttpGet httpGet =new HttpGet("http://www.jf258.com/nvsheng/"+ i+"1.html");

            httpGet.addHeader("User-Agent","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36");

            httpGet.addHeader("Cookie","_gat=1; nsfw-click-load=off; gif-click-load=on; _ga=GA1.2.1861846600.1423061484");

            try {

//不敢爬太快

                Thread.sleep(5000);

                //发送请求,并执行

                CloseableHttpResponse response = httpClient.execute(httpGet);

                InputStream in = response.getEntity().getContent();

                String html = Utils.convertStreamToString(in);

                //网页内容解析

                new Thread(new JianDanHtmlParser(html, i)).start();

            }catch (Exception e) {

e.printStackTrace();

            }

}

}

}


工具类


package com.wxq.pachong;

import java.io.BufferedReader;

import java.io.IOException;

import java.io.InputStream;

import java.io.InputStreamReader;

/**

* @title:

* @description:

* @author:

* @date:2019/3/11 15:20

**/

public class Utils {

public static StringconvertStreamToString(InputStream is) {

BufferedReader reader =new BufferedReader(new InputStreamReader(is));

        StringBuilder sb =new StringBuilder();

        String line =null;

        try {

while ((line = reader.readLine()) !=null) {

sb.append(line +"\n");

            }

}catch (IOException e) {

e.printStackTrace();

        }finally {

try {

is.close();

            }catch (IOException e) {

e.printStackTrace();

            }

}

return sb.toString();

    }

}

你可能感兴趣的:(java爬虫获取图片)