java爬虫,爬取美女图片

工具:

commons-io-2.4.jar

httpclient-4.4.1.jar

httpcore-4.4.1.jar

jsoup-1.10.2.jar

爬取效果:

(图片:爬取效果截图)

代码如下:

package caiji;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;

import org.apache.commons.io.FileUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Demo crawler: downloads every image found in the main content area of a
 * single article page and stores the files in a local directory.
 *
 * @author www.java00123.com
 * @version V1.0
 * @date 2018-12-04 11:20:15
 */
public class CaiJiTest {

    /** Page whose images are downloaded. */
    private static final String PAGE_URL = "http://www.java00123.com/html/xe/20181204/9197.html";

    /** Value sent as the referer header when fetching the page. */
    private static final String REFERER = "http://www.java00123.com/";

    /** CSS class of the element that wraps the article content. */
    private static final String CONTENT_CLASS = "c_l_l_c_main";

    /** Local directory that receives the downloaded images. */
    private static final String TARGET_DIR = "c:\\2\\";

    /** Connect / read timeouts for image downloads, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 10000;
    private static final int READ_TIMEOUT_MS = 30000;

    /**
     * Runs the crawl in four steps:
     * <ol>
     *   <li>fetch the target page,</li>
     *   <li>locate the content element in the parsed HTML,</li>
     *   <li>extract the image URLs,</li>
     *   <li>save each image locally.</li>
     * </ol>
     * A failure on one image is reported and the remaining images are still
     * attempted; if any image failed, an {@link IOException} is thrown once
     * all images have been processed.
     *
     * @param args unused
     * @throws IOException if the page cannot be fetched, the content element
     *         is missing, or at least one image could not be saved
     * @throws MalformedURLException kept for signature compatibility
     */
    public static void main(String[] args) throws MalformedURLException, IOException {
        // 1. Fetch the target page. getDocument returns null when every attempt failed.
        Document doc = JsoupUtil.getDocument(PAGE_URL, REFERER);
        if (doc == null) {
            throw new IOException("Failed to fetch page: " + PAGE_URL);
        }

        // 2. Locate the content element; first() returns null when nothing matches.
        Element content = doc.getElementsByClass(CONTENT_CLASS).first();
        if (content == null) {
            throw new IOException("No element with class '" + CONTENT_CLASS + "' found in " + PAGE_URL);
        }

        // 3. Extract the images. Selecting on the element itself (instead of
        //    re-parsing its HTML) keeps the page's base URI, so relative src
        //    values can be resolved to absolute URLs below.
        Elements images = content.select("img[src]");

        // 4. Save each image locally.
        File targetDir = new File(TARGET_DIR);
        int failures = 0;
        for (Element image : images) {
            String src = image.absUrl("src"); // empty when src cannot be made absolute
            if (src.isEmpty()) {
                System.err.println("Skipping image with unresolvable src: " + image.attr("src"));
                failures++;
                continue;
            }
            System.out.println(src);
            try {
                URL imageUrl = new URL(src);
                String name = fileNameOf(imageUrl);
                if (name == null) {
                    System.err.println("Skipping image without a usable file name: " + src);
                    failures++;
                    continue;
                }
                System.out.println(name);
                // Explicit timeouts: the two-argument overload can block indefinitely.
                FileUtils.copyURLToFile(imageUrl, new File(targetDir, name),
                        CONNECT_TIMEOUT_MS, READ_TIMEOUT_MS);
                System.out.println("爬取成功!");
            } catch (IOException e) {
                // Keep going so one broken image does not abort the whole crawl.
                System.err.println("Failed to download " + src + ": " + e);
                failures++;
            }
        }
        if (failures > 0) {
            throw new IOException(failures + " of " + images.size() + " image(s) could not be saved");
        }
    }

    /**
     * Derives a local file name from the last segment of the URL path, so a
     * query string or fragment never ends up in the name.
     *
     * @param url image URL
     * @return the file name, or {@code null} when the path has no usable last
     *         segment (empty, ".", "..", or containing a backslash, which
     *         would act as a path separator on Windows)
     */
    private static String fileNameOf(URL url) {
        String path = url.getPath();
        String name = path.substring(path.lastIndexOf('/') + 1);
        boolean unusable = name.isEmpty()
                || ".".equals(name)
                || "..".equals(name)
                || name.indexOf('\\') >= 0;
        return unusable ? null : name;
    }

}

JsoupUtil 工具类

package caiji;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Jsoup helper utilities.
 *
 * @author java00123.com
 */
public class JsoupUtil {

    /** Maximum number of fetch attempts (the original loop gave up after the 11th failure). */
    private static final int MAX_ATTEMPTS = 11;

    /** Connect/read timeout passed to Jsoup, in milliseconds. */
    private static final int TIMEOUT_MS = 6000;

    /**
     * Fetches and parses the page at {@code url}, retrying on failure.
     * Each failed attempt is reported on {@code System.err} instead of being
     * silently discarded.
     *
     * @param url    URL of the page to fetch
     * @param domain value sent as the {@code referer} request header
     * @return the parsed document, or {@code null} if all
     *         {@value #MAX_ATTEMPTS} attempts failed
     * @author java00123.com
     */
    public static Document getDocument(String url, String domain) {
        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            try {
                Document doc = Jsoup
                        .connect(url)
                        .timeout(TIMEOUT_MS)
                        .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                        .header("Accept-Encoding", "gzip,deflate,sdch")
                        .header("Connection", "keep-alive")
                        .header("referer", domain)
                        .header("cookie", "data")
                        .followRedirects(true)
                        .userAgent("Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)")
                        .get();
                if (doc != null) {
                    return doc;
                }
            } catch (Exception e) {
                // Best-effort boundary: callers rely on a null return rather than
                // an exception, so report the failure and try again.
                System.err.println("Fetch attempt " + attempt + "/" + MAX_ATTEMPTS
                        + " failed for " + url + ": " + e);
            }
        }
        return null;
    }

    /**
     * Parses an HTML string into a document.
     *
     * @param html HTML source to parse
     * @return the document produced by {@code Jsoup.parse(html)}
     */
    public static Document parseHtml(String html) {
        return Jsoup.parse(html);
    }

}

 

 

你可能感兴趣的:(java爬虫,爬取美女图片)