Java 使用 Jsoup 爬虫(IP 代理方式)

package com.wxq.pachong.demo1;

import java.io.IOException;

import java.util.*;

import java.util.concurrent.ExecutorService;

import java.util.concurrent.Executors;

import net.sf.json.JSONObject;

import org.jsoup.Jsoup;

import org.jsoup.nodes.Document;

import org.slf4j.Logger;

import org.slf4j.LoggerFactory;

public class Test {

//获取代理ip,记得更换,也可在此网站注册,进行测试使用

  private final static StringGET_IP_URL ="http://webapi.http.zhimacangku.com/getip?num=5&type=2&pro=&city=0&yys=0&port=1&pack=43370&ts=0&ys=0&cs=0&lb=1&sb=0&pb=4&mr=1®ions=";

    public static void main(String[] args)throws InterruptedException {

ExecutorService exe = Executors.newFixedThreadPool(10);

        for (int i=0 ;i<1;i++) {

Document doc =null;

            try {

doc = Jsoup.connect(GET_IP_URL).get();

            }catch (IOException e) {

continue;

            }

System.out.println(doc.text());

            JSONObject jsonObject = JSONObject.fromObject(doc.text());

            List> list = (List>) jsonObject.get("data");

            List> ipList =new ArrayList>();

            for (Map map : list ) {

Map map1 =new HashMap();

                String ip = (String)map.get("ip");

                int port = (int)map.get("port");

                map1.put("ip",ip);

                map1.put("port",port);

                ipList.add(map1);

            }

System.out.println("ipList代理IP:"+ipList);

            int ipCount = list.size();

            System.out.println("IP总数量:"+ipCount);

            //代理IP均分给用户

            int userCount =5;

            int count = ipCount/userCount;

            System.out.println("每个用户分得IP数量:"+count);

            /*for(int j =1; j

}*/

            for (Map map : ipList ) {

String ip = (String)map.get("ip");

                int port = (int)map.get("port");

                checkIp a =new checkIp(ip, port,count);

                exe.execute(a);

                Thread.sleep(3000);

            }

exe.shutdown();

        }

}

}

class checkIpimplements Runnable {

private static Loggerlogger = LoggerFactory.getLogger(checkIp.class);

    private static int suc=0;

    private static int total=0;

    private static int fail=0;

    private Stringip ;

    private int port;

    private int count;

    public checkIp(String ip, int port,int count) {

super();

        this.ip = ip;

        this.port = port;

        this.count = count;

    }

@Override

    public void run() {

Random r =new Random();

        String[] ua = {"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",

                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36 OPR/37.0.2178.32",

                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",

                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",

                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586",

                "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",

                "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",

                "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",

                "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0)",

                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 BIDUBrowser/8.3 Safari/537.36",

                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36 Core/1.47.277.400 QQBrowser/9.4.7658.400",

                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 UBrowser/5.6.12150.8 Safari/537.36",

                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0",

                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36 TheWorld 7",

                "Mozilla/5.0 (Windows NT 6.1; W…) Gecko/20100101 Firefox/60.0"};

        int i = r.nextInt(14);

        logger.info("检测中------ {}:{}",ip,port );

        Map map =new HashMap();

        //可以加参数,用户的账号密码

        map.put("userName","DD1838768852");

        map.put("passWord","DD1838768852");

        try {

total ++;

            long a = System.currentTimeMillis();

            //爬取的目标网站,url记得换下。。。!!!

            Document doc = Jsoup.connect("https://www.xxx.com/free/")

.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8")

.header("Accept-Encoding", "gzip, deflate, br")

.header("Accept-Language", "zh-CN,zh;q=0.9")

.header("Cache-Control", "max-age=0")

.header("User-Agent", ua[i])

.header("Cookie", "channelid=0; sid=1552294186460690; Hm_lvt_7ed65b1cc4b810e9fd37959c9bb51b31=1552294430; _ga=GA1.2.962118079.1552294430; _gid=GA1.2.346759656.1552294430; _gat=1; Hm_lpvt_7ed65b1cc4b810e9fd37959c9bb51b31=1552359319")

.header("Host", "www.xxx.com") //记得换下

.header("Referer", "https://www.xxx.com/")//记得换下

.ignoreContentType(true).ignoreHttpErrors(true)

.proxy(ip,port)

.data(map)

.timeout(30 *1000)

.post();

            System.out.println(ip+":"+port+"访问时间:"+(System.currentTimeMillis() -a) +"  访问结果: "+doc.text());

            suc ++;

        }catch (IOException e) {

e.printStackTrace();

            fail ++;

        }finally {

if (total ==count ) {

System.out.println("总次数:"+total);

                System.out.println("成功次数:"+suc);

                System.out.println("失败次数:"+fail);

            }

}

}

}

你可能感兴趣的:(java使用Jsoup爬虫,IP代理方式)