Java爬虫爬取代理IP

/**
 * Scrapes proxy candidates from the xicidaili.com listing pages and keeps the
 * ones whose host answers a reachability probe.
 */
public class GetIpAddressUtil {
    /** Base URL of the listing; the page number (starting at 1) is appended. */
    private static final String LIST_URL = "http://www.xicidaili.com/nn/";

    /** Listing pages 1..LAST_PAGE (inclusive) are fetched. */
    private static final int LAST_PAGE = 19;

    /** Timeout, in milliseconds, used for both the HTTP fetch and the reachability probe. */
    private static final int TIMEOUT_MILLIS = 3000;

    /** Matches a dotted-quad IPv4 address with each octet in 0-255. */
    private static final String IPV4_REGEX =
            "((?:(?:25[0-5]|2[0-4]\\d|((1\\d{2})|([1-9]?\\d)))\\.){3}(?:25[0-5]|2[0-4]\\d|((1\\d{2})|([1-9]?\\d))))";

    /**
     * Fetches every listing page, finds table cells whose text contains an IPv4
     * address, and records each reachable address together with the text of
     * the cell that follows it.
     *
     * <p>Each reachable entry is also printed to standard output as
     * {@code "<ip> <next-cell-text>"}. A page that fails to download is
     * reported on standard error and skipped; the remaining pages are still
     * processed.
     *
     * @return a map from IP address text to the text of the following table
     *         cell (presumably the port column -- confirm against the page
     *         layout); never {@code null}, empty when nothing was reachable
     */
    public static Map<String, String> getIpAddress() {
        Map<String, String> maps = new HashMap<String, String>();
        for (int page = 1; page <= LAST_PAGE; ++page) {
            String url = LIST_URL + page;
            try {
                // NOTE(review): the "query" parameter and "auth" cookie look like
                // leftovers from a Jsoup usage example; kept so the request is unchanged.
                Document doc = Jsoup.connect(url)
                        .data("query", "Java")
                        .userAgent("Netscape/5")
                        .cookie("auth", "token")
                        .timeout(TIMEOUT_MILLIS)
                        .get();
                Elements ipCells = doc.select("td:matches(" + IPV4_REGEX + ")");
                for (Element ipCell : ipCells) {
                    Element nextCell = ipCell.nextElementSibling();
                    if (nextCell == null) {
                        // The IP cell is the last one in its row: nothing to pair it with.
                        continue;
                    }
                    String ip = ipCell.text();
                    String prot = nextCell.text();
                    if (isPing(ip)) {
                        System.out.println(ip + " " + prot);
                        maps.put(ip, prot);
                    }
                }
            } catch (IOException e) {
                // Best effort: one unreachable page must not abort the whole scrape.
                System.err.println("Failed to fetch " + url + ": " + e);
            }
        }
        return maps;
    }

    /**
     * Reports whether the given host answers a reachability probe within
     * {@code TIMEOUT_MILLIS} milliseconds.
     *
     * <p>NOTE(review): {@link InetAddress#isReachable(int)} probes the host
     * itself, not the proxy port, so a {@code true} result does not prove the
     * proxy service is usable.
     *
     * @param ip host name or literal IP address; may be {@code null}
     * @return {@code true} if the host responded in time; {@code false} if
     *         {@code ip} is {@code null}, cannot be resolved, or the probe failed
     */
    public static boolean isPing(String ip) {
        if (ip == null) {
            return false;
        }
        try {
            return InetAddress.getByName(ip).isReachable(TIMEOUT_MILLIS);
        } catch (IOException ignored) {
            // Covers UnknownHostException too (it is an IOException): an address
            // that cannot be resolved or probed is simply treated as unreachable.
            return false;
        }
    }
}

你可能感兴趣的:(java,爬虫)