JSoup使用快代理的独享代理IP获取网页的接口

本文介绍如何在 Java 中使用 JSoup,通过快代理的独享代理 IP 获取网页数据。

APIHttpClient.java

import java.io.IOException;
import java.security.SecureRandom;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.X509TrustManager;

import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Fetches web pages with Jsoup through a dedicated proxy, falling back to a
 * direct (non-proxied) request when every proxy attempt fails.
 *
 * <p>Usage: {@code Document doc = new APIHttpClient().getJsoupDocGet(url);}
 *
 * <p>The most recently selected proxy is kept in mutable instance fields that
 * are read and written without synchronization, so one instance should not be
 * shared between threads without external locking.
 */
public class APIHttpClient {

	/** Number of proxied fetches tried before falling back to a direct fetch. */
	private static final int PROXY_ATTEMPTS = 2;

	/** Timeout handed to Jsoup for each request, in milliseconds. */
	private static final int TIMEOUT_MILLIS = 1000 * 60;

	private static final String ACCEPT_LANGUAGE = "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3";
	private static final String REFERER = "https://www.baidu.com/";
	private static final String USER_AGENT =
			"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:48.0) Gecko/20100101 Firefox/48.0";

	/** Proxy pool: host/IP mapped to its port (kept as text, parsed on use). */
	private static final Map<String, String> PROXIES = new HashMap<String, String>();

	static {
		// Placeholder entry: replace with a real dedicated proxy IP and port.
		// More entries may be added with further put(...) calls.
		PROXIES.put("IP号", "端口");
	}

	private final Random random = new Random();

	/** Host of the proxy chosen by the last {@link #getRandomIP()} call. */
	private String useIp;

	/** Port (as text) of the proxy chosen by the last {@link #getRandomIP()} call. */
	private String usePort;

	/**
	 * Downloads and parses {@code url}, preferring a proxied connection.
	 *
	 * <p>Up to {@value #PROXY_ATTEMPTS} attempts are made through a randomly
	 * chosen proxy; each failure is reported on {@code System.err} and the next
	 * attempt follows. If none succeeds, one final request is made WITHOUT a
	 * proxy, which exposes the caller's own address to the target site.
	 *
	 * @param url the page to fetch
	 * @return the parsed document, or {@code null} if the calling thread was
	 *         interrupted while waiting between requests
	 * @throws ClientProtocolException declared for backward compatibility with
	 *         existing callers
	 * @throws IOException if the final direct request fails
	 */
	public Document getJsoupDocGet(String url) throws ClientProtocolException, IOException {
		// Installs JVM-wide defaults, so once per call is enough (it used to run per attempt).
		trustEveryone();

		for (int attempt = 1; attempt <= PROXY_ATTEMPTS; attempt++) {
			if (!pauseBriefly()) {
				return null;
			}
			try {
				getRandomIP();
				Document doc = Jsoup.connect(url).proxy(useIp, Integer.parseInt(usePort))
						.header("Accept", "*/*")
						.header("Accept-Encoding", "gzip, deflate")
						.header("Accept-Language", ACCEPT_LANGUAGE)
						.header("Referer", REFERER)
						.header("User-Agent", USER_AGENT)
						.timeout(TIMEOUT_MILLIS).get();
				if (doc != null) {
					System.out.println("代理正确");
					return doc;
				}
			} catch (IOException e) {
				reportProxyFailure(attempt, e);
			} catch (RuntimeException e) {
				// Covers e.g. NumberFormatException from a non-numeric port entry.
				reportProxyFailure(attempt, e);
			}
		}

		// Every proxy attempt failed: last resort is a direct connection.
		if (!pauseBriefly()) {
			return null;
		}
		return Jsoup.connect(url)
				.header("Accept", "*/*")
				.header("Accept-Encoding", "gzip, deflate")
				.header("Accept-Language", ACCEPT_LANGUAGE)
				.header("Referer", REFERER)
				.header("User-Agent", USER_AGENT)
				.timeout(TIMEOUT_MILLIS).get();
	}

	/**
	 * Picks one proxy at random from the configured pool and stores it in
	 * {@code useIp} / {@code usePort}.
	 *
	 * @throws IllegalStateException if the proxy pool is empty
	 * @throws ClientProtocolException declared for backward compatibility
	 * @throws IOException declared for backward compatibility
	 */
	public void getRandomIP() throws ClientProtocolException, IOException {
		if (PROXIES.isEmpty()) {
			throw new IllegalStateException("No proxy configured: the proxy pool is empty");
		}
		String[] hosts = PROXIES.keySet().toArray(new String[0]);
		useIp = hosts[random.nextInt(hosts.length)];
		usePort = PROXIES.get(useIp);
	}

	/**
	 * Sleeps for a random 100-199 ms so consecutive requests are spaced out.
	 *
	 * @return {@code true} if the pause completed, {@code false} if the thread
	 *         was interrupted (the interrupt flag is restored for the caller)
	 */
	private boolean pauseBriefly() {
		try {
			Thread.sleep(random.nextInt(100) + 100);
			return true;
		} catch (InterruptedException e) {
			Thread.currentThread().interrupt();
			return false;
		}
	}

	/** Reports a failed proxied attempt instead of silently discarding it. */
	private void reportProxyFailure(int attempt, Exception cause) {
		System.err.println("Proxy attempt " + attempt + "/" + PROXY_ATTEMPTS + " via " + useIp + ":"
				+ usePort + " failed: " + cause);
	}

	/**
	 * Disables HTTPS hostname and certificate verification for every
	 * {@link HttpsURLConnection} created in this JVM after the call.
	 *
	 * <p>SECURITY WARNING: this accepts any certificate from any host, so TLS
	 * no longer protects against man-in-the-middle attacks, and the setting is
	 * process-wide rather than limited to this class. Kept because existing
	 * behavior depends on it; review before using outside of throwaway scraping.
	 */
	private void trustEveryone() {
		try {
			HttpsURLConnection.setDefaultHostnameVerifier(new HostnameVerifier() {
				@Override
				public boolean verify(String hostname, SSLSession session) {
					return true;
				}
			});

			SSLContext context = SSLContext.getInstance("TLS");
			context.init(null, new X509TrustManager[] { new X509TrustManager() {
				@Override
				public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
					// Intentionally empty: every client certificate is accepted.
				}

				@Override
				public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
					// Intentionally empty: every server certificate is accepted.
				}

				@Override
				public X509Certificate[] getAcceptedIssuers() {
					return new X509Certificate[0];
				}
			} }, new SecureRandom());
			HttpsURLConnection.setDefaultSSLSocketFactory(context.getSocketFactory());
		} catch (Exception e) {
			// Best effort: requests still proceed with whatever TLS defaults are in place.
			System.err.println("Could not install trust-all TLS defaults: " + e);
		}
	}
}

Test.java

import java.io.IOException;
import org.apache.http.client.ClientProtocolException;
import org.jsoup.nodes.Document;

/**
 * Demo driver for {@code APIHttpClient}: fetches the same page over and over
 * and stops at the first fetch that yields no document.
 */
public class Test {
	/**
	 * Runs the fetch loop. It only ends when a fetch returns {@code null}
	 * (reported on {@code System.err} with the 1-based round number) or when
	 * the fetch throws.
	 */
	public static void main(String args[]) throws ClientProtocolException, IOException {
		final String target = "https://cn.imslp.org/wiki/Apolo_y_Dafne_(Dur%C3%B3n%2C_Sebasti%C3%A1n)";
		APIHttpClient client = new APIHttpClient();
		// round counts completed fetches, starting at 1 for the first one.
		for (int round = 1;; round++) {
			Document page = client.getJsoupDocGet(target);
			if (page == null) {
				System.err.println("第:" + round + "次error!");
				return;
			}
		}
	}
}

 

你可能感兴趣的:(爬虫,Jsoup,代理独享IP)