使用代理IP发送请求,出现502错误 java.io.IOException: Unable to tunnel through proxy. Proxy returns “HTTP/1.1 502 Bad Gateway”

今天有客户咨询使用代理IP请求一个国外网址(https://read.qxmd.com/),这个网址在国内打开速度比较慢,然后报错了 java.io.IOException: Unable to tunnel through proxy. Proxy returns “HTTP/1.1 502 Bad Gateway”

HTTP 502 – 网关错误

此处一般指Nginx做反向代理服务器时,所连接的Web应用服务器无响应导致的。一般是因为后端的Tomcat,Jetty,Tornado等服务器没有启动。我们知道,Nginx属于HTTP服务器,不属于Web应用服务器。

出现“HTTP 502 – 网关错误”,需要重启Web应用服务器。

排查问题解决方案:

  1. 确保目标网站的URL能够正常打开,如果打不开或者长时间不响应(打开很慢),那么这种情况下考虑是网址URL有问题,没有更好的解决方案,只能忽略个别错误。

  2. 如果目标URL能够正常打开,再试一下在不使用代理IP的情况下直接请求,能否正常返回。如果返回正常,可以继续第三步排查;如果返回仍然为502或者50X的错误,那么这种情况下考虑是网址URL本身有问题,没有更好的解决方案,只能忽略个别错误。

  3. 考虑是代理IP在可用时间范围内,无法正确打开URL导致最终无响应,解决办法就是换下一个新IP。

下面是我的测试代码:


import java.io.BufferedInputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.ProxyConfig;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

/**
 * Demo that periodically fetches dynamic proxy addresses from the data5u HTTP API and
 * uses them from several crawler threads to request a target URL through HtmlUnit.
 *
 * <p>One {@link GetIP} task refreshes {@link #ipList}; each {@link Crawler} thread picks a
 * random entry from the current list for every request. The run ends when {@link GetIP}
 * has completed the configured number of successful fetches and sets {@link #gameOver}.
 */
public class TestDynamicIp {
	/**
	 * Current batch of proxies as "host:port" strings. The list is replaced wholesale by
	 * {@link GetIP} and never mutated in place; {@code volatile} makes each replacement
	 * visible to the crawler threads.
	 */
	public static volatile List<String> ipList = new ArrayList<>();
	/** Stop flag polled by every loop; {@code volatile} so all threads observe the change. */
	public static volatile boolean gameOver = false;

	/**
	 * Configures the test, starts the IP fetcher and the crawler threads, then waits for
	 * {@link #gameOver} before exiting the JVM.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		long fetchIpSeconds = 3;
		int threadNum = 2;
		int testTime = 5;
		// Order number of the proxy service; required for the API to return IPs
		// (obtained from https://www.data5u.com).
		String order = "请填写无忧代理IP订单号";
		// Target URL to crawl. This could be wrapped in a method that takes the URL as a
		// parameter in order to crawl different sites.
		String targetUrl = "https://www.iqiyi.com/v_19rrlcvy50.html?pltfm=11&pos=title&flashvars=videoIsFromQidan%3Ditemviewclk_a#vfrm=5-6-0-1";
		// Whether to execute JavaScript; enabling it slows requests down.
		boolean useJS = false;
		// Request timeout in milliseconds.
		int timeOut = 5000;
		
		if (order == null || "".equals(order)) {
			System.err.println("请输入无忧代理IP动态代理订单号");
			return;
		}
		
		System.out.println(">>>>>>>>>>>>>>无忧代理动态IP测试开始<<<<<<<<<<<<<<");
		System.out.println("***************");
		System.out.println("接口返回IP为国内各地区,每次最多返回10个");
		System.out.println("提取IP间隔 " + fetchIpSeconds + " 秒 ");
		System.out.println("开启爬虫线程 " + threadNum);
		System.out.println("爬虫目标网址  " + targetUrl);
		// Report the configured count instead of a hard-coded number.
		System.out.println("测试次数 " + testTime + " ");
		System.out.println("***************\n");
		TestDynamicIp tester = new TestDynamicIp();
		new Thread(tester.new GetIP(fetchIpSeconds * 1000, testTime, order)).start();
		for (int i = 0; i < threadNum; i++) {
			tester.new Crawler(200, targetUrl, useJS, timeOut).start();
		}
		while (!gameOver) {
			try {
				Thread.sleep(100);
			} catch (InterruptedException e) {
				// Restore the flag and stop waiting; looping on would make sleep() throw
				// again immediately on every iteration.
				Thread.currentThread().interrupt();
				break;
			}
		}
		System.out.println(">>>>>>>>>>>>>>无忧代理动态IP测试结束<<<<<<<<<<<<<<");
		System.exit(0);
	}
    
	/**
	 * Crawler thread: repeatedly requests {@code targetUrl} through a randomly chosen
	 * proxy until {@link TestDynamicIp#gameOver} is set.
	 */
	public class Crawler extends Thread{
		/** Pause between two requests, in milliseconds. */
		long sleepMs = 200;
		/** Whether HtmlUnit should execute JavaScript. */
		boolean useJs = false;
		/** URL requested on every iteration. */
		String targetUrl = "";
		/** HtmlUnit request timeout, in milliseconds. */
		int timeOut = 5000;
		
		/**
		 * @param sleepMs   pause between two requests, in milliseconds
		 * @param targetUrl URL to request
		 * @param useJs     whether to enable JavaScript in the HtmlUnit client
		 * @param timeOut   request timeout in milliseconds
		 */
		public Crawler(long sleepMs, String targetUrl, boolean useJs, int timeOut) {
			this.sleepMs = sleepMs;
			this.targetUrl = targetUrl;
			this.useJs = useJs;
			this.timeOut = timeOut;
		}
		
		@Override
		public void run() {
			while (!gameOver) {
				webParseHtml(targetUrl);
				try {
					Thread.sleep(sleepMs);
				} catch (InterruptedException e) {
					// Preserve the interrupt status and stop this crawler.
					Thread.currentThread().interrupt();
					return;
				}
			}
		}
		
		/**
		 * Requests {@code url} through a random proxy from the current list and prints the
		 * page title and body text.
		 *
		 * @param url the URL to fetch
		 * @return the page as XML, or an empty string when no proxy is available yet or
		 *         the request failed (the failure message is printed to stderr)
		 */
		public String webParseHtml(String url) {
			// Check for a proxy first so that no browser instance is created (and torn
			// down) while the proxy list is still empty.
			String ipport = getAProxy();
			if (ipport == null) {
				System.out.print(".");
				return "";
			}
			
			String html = "";
			BrowserVersion[] versions = {BrowserVersion.CHROME, BrowserVersion.BEST_SUPPORTED};
			WebClient client = new WebClient(versions[(int)(versions.length * Math.random())]);
			try {
				client.addRequestHeader("Referer", "https://tieba.baidu.com/");
				// Do not throw on 4xx/5xx, so an error page (e.g. a 502) is returned and printed.
				client.getOptions().setThrowExceptionOnFailingStatusCode(false);
				client.getOptions().setJavaScriptEnabled(useJs);
				client.getOptions().setCssEnabled(false);
				client.getOptions().setThrowExceptionOnScriptError(false);
				client.getOptions().setTimeout(timeOut);
				client.getOptions().setAppletEnabled(true);
				client.getOptions().setGeolocationEnabled(true);
				client.getOptions().setRedirectEnabled(true);
				
				client.getOptions().setUseInsecureSSL(true);
				
				// A malformed "host:port" entry throws here and is reported by the catch below.
				String[] hostAndPort = ipport.split(":");
				ProxyConfig proxyConfig = new ProxyConfig(hostAndPort[0], Integer.parseInt(hostAndPort[1]));
				client.getOptions().setProxyConfig(proxyConfig);
			
				HtmlPage page = client.getPage(url);
				html = page.asXml();
				
				Document doc = Jsoup.parse(html);
				
				System.out.println(getName() + " 使用代理 " + ipport + "请求目标网址返回:" + doc.select("title").text() + doc.select("body").text());
			} catch (Exception e) {
				// Best effort: a failing proxy is expected; report it and let the caller retry.
				System.err.println(e.getMessage());
			} finally {
				client.close();
			}
			return html;
		}
		
		/**
		 * Picks a random proxy from the current list.
		 *
		 * @return a "host:port" string, or {@code null} when the list is empty
		 */
	    private String getAProxy() {
			// Read the shared reference once: GetIP may swap in a list of a different size
			// between two reads, which could otherwise produce an out-of-range index.
			List<String> snapshot = TestDynamicIp.ipList;
			if (snapshot.isEmpty()) {
				return null;
			}
			return snapshot.get((int)(Math.random() * snapshot.size()));
		}
	}
	
	/**
	 * Periodically fetches a fresh batch of dynamic proxies from the API and publishes it
	 * to {@link TestDynamicIp#ipList}. After {@code maxTime} successful fetches it sets
	 * {@link TestDynamicIp#gameOver}.
	 */
	public class GetIP implements Runnable{
		/** Pause between two API calls, in milliseconds. */
		long sleepMs = 1000;
		/** Number of successful fetches after which the test ends. */
		int maxTime = 3;
		/** Order number sent to the API. */
		String order = "";
		
		/**
		 * @param sleepMs pause between two API calls, in milliseconds
		 * @param maxTime number of successful fetches before the test is ended
		 * @param order   order number sent to the API
		 */
		public GetIP(long sleepMs, int maxTime, String order) {
			this.sleepMs = sleepMs;
			this.maxTime = maxTime;
			this.order = order;
		}
		
		@Override
		public void run() {
			long getIpTime = 0;
			// 1-based index of the next fetch; only successful fetches advance it.
			int time = 1;
			while (!gameOver) {
				// Honour the configured limit rather than a hard-coded one.
				if (time > maxTime) {
					gameOver = true;
					break;
				}
				try {
					List<String> fetched = fetchProxies();
					if (fetched.size() > 0) {
						TestDynamicIp.ipList = fetched;
						System.out.println("第" + ++getIpTime + "次获取动态IP " + fetched.size() + " 个");
						time += 1;
					}
				} catch (Exception e) {
					e.printStackTrace();
					System.err.println(">>>>>>>>>>>>>>获取IP出错");
				}
				try {
					Thread.sleep(sleepMs);
				} catch (InterruptedException e) {
					// The fetcher is the only thread that ends the test, so end it here
					// instead of leaving main() and the crawlers waiting forever.
					Thread.currentThread().interrupt();
					gameOver = true;
					return;
				}
			}
		}
		
		/**
		 * Calls the API once and parses the response, one entry per line in the form
		 * {@code host:port,number}. Every line is echoed to stdout; lines that do not
		 * match (for example an error message from the API) are skipped.
		 *
		 * @return the "host:port" entries whose numeric field is positive; possibly empty
		 * @throws java.io.IOException if the request fails or times out
		 */
		private List<String> fetchProxies() throws java.io.IOException {
			java.net.URL url = new java.net.URL("http://api.ip.data5u.com/dynamic/get.html?order=" + order + "&ttl&random=1");
			HttpURLConnection connection = (HttpURLConnection)url.openConnection();
			// Configure the connection that is actually used, and bound the read as well
			// so a stalled API cannot hang the fetcher.
			connection.setConnectTimeout(3000);
			connection.setReadTimeout(3000);
			
			String body;
			try (InputStream in = new BufferedInputStream(connection.getInputStream())) {
				// Read until EOF; available() only reports what can be read without
				// blocking and must not be used as the response length.
				java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
				byte[] chunk = new byte[4096];
				int bytesRead;
				while ((bytesRead = in.read(chunk)) != -1) {
					buffer.write(chunk, 0, bytesRead);
				}
				body = new String(buffer.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
			} finally {
				connection.disconnect();
			}
			
			List<String> usable = new ArrayList<>();
			for (String line : body.split("\n")) {
				// Trim so that a CRLF line ending does not break the number parsing.
				String entry = line.trim();
				System.out.println(entry);
				String[] parts = entry.split(",");
				if (parts.length < 2) {
					continue;
				}
				try {
					// Second field is presumably the remaining TTL (the request passes the
					// "ttl" flag) - TODO confirm against the API documentation.
					if (Integer.parseInt(parts[1].trim()) > 0) {
						usable.add(parts[0].trim());
					}
				} catch (NumberFormatException ignored) {
					// Not a proxy entry; deliberately skipped.
				}
			}
			return usable;
		}
	}
	
}

无忧代理IP(www.data5u.com)原创文章,转载请注明出处:https://www.data5u.com/help/article-139.html

你可能感兴趣的:(爬虫系列,代理ip,502,Bad,Gateway,java爬虫)