本人从事大数据相关工作。最近遇到一个问题就是,dmp系统需要向第三方系统使用http协议获取数据。由于对http协议本身了解和理解有限,导致前期写的代码数据请求量怎么都上不去。特此写一篇文章警示后来者。
http基础介绍文件请楼下左拐。(也是本人早期整理或者搜集到有道云笔记的文章)
http://note.youdao.com/noteshare?id=9cc5f12d052c1b75053aed3d15d3d93a&sub=38F20AA66E594424B4A6208C220B54B0
http://note.youdao.com/noteshare?id=2f950c7e1de7187582111d806f80996e&sub=ACA98A13D3C34B108FA178796199431F
大部分编程人员对http的理解应该是:该协议是个无状态协议。应用上很容易,往往不做细致了解就直接使用get和post方法,做短连接式的通信。其实http是有长连接的。(https://www.cnblogs.com/0201zcr/p/4694945.html)
http底层是tcp协议。tcp协议的特点是在数据传输前进行三次握手,通信结束后进行四次挥手。那么http做一次通信时,其实还有三次握手和四次挥手的通信时间开销。HTTP/1.0默认使用短连接(即每次通信都建立连接和断开连接);HTTP/1.1虽然默认支持长连接,但如果每次请求都新建客户端、不复用连接,效果与短连接相同。这就造成一个问题:如果你有大量请求需要做,就会频繁地建立和断开连接。这不仅会造成时间上的浪费,你还会发现一个规律:当你使用大量线程做http请求时,请求成功返回的数量和线程数量成反比,和设置的超时时长成正比。如果你不对httpclient重用,可能会报如下错误:Cannot assign requested address。导致问题的原因是客户端频繁地连接服务器,由于每次连接都在很短的时间内结束,产生了大量处于TIME_WAIT状态的连接,以至于用光了可用的端口号,所以新的连接没办法绑定端口,即“Cannot assign requested address”。这是客户端的问题,不是服务器端的问题。通过netstat,的确可以看到很多TIME_WAIT状态的连接。该问题的表面解决方式是:
执行命令修改如下2个内核参数 (需要root权限)
sysctl -w net.ipv4.tcp_timestamps=1 开启对于TCP时间戳的支持,若该项设置为0,则下面一项设置不起作用
sysctl -w net.ipv4.tcp_tw_recycle=1 表示开启TCP连接中TIME-WAIT sockets的快速回收
但是这种方式治标不治本(而且tcp_tw_recycle在NAT环境下可能导致连接失败,Linux 4.12起该参数已被移除)。
那么正确的姿势是使用长连接。所谓的长连接,就是客户端和服务器端约定在一定时间内保持已建立的连接,下次有数据到达时可以直接复用该连接继续传输,而不必再执行断开连接和建立连接的操作。这样既节约系统端口资源,又节约通信时间,提高传输效率,减少系统资源消耗。
具体的介绍我就不介绍了,发现网上一个前辈的讲解蛮详细的直接贴个链接在下面,左拐。
https://www.cnblogs.com/bethunebtj/p/8493379.html
关于具体的实现方式,我提供了下面的代码,供大家参考。
package com.iflytek.migu.test;
import org.apache.http.*;
import org.apache.http.client.HttpRequestRetryHandler;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.ConnectTimeoutException;
import org.apache.http.conn.routing.HttpRoute;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.LayeredConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
import javax.net.ssl.SSLException;
import javax.net.ssl.SSLHandshakeException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.net.UnknownHostException;
import java.nio.charset.Charset;
/**
* long connection tool class
*/
public class HttpClientContinueReq {
private PoolingHttpClientConnectionManagerpoolingHttpClientConnectionManager;
private HttpRequestRetryHandlerhttpRequestRetryHandler;
private RequestConfigrequestConfig;
private CloseableHttpClientcloseableHttpClient;
private int RETRY_TIMES =0;
private StringIP;
private int PORT;
private int MAX_POOL;
private int MAX_PRE_ROUTE;
private int MAX_ROUTE;
public RequestConfiggetRequestConfig() {
return requestConfig;
}
public HttpClientContinueReqsetRequestConfig(RequestConfig requestConfig) {
this.requestConfig = requestConfig;
return this;
}
public int getRetryTimes() {
return RETRY_TIMES;
}
public HttpClientContinueReqsetRetryTimes(int RETRY_TIMES) {
this.RETRY_TIMES = RETRY_TIMES;
return this;
}
public StringgetIp() {
return IP;
}
public HttpClientContinueReqsetIp(String IP) {
this.IP = IP;
return this;
}
public int getPort() {
return PORT;
}
public HttpClientContinueReqsetPort(int PORT) {
this.PORT = PORT;
return this;
}
public int getMaxPool() {
return MAX_POOL;
}
public HttpClientContinueReqsetMaxPool(int MAX_POOL) {
this.MAX_POOL = MAX_POOL;
return this;
}
public int getMaxPreRoute() {
return MAX_PRE_ROUTE;
}
public HttpClientContinueReqsetMaxPreRoute(int MAX_PRE_ROUTE) {
this.MAX_PRE_ROUTE = MAX_PRE_ROUTE;
return this;
}
public int getMaxRoute() {
return MAX_ROUTE;
}
public HttpClientContinueReqsetMaxRoute(int MAX_ROUTE) {
this.MAX_ROUTE = MAX_ROUTE;
return this;
}
public HttpClientContinueReq() {
}
public HttpClientContinueReq(String IP, int PORT, int MAX_POOL, int MAX_PRE_ROUTE, int MAX_ROUTE, RequestConfig requestConfig, int RETRY_TIMES) {
this.IP = IP;
this.PORT = PORT;
this.MAX_POOL = MAX_POOL;
this.MAX_PRE_ROUTE = MAX_PRE_ROUTE;
this.MAX_ROUTE = MAX_ROUTE;
this.requestConfig = requestConfig;
this.RETRY_TIMES = RETRY_TIMES;
}
/**
* user-defined Retry handler
*/
class MyHttpRequestRetryHandlerimplements HttpRequestRetryHandler {
@Override
public boolean retryRequest(IOException exception,
int executionCount, HttpContext context) {
if (executionCount >=RETRY_TIMES) {
return false;
}
if (exceptioninstanceof NoHttpResponseException) {
return true;
}
if (exceptioninstanceof SSLHandshakeException) {
return false;
}
if (exceptioninstanceof InterruptedIOException) {
return false;
}
if (exceptioninstanceof UnknownHostException) {
return false;
}
if (exceptioninstanceof ConnectTimeoutException) {
return false;
}
if (exceptioninstanceof SSLException) {
return false;
}
HttpClientContext clientContext = HttpClientContext.adapt(context);
HttpRequest request = clientContext.getRequest();
// 如果请求是幂等的,就再次尝试
if (!(requestinstanceof HttpEntityEnclosingRequest)) {
return true;
}
return false;
}
}
/**
* long connection client
*
* @return
*/
public CloseableHttpClientgetHttpClient() {
if (this.closeableHttpClient ==null) {
synchronized (this.closeableHttpClient) {
if (this.closeableHttpClient ==null) {
ConnectionSocketFactory connectionSocketFactory = PlainConnectionSocketFactory.getSocketFactory();
LayeredConnectionSocketFactory sslsf = SSLConnectionSocketFactory.getSocketFactory();
Registry registry = RegistryBuilder.create().register("http", connectionSocketFactory).register("https", sslsf).build();
poolingHttpClientConnectionManager =new PoolingHttpClientConnectionManager(registry);
poolingHttpClientConnectionManager.setMaxTotal(MAX_POOL);
poolingHttpClientConnectionManager.setDefaultMaxPerRoute(MAX_PRE_ROUTE);
HttpHost httpHost =new HttpHost(IP, PORT);
poolingHttpClientConnectionManager.setMaxPerRoute(new HttpRoute(httpHost), MAX_ROUTE);
httpRequestRetryHandler =new MyHttpRequestRetryHandler();
this.closeableHttpClient = HttpClients.custom().setConnectionManager(poolingHttpClientConnectionManager)
.setDefaultRequestConfig(requestConfig)
.setRetryHandler(httpRequestRetryHandler)
.build();
}
}
}
return closeableHttpClient;
}
/**
* 请求返回
*
* @param obj HttpGet or HttpPost object
* @param reqMethod the request method: GET OR POST.
* @return
*/
public StringgetResponse(Object obj, String reqMethod) {
String result =null;
CloseableHttpResponse httpResponse =null;
try {
httpResponse = reqMethod.equals("POST") ?this.getHttpClient().execute((HttpPost) obj) : reqMethod.equals("GET") ?this.getHttpClient().execute((HttpGet) obj) :null;
}catch (Exception e) {
e.printStackTrace();
}finally {
if (httpResponse !=null && httpResponse.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
try {
result = EntityUtils.toString(httpResponse.getEntity(), Charset.forName("utf-8"));
EntityUtils.consume(httpResponse.getEntity());
}catch (IOException e) {
e.printStackTrace();
}
}
}
return result;
}
public static void main(String[] args) {
RequestConfig config = RequestConfig.custom().setConnectionRequestTimeout(2000).setConnectionRequestTimeout(2000).setSocketTimeout(2000).build();
HttpClientContinueReq req =new HttpClientContinueReq("127.0.0.1", 8080, 2000, 4, 1000, config, 3);
System.out.println(req.getResponse(new HttpGet("http://127.0.0.1:8080/index"), "GET"));
}
}