HTTP消息是由普通ASCII文本组成。消息包括消息头和数据体部分。消息头以行为单位,每行以CRLF(回车和换行)结束,消息头结束后,额外增加一个CRLF,之后内容就是数据体部分了。
格式如图:
http协议格式
那么,如果给出下面这样一段HTTP请求文本,我们能否用Java把它解析出来呢:
GET /doaction?p=123&k=3343 HTTP/1.1
Host: localhost:12345
Connection: keep-alive
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36
Accept-Encoding: gzip, deflate, sdch
Accept-Language: zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4
Cookie: name=jackey;
Host: localhost:12345
Connection: keep-alive
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36
Accept-Encoding: gzip, deflate, sdch
Accept-Language: zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4
Cookie: name=jackey;
下面是我用Java解析这段HTTP请求的代码:
import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; /** * http协议解析 * @author Mic * */ public class HttpOperator { /** * 协议流 */ private InputStream source; private byte[] buf;//缓存结果 private int pos;//偏移 private int start;//开始 private int end;//结束 private int capacity;//缓存容量 private int lastValid;//每次取数据buf中最有一位字节的下标 private Request request=new Request(); public enum METHOD{ GET,HEAD,POST,OPTIONS,PUT,Delete,TRACE,CONNECT } public HttpOperator(int capacity) { pos=0; start=0; end=0; lastValid=-1; this.capacity=capacity; buf=new byte[capacity]; } public HttpOperator() { this(4096); } /** *填充缓存 */ public boolean fill(){ return fill(buf, 0, capacity); } public boolean fill(byte[] buf,int offset,int length){ int len=-1; try { len=source.read(buf, offset, length); }catch (IOException e) { System.out.println("read source failed"); return false; } start=0; end=0; lastValid=len==-1?-1:len-1; return len!=-1; } public void skipSpace(){ do{ if(pos>=lastValid){ fill(); pos=-1; } pos++; }while(buf[pos]==Constant.SP||buf[pos]==Constant.HT); } public void copy(byte[] src ,int srcOff,byte[] dest,int destOff,int len){ //扩容 if((dest.length-destOff)=lastValid){ copy(buf, start,v_buf,v_pos,pos-start); v_pos+=(pos-start); v_len+=(pos-start); start=pos=end=0; fill(); } } try { request.setMethod(new String(v_buf,0,v_len,"UTF-8")); } catch (UnsupportedEncodingException e) { System.out.println("set method failed"); throw new RuntimeException(e); } //URI v_buf=new byte[1024]; v_pos=0; pos--; skipSpace(); start=pos; v_len=0; while(true){ if(buf[pos]==Constant.SP||buf[pos]==Constant.HT){ copy(buf, start,v_buf,v_pos,pos-start); v_pos+=(pos-start); v_len+=(pos-start); break; } end=pos; pos++; //判断是否用完 if(pos>=lastValid){ copy(buf, start,v_buf,v_pos,pos-start); v_pos+=(pos-start); v_len+=(pos-start); start=pos=end=0; fill(); } } try { request.setUri(new String(v_buf,0,v_len,"UTF-8")); } catch (UnsupportedEncodingException e) { System.out.println("set 
URI failed"); throw new RuntimeException(e); } //协议版本如HTTP/1.1 v_buf=new byte[1024]; v_pos=0; pos--; skipSpace(); start=pos; v_len=0; while(true){ if(buf[pos]==Constant.LF||buf[end]==Constant.CR){ copy(buf, start,v_buf,v_pos,end-start); v_pos+=(pos-start); v_len+=(end-start); quit=true; break; }else if(buf[pos]==Constant.HT||buf[pos]==Constant.SP){ skipSpace(); continue; } end=pos; pos++; //判断是否用完 if(pos>=lastValid){ copy(buf, start,v_buf,v_pos,pos-start); v_pos+=(pos-start); v_len+=(pos-start); start=pos=end=0; fill(); } } try { request.setHttpVersion(new String(v_buf,0,v_len,"UTF-8")); } catch (UnsupportedEncodingException e) { System.out.println("set HttpVersion failed"); throw new RuntimeException(e); } } } public void parseRequestHeaders(){ while(parseRequestHeader()){ } } private boolean parseRequestHeader(){ boolean quit=true; skipSpace(); //Method byte[] v_buf=new byte[1024]; int v_pos=0; int v_len=0; start=pos; //name HttpHeader httpHeader=null; while(true){ if(buf[pos]==Constant.COLON){ copy(buf, start,v_buf,v_pos,pos-start); v_pos+=(pos-start); v_len+=(pos-start); break; } end=pos; pos++; //判断是否用完 if(pos>=lastValid){ copy(buf, start,v_buf,v_pos,pos-start); v_pos+=(pos-start); v_len+=(pos-start); start=pos=end=0; fill(); } } try { httpHeader=HttpHeader.addHeaderName(new String(v_buf,0,v_len,"UTF-8")); } catch (UnsupportedEncodingException e1) { return false; } //Value v_buf=new byte[1024]; v_pos=0; skipSpace(); start=pos; v_len=0; while(true){ if(buf[pos]==Constant.LF||buf[end]==Constant.CR){ copy(buf, start,v_buf,v_pos,end-start); v_pos+=(pos-start); v_len+=(end-start); break; }else if(buf[pos]==Constant.HT||buf[pos]==Constant.SP){ skipSpace(); continue; } end=pos; pos++; //判断是否用完 if(pos>=lastValid){ copy(buf, start,v_buf,v_pos,pos-start); v_pos+=(pos-start); v_len+=(pos-start); start=pos=end=0; fill(); } } try { httpHeader.setValue(new String(v_buf,0,v_len,"UTF-8")); } catch (UnsupportedEncodingException e) { System.out.println("set HttpVersion 
failed"); throw new RuntimeException(e); } request.addHeader(httpHeader); //是否完成 if(pos>=lastValid){ start=pos=end=0; fill(); } if(buf[pos+1]==Constant.CR||buf[pos+2]==Constant.LF){ quit=false; pos+=3; } return quit; } public void parseRequestBody(){ if(pos<=lastValid){ byte[] body=new byte[1024]; copy(buf, pos,body,0,lastValid-pos+1); while(fill(buf, 0, capacity)){ copy(buf, 0,body,0,lastValid+1); } request.setBody(body); } } public InputStream getSource() { return source; } public void setSource(InputStream source) { this.source = source; } public Request getRequest() { return request; } public void setRequest(Request request) { this.request = request; } }
HttpOperator 是用来解析HTTP协议的一个类,其中有三个关键方法:parseRequestLine 解析请求行,parseRequestHeaders 解析请求头,parseRequestBody 解析请求包体。
而解析后的内容用Request来封装
import java.io.Serializable; import java.util.ArrayList; import java.util.List; /** * 所获得的请求 * @author Mic * */ public class Request implements Serializable{ //请求方法 private String method; //协议版本 private String httpVersion; //uri private String uri; //域头 private Listhttpheaders; //body private byte[] body; public String getMethod() { return method; } public void setMethod(String method) { this.method = method; } public String getHttpVersion() { return httpVersion; } public void setHttpVersion(String httpVersion) { this.httpVersion = httpVersion; } public String getUri() { return uri; } public void setUri(String uri) { this.uri = uri; } public List getHttpheaders() { return httpheaders; } public void setHttpheaders(List httpheaders) { this.httpheaders = httpheaders; } public byte[] getBody() { return body; } public void setBody(byte[] body) { this.body = body; } public void addHeader(HttpHeader header){ if(httpheaders==null) httpheaders=new ArrayList (); httpheaders.add(header); } public void addHeader(String name,Object value){ if(httpheaders==null) httpheaders=new ArrayList (); httpheaders.add(new HttpHeader(name,value)); } }
下面是其他的一些附加类
/**
 * Byte and string constants used when tokenizing HTTP messages.
 */
public final class Constant {

    /** Constants holder; not meant to be instantiated. */
    private Constant() {
    }

    /** CRLF, the HTTP line terminator. */
    public static final String CRLF = "\r\n";

    /** CR (carriage return). */
    public static final byte CR = (byte) '\r';

    /** LF (line feed). */
    public static final byte LF = (byte) '\n';

    /** SP (space). */
    public static final byte SP = (byte) ' ';

    /** HT (horizontal tab). */
    public static final byte HT = (byte) '\t';

    /** COLON. */
    public static final byte COLON = (byte) ':';

    /** SEMI_COLON. */
    public static final byte SEMI_COLON = (byte) ';';

    /** 'A'. */
    public static final byte A = (byte) 'A';

    /** 'a'. */
    public static final byte a = (byte) 'a';

    /** 'Z'. */
    public static final byte Z = (byte) 'Z';
}
/**
 * A single HTTP header field: a name paired with a value.
 */
public class HttpHeader {

    /** Header field name. */
    private String name;

    /** Header field value; stays {@code null} until one is supplied. */
    private Object value;

    /**
     * Creates a header that has a name but no value yet.
     *
     * @param name header field name
     */
    public HttpHeader(String name) {
        this(name, null);
    }

    /**
     * Creates a header with both name and value.
     *
     * @param name  header field name
     * @param value header field value
     */
    public HttpHeader(String name, Object value) {
        this.name = name;
        this.value = value;
    }

    /**
     * Factory for a header that has a name but no value yet.
     *
     * @param name header field name
     * @return a new header with the given name
     */
    public static HttpHeader addHeaderName(String name) {
        return new HttpHeader(name);
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public Object getValue() {
        return value;
    }

    public void setValue(Object value) {
        this.value = value;
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("HttpHeader [name=");
        text.append(name);
        text.append(", value=");
        text.append(value);
        text.append("]");
        return text.toString();
    }
}
基于上述这些类,我写了一段测试代码,如下:
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;

/**
 * Parses the sample request stored in {@code resource/httpRequest.txt} under
 * the working directory and prints every part that was parsed.
 */
public class HttpOperatorTest {

    HttpOperator operator;

    /** Stream over the sample request; closed again in {@link #tearDown()}. */
    private FileInputStream fis;

    /** Opens the sample request file and hands it to a fresh parser. */
    @Before
    public void setup() throws FileNotFoundException {
        operator = new HttpOperator(1024);
        fis = new FileInputStream(
                new File(System.getProperty("user.dir"), "resource/httpRequest.txt"));
        operator.setSource(fis);
    }

    /** Releases the file handle opened by {@link #setup()}. */
    @After
    public void tearDown() throws IOException {
        if (fis != null) {
            fis.close();
        }
    }

    /** Runs the three parsing steps in order and prints the outcome of each. */
    @Test
    public void test() {
        operator.parseRequestLine();
        Request request = operator.getRequest();
        System.out.println("Method:" + request.getMethod());
        System.out.println("URI:" + request.getUri());
        System.out.println("HttpVersion:" + request.getHttpVersion());

        System.out.println("=====================parseHeaders=====");
        operator.parseRequestHeaders();
        List<HttpHeader> httpHeaders = request.getHttpheaders();
        if (httpHeaders != null && !httpHeaders.isEmpty()) {
            for (HttpHeader header : httpHeaders) {
                System.out.println(header.getName() + ":" + header.getValue());
            }
        }

        operator.parseRequestBody();
        System.out.println("=====================parseBody=====");
        System.out.println(Arrays.toString(request.getBody()));
    }
}
测试结果如下:
测试结果 写道
Method:GET
URI:/doaction?p=123&k=3343
HttpVersion:HTTP/1.1
=====================parseHeaders=====
Host:localhost:12345
Connection:keep-alive
Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36
Accept-Encoding:gzip, deflate, sdch
Accept-Language:zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4
Cookie:name=jackey;
=====================parseBody=====
null
URI:/doaction?p=123&k=3343
HttpVersion:HTTP/1.1
=====================parseHeaders=====
Host:localhost:12345
Connection:keep-alive
Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36
Accept-Encoding:gzip, deflate, sdch
Accept-Language:zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4
Cookie:name=jackey;
=====================parseBody=====
null