HTTP协议文本解析

阅读更多

HTTP消息是由普通ASCII文本组成。消息包括消息头和数据体部分。消息头以行为单位,每行以CRLF(回车和换行)结束,消息头结束后,额外增加一个CRLF,之后内容就是数据体部分了。

格式如图:

HTTP协议文本解析_第1张图片

                                                          http协议格式

那么,如果给定下面这样一段HTTP文本,我们能否用Java把它解析出来呢:

GET /doaction?p=123&k=3343 HTTP/1.1
Host: localhost:12345
Connection: keep-alive
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36
Accept-Encoding: gzip, deflate, sdch
Accept-Language: zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4
Cookie: name=jackey;

  下面是我用Java解析这段HTTP协议文本的代码:

 

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
/**
 * http协议解析
 * @author Mic
 *
 */
public class HttpOperator {
	/**
	 * Input stream the HTTP message is read from (see setSource).
	 */
   private InputStream source;
   private  byte[] buf;// buffer holding the bytes most recently read from source
   private  int pos;// current offset into buf
   private int start;// start index of the token currently being scanned
   private int end;// end index of the token currently being scanned
   private int capacity;// size of buf in bytes
   private int lastValid;// index in buf of the last byte delivered by the most recent read; -1 when there is none
   private Request request=new Request();
   // HTTP request method names. NOTE(review): not referenced by the parsing code visible in this class;
   // "Delete" is mixed-case unlike the other constants.
   public enum METHOD{
	   GET,HEAD,POST,OPTIONS,PUT,Delete,TRACE,CONNECT
   }
   /**
    * Creates a parser whose read buffer holds {@code capacity} bytes.
    * The buffer starts out empty: every cursor is 0 and {@code lastValid} is -1.
    *
    * @param capacity size of the internal read buffer in bytes
    */
   public HttpOperator(int capacity) {
	 this.capacity=capacity;
	 this.buf=new byte[capacity];
	 this.pos=this.start=this.end=0;
	 this.lastValid=-1;
   }
   /**
    * Creates a parser with a 4096-byte read buffer.
    */
   public HttpOperator() {
       this(4096);
   }
   /**
    * Refills the internal buffer from the start with up to {@code capacity} bytes.
    *
    * @return {@code true} if the read succeeded, {@code false} at end of stream or on an I/O error
    */
   public boolean  fill(){
	   return fill(buf, 0, capacity);
   }
   /**
    * Reads up to {@code length} bytes from the source into {@code buf} starting at {@code offset},
    * then resets {@code start}/{@code end} and records the index of the last byte read in
    * {@code lastValid}.
    *
    * @param buf    destination array
    * @param offset index in {@code buf} at which the first byte is stored
    * @param length maximum number of bytes to read
    * @return {@code true} if the read succeeded, {@code false} at end of stream or on an I/O error
    */
   public boolean fill(byte[] buf,int offset,int length){
	   int len;
	   try {
		 len=source.read(buf, offset, length);
	   }catch (IOException e) {
		  System.out.println("read source failed");
		  // A failed read delivers nothing: mark the buffer empty so callers
		  // cannot keep scanning bytes left over from the previous read.
		  start=0;
		  end=0;
		  lastValid=-1;
		  return false;
	   }
	   start=0;
	   end=0;
	   // The last valid index must account for offset; the previous "len-1" was only right for offset 0.
	   lastValid=len<=0?-1:offset+len-1;
	   return len!=-1;

   }
   /**
    * Advances {@code pos} by at least one byte and then past any run of SP / HT bytes,
    * refilling the buffer whenever the current one is used up.
    * Stops with {@code pos} at 0 when the source has no more data.
    */
   public void skipSpace(){
	   do{
		   if(pos>=lastValid){
			   boolean refilled=fill();
			   pos=-1;
			   if(!refilled||lastValid<0){
				   // Nothing more to read. Without this check the loop re-examined stale
				   // bytes and never terminated when buf[0] happened to be a blank.
				   pos=0;
				   return;
			   }
		   }
		   pos++;
	   }while(buf[pos]==Constant.SP||buf[pos]==Constant.HT);
   }
   
   
   // Copies bytes from src into dest.
   // NOTE(review): this listing is damaged. On the "if" line below, the text between a '<' and a
   // later '>' appears to have been lost (probably stripped as an HTML tag when the article was
   // extracted). The rest of copy() and the opening of the request-line parsing method
   // (called as parseRequestLine() by the test) are missing and must be restored from the
   // original source before this compiles.
   public void  copy(byte[] src ,int srcOff,byte[] dest,int destOff,int len){
	  // expand capacity
	  if((dest.length-destOff)=lastValid){
				  copy(buf, start,v_buf,v_pos,pos-start);
				  v_pos+=(pos-start);
				  v_len+=(pos-start);
				  start=pos=end=0;
				  fill();
			  }
		   }
			  // the bytes collected so far form the request method
			  try {
				request.setMethod(new String(v_buf,0,v_len,"UTF-8"));
			} catch (UnsupportedEncodingException e) {
				 System.out.println("set method failed");
				 throw  new RuntimeException(e);
			}
		
			  
		  // URI: collect bytes up to the next SP / HT
			v_buf=new byte[1024];
			v_pos=0;
			pos--;
			skipSpace();
			start=pos;
			v_len=0;
			while(true){
				   if(buf[pos]==Constant.SP||buf[pos]==Constant.HT){
					   copy(buf, start,v_buf,v_pos,pos-start);
					   v_pos+=(pos-start);
					   v_len+=(pos-start);
					   break;
				   }   
				   end=pos;
				   pos++;
				   // check whether the buffer has been used up; if so save what was scanned and refill
				  if(pos>=lastValid){
					  copy(buf, start,v_buf,v_pos,pos-start);
					  v_pos+=(pos-start);
					  v_len+=(pos-start);
					  start=pos=end=0;
					  fill();
				  }
			   }
			   try {
					request.setUri(new String(v_buf,0,v_len,"UTF-8"));
				} catch (UnsupportedEncodingException e) {
					 System.out.println("set URI failed");
					 throw  new RuntimeException(e);
				}
			   
			   // protocol version, e.g. HTTP/1.1: collect bytes up to the line terminator
				v_buf=new byte[1024];
				v_pos=0;
				pos--;
				skipSpace();
				start=pos;
				v_len=0;
				while(true){
					   // stop at LF, or when the previously visited byte (buf[end]) was CR
					   if(buf[pos]==Constant.LF||buf[end]==Constant.CR){
						   copy(buf, start,v_buf,v_pos,end-start);
						   v_pos+=(pos-start);
						   v_len+=(end-start);
						   quit=true;
						   break;
					   }else if(buf[pos]==Constant.HT||buf[pos]==Constant.SP){
						   skipSpace();
						   continue;
					   }
					   
					   end=pos;
					   pos++;
					   // check whether the buffer has been used up; if so save what was scanned and refill
					  if(pos>=lastValid){
						  copy(buf, start,v_buf,v_pos,pos-start);
						  v_pos+=(pos-start);
						  v_len+=(pos-start);
						  start=pos=end=0;
						  fill();
					  }
				   }
					  try {
						request.setHttpVersion(new String(v_buf,0,v_len,"UTF-8"));
					} catch (UnsupportedEncodingException e) {
						 System.out.println("set HttpVersion failed");
						 throw  new RuntimeException(e);
					}
				
		   }
}
   /**
    * Parses header lines one after another until {@code parseRequestHeader()}
    * reports that no further header follows.
    */
   public void parseRequestHeaders(){
	   boolean more;
	   do{
		   more=parseRequestHeader();
	   }while(more);
   }
   /**
    * Parses a single header line ("name: value") and adds it to the request.
    *
    * <p>On entry {@code pos} is the index of the last consumed byte (the LF that ended the
    * previous line). When a header was parsed, {@code pos} is left on the LF ending that header
    * line. When the blank line that ends the header section is found, {@code pos} is left on the
    * first byte after it, which is where {@code parseRequestBody()} starts reading.
    *
    * <p>Bytes are consumed one at a time through {@code peekByte()}, so a header that straddles
    * two buffer fills is handled without losing bytes.
    *
    * @return {@code true} if a header was parsed and another line may follow;
    *         {@code false} at the end of the header section or at end of stream
    * @throws IllegalStateException if a line ends before a ':' is found
    */
   private boolean parseRequestHeader(){
	   int next=peekByte();
	   if(next==-1){
		   // stream ended without a terminating blank line
		   pos=lastValid+1;
		   return false;
	   }
	   if(next==Constant.CR||next==Constant.LF){
		   // blank line: end of the header section
		   pos++;
		   if(next==Constant.CR&&peekByte()==Constant.LF){
			   pos++;
		   }
		   pos++;// step onto the first body byte
		   return false;
	   }
	   // skip blanks in front of the name
	   while(next==Constant.SP||next==Constant.HT){
		   pos++;
		   next=peekByte();
	   }
	   // name: everything up to ':'
	   ByteArrayOutputStream nameBytes=new ByteArrayOutputStream();
	   while(next!=Constant.COLON){
		   if(next==-1){
			   // truncated line: drop the partial name
			   pos=lastValid+1;
			   return false;
		   }
		   if(next==Constant.CR||next==Constant.LF){
			   throw new IllegalStateException("malformed header line, missing ':' after \""
					   +decodeUtf8(nameBytes.toByteArray(),nameBytes.size())+"\"");
		   }
		   nameBytes.write(next);
		   pos++;
		   next=peekByte();
	   }
	   pos++;// consume ':'
	   HttpHeader httpHeader=HttpHeader.addHeaderName(
			   decodeUtf8(nameBytes.toByteArray(),nameBytes.size()));
	   // value: skip leading blanks, then take everything up to the end of the line
	   next=peekByte();
	   while(next==Constant.SP||next==Constant.HT){
		   pos++;
		   next=peekByte();
	   }
	   ByteArrayOutputStream valueBytes=new ByteArrayOutputStream();
	   while(next!=-1&&next!=Constant.LF){
		   valueBytes.write(next);
		   pos++;
		   next=peekByte();
	   }
	   byte[] rawValue=valueBytes.toByteArray();
	   int valueLen=rawValue.length;
	   if(valueLen>0&&rawValue[valueLen-1]==Constant.CR){
		   valueLen--;// the CR of the CRLF terminator is not part of the value
	   }
	   httpHeader.setValue(decodeUtf8(rawValue,valueLen));
	   request.addHeader(httpHeader);
	   if(next==-1){
		   // last line had no terminator; nothing follows
		   pos=lastValid+1;
		   return false;
	   }
	   pos++;// consume LF
	   return true;
   }

   /**
    * Returns the byte after {@code pos} as an unsigned value without consuming it,
    * refilling the buffer first when it is used up; returns -1 when the source has no more data.
    */
   private int peekByte(){
	   if(pos>=lastValid){
		   boolean refilled=fill();
		   pos=-1;
		   if(!refilled||lastValid<0){
			   lastValid=-1;// mark the buffer empty so stale bytes are never re-read
			   return -1;
		   }
	   }
	   return buf[pos+1]&0xFF;
   }

   /** Decodes the first {@code len} bytes of {@code bytes} as UTF-8 text. */
   private static String decodeUtf8(byte[] bytes,int len){
	   try {
		   return new String(bytes,0,len,"UTF-8");
	   } catch (UnsupportedEncodingException e) {
		   throw new IllegalStateException("UTF-8 charset unavailable",e);
	   }
   }
 
  /**
   * Collects the message body: the unread bytes still in the buffer (from {@code pos} up to
   * {@code lastValid}) followed by everything else the source delivers until end of stream.
   * The body is stored on the request with its exact length; when there are no body bytes the
   * request's body is left untouched.
   *
   * <p>NOTE(review): this reads until end of stream and does not look at Content-Length,
   * so on a connection that stays open the call will block — confirm this is intended.
   */
  public void parseRequestBody(){
    	ByteArrayOutputStream collected=new ByteArrayOutputStream();
    	int from=Math.max(pos,0);
    	if(from<=lastValid){
    		collected.write(buf,from,lastValid-from+1);
    	}
    	// Append every further chunk; previously each chunk overwrote offset 0 of a fixed
    	// 1024-byte array, and nothing was read at all when the buffer was already used up.
    	while(fill(buf, 0, capacity)){
    		if(lastValid<0){
    			break;// zero-length read: no progress is possible
    		}
    		collected.write(buf,0,lastValid+1);
    	}
    	pos=lastValid+1;// everything buffered has been consumed
    	if(collected.size()>0){
    		request.setBody(collected.toByteArray());
    	}

  }
   
   
/**
 * @return the stream the HTTP message is read from
 */
public InputStream getSource() {
	return this.source;
}
/**
 * @param source the stream the HTTP message is read from
 */
public void setSource(InputStream source) {
	this.source = source;
}
/**
 * @return the request object the parse methods write their results into
 */
public Request getRequest() {
	return this.request;
}
/**
 * @param request the request object the parse methods should write their results into
 */
public void setRequest(Request request) {
	this.request = request;
}
	   
	   
   }
 

 HttpOperator 是用来解析HTTP协议的类,其中有三个关键方法:parseRequestLine 解析请求行,parseRequestHeaders 解析请求头,parseRequestBody 解析请求包体。

 

而解析后的内容用Request来封装

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

/**
 * A parsed HTTP request: method, URI, protocol version, header fields and body.
 *
 * <p>NOTE(review): the class is {@code Serializable}, but the {@code HttpHeader} elements it
 * stores are not declared serializable in this file — confirm before serializing instances.
 *
 * @author Mic
 */
public class Request implements Serializable{
 private static final long serialVersionUID = 1L;
 // request method
 private String method;
 // protocol version
 private String httpVersion;
 // request URI
 private String uri;
 // header fields; null until one is added or a list is set
 private List<HttpHeader> httpheaders;
 // message body
 private byte[] body;
public String getMethod() {
	return method;
}
public void setMethod(String method) {
	this.method = method;
}

public String getHttpVersion() {
	return httpVersion;
}
public void setHttpVersion(String httpVersion) {
	this.httpVersion = httpVersion;
}
public String getUri() {
	return uri;
}
public void setUri(String uri) {
	this.uri = uri;
}
/**
 * @return the header list, or {@code null} if no header has been added or set yet
 */
public List<HttpHeader> getHttpheaders() {
	return httpheaders;
}
public void setHttpheaders(List<HttpHeader> httpheaders) {
	this.httpheaders = httpheaders;
}
public byte[] getBody() {
	return body;
}
public void setBody(byte[] body) {
	this.body = body;
}

/**
 * Appends a header, creating the header list on first use.
 *
 * @param header the header to append
 */
public void addHeader(HttpHeader header){
	if(httpheaders==null) httpheaders=new ArrayList<HttpHeader>();
	httpheaders.add(header);
}
/**
 * Appends a header built from the given name and value.
 *
 * @param name  header field name
 * @param value header field value
 */
public void addHeader(String name,Object value){
	addHeader(new HttpHeader(name,value));
}
}

 下面是其他的一些附加类

/**
 * Byte and string constants for the characters that delimit an HTTP message.
 */
public class Constant {
  /**
  * CRLF.
  */
  public final static String CRLF="\r\n";
  /**
   * CR.
   */
  public final static byte CR=(byte)'\r';
  /**
   * LF
   */
  public final static byte LF=(byte) '\n';
  /**
   * SP.
   */
  public final static byte SP=(byte) ' ';
  /**
   * HT.
   */
  public final static byte HT=(byte) '\t';
  /**
   * COLON.
   */
  public final static byte COLON=(byte) ':';
  /**
   * SEMI_COLON
   */
  public final static byte SEMI_COLON=(byte) ';';


  /**
   * 'A'.
   */
  public static final byte A = (byte) 'A';


  /**
   * 'a'.
   */
  public static final byte a = (byte) 'a';


  /**
   * 'Z'.
   */
  public static final byte Z = (byte) 'Z';
}

 

/**
 * One HTTP header field: a name paired with an arbitrary value object.
 */
public class HttpHeader {
  /** Header field name. */
  private String name;
  /** Header field value; stays {@code null} until one is supplied. */
  private Object value;

  /**
   * Creates a header that has a name but no value yet.
   *
   * @param name header field name
   */
  public HttpHeader(String name) {
    this.name = name;
  }

  /**
   * Creates a header with both name and value.
   *
   * @param name  header field name
   * @param value header field value
   */
  public HttpHeader(String name, Object value) {
    this.name = name;
    this.value = value;
  }

  /**
   * Factory for a header that carries only a name.
   *
   * @param name header field name
   * @return a new header whose value is {@code null}
   */
  public static HttpHeader addHeaderName(String name) {
    HttpHeader created = new HttpHeader(name);
    return created;
  }

  public String getName() {
    return this.name;
  }

  public void setName(String name) {
    this.name = name;
  }

  public Object getValue() {
    return this.value;
  }

  public void setValue(Object value) {
    this.value = value;
  }

  @Override
  public String toString() {
    StringBuilder text = new StringBuilder("HttpHeader [name=");
    text.append(this.name);
    text.append(", value=");
    text.append(this.value);
    text.append("]");
    return text.toString();
  }
}

 通过上述类我做了一段测试代码如下:

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.util.Arrays;
import java.util.List;

import org.junit.Before;
import org.junit.Test;

/**
 * Runs the parser over {@code resource/httpRequest.txt} (resolved against the working
 * directory) and prints the request line, the headers and the body.
 */
public class HttpOperatorTest {
	HttpOperator operator;
	@Before
	public void setup() throws FileNotFoundException{
		operator=new HttpOperator(1024);
		FileInputStream fis=new FileInputStream(new File(System.getProperty("user.dir"),"resource/httpRequest.txt"));
		operator.setSource(fis);
	}
	@Test
	public void test() throws Exception {
		try {
			operator.parseRequestLine();
			Request request=operator.getRequest();
			System.out.println("Method:"+request.getMethod());
			System.out.println("URI:"+request.getUri());
			System.out.println("HttpVersion:"+request.getHttpVersion());
			System.out.println("=====================parseHeaders=====");
			operator.parseRequestHeaders();
			// The element type is required for the typed for-each below; the raw List did not compile.
			List<HttpHeader> httpHeaders=request.getHttpheaders();
			if(httpHeaders!=null&&! httpHeaders.isEmpty()){
				for(HttpHeader header:httpHeaders) {
					System.out.println(header.getName()+":"+header.getValue());
				}
			}
			operator.parseRequestBody();
			System.out.println("=====================parseBody=====");
			System.out.println(Arrays.toString(request.getBody()));
		} finally {
			// release the file handle opened in setup()
			operator.getSource().close();
		}
	}

}

 测试结果如下:

测试结果 写道
Method:GET
URI:/doaction?p=123&k=3343
HttpVersion:HTTP/1.1
=====================parseHeaders=====
Host:localhost:12345
Connection:keep-alive
Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36
Accept-Encoding:gzip, deflate, sdch
Accept-Language:zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4
Cookie:name=jackey;
=====================parseBody=====
null

 

  • HTTP协议文本解析_第2张图片
  • 大小: 87.1 KB
  • 查看图片附件

你可能感兴趣的:(HTTP协议,java)