服务端收到client发送的Http数据后怎么解析成具体的Request对象呢?下面来看看golang是如何处理的
首先看一个具体应用实例:仅仅包含HTTP里面的Method,URL和Proto
package main import ( "bufio" "fmt" _ "io" "net/http" "net/textproto" "strings" ) func main() { request() } //当然这个实例比较简单,仅仅是通过\n来解析出几个字段而已 func request() { paths := "/xxx" br := bufio.NewReader(strings.NewReader("GET " + paths + " HTTP/1.1\r\nHost: test\r\n\r\n")) tp := textproto.NewReader(br) var s string var err error if s, err = tp.ReadLine(); err != nil { fmt.Printf("prase err: %v \n", err) } fmt.Printf("readLine restult: %v \n", s) req := new(http.Request) req.Method, req.RequestURI, req.Proto, _ = parseRequestLine(s) fmt.Printf("prase result %v \n", req) } func parseRequestLine(line string) (method, requestURI, proto string, ok bool) { s1 := strings.Index(line, " ") s2 := strings.Index(line[s1+1:], " ") if s1 < 0 || s2 < 0 { return } s2 += s1 + 1 return line[:s1], line[s1+1 : s2], line[s2+1:], true } //--------------------------------输出结果为: //readLine restult: GET /xxx HTTP/1.1 //prase result &{GET <nil> HTTP/1.1 0 0 map[] <nil> 0 [] false map[] map[] <nil> map[] /xxx <nil>}
再来深入看看golang的http源码
func ReadRequest(b *bufio.Reader) (req *Request, err error) { tp := newTextprotoReader(b) //封装一个Reader结构 req = new(Request) // First line: GET /index.html HTTP/1.0 //最开始的一行信息的解析 var s string if s, err = tp.ReadLine(); err != nil { return nil, err } defer func() { putTextprotoReader(tp) if err == io.EOF { err = io.ErrUnexpectedEOF } }() var ok bool //最开始的一行信息的解析 req.Method, req.RequestURI, req.Proto, ok = parseRequestLine(s) if !ok { return nil, &badStringError{"malformed HTTP request", s} } rawurl := req.RequestURI //client请求的URL //HTTP对应的ProtoMajor和ProtoMinor if req.ProtoMajor, req.ProtoMinor, ok = ParseHTTPVersion(req.Proto); !ok { return nil, &badStringError{"malformed HTTP version", req.Proto} } // CONNECT requests are used two different ways, and neither uses a full URL: // The standard use is to tunnel HTTPS through an HTTP proxy. // It looks like "CONNECT www.google.com:443 HTTP/1.1", and the parameter is // just the authority section of a URL. This information should go in req.URL.Host. // // The net/rpc package also uses CONNECT, but there the parameter is a path // that starts with a slash. It can be parsed with the regular URL parser, // and the path will end up in req.URL.Path, where it needs to be in order for // RPC to work. //处理Method == "CONNECT" 的具体情况 justAuthority := req.Method == "CONNECT" && !strings.HasPrefix(rawurl, "/") if justAuthority { rawurl = "http://" + rawurl } //HTTP URL解析 if req.URL, err = url.ParseRequestURI(rawurl); err != nil { return nil, err } if justAuthority { // Strip the bogus "http://" back off. req.URL.Scheme = "" } // Subsequent lines: Key: value. mimeHeader, err := tp.ReadMIMEHeader() //HTTP Header中的key-value数据 if err != nil { return nil, err } req.Header = Header(mimeHeader) // RFC2616: Must treat // GET /index.html HTTP/1.1 // Host: www.google.com // and // GET http://www.google.com/index.html HTTP/1.1 // Host: doesntmatter // the same. In the second case, any Host line is ignored. req.Host = req.URL.Host if req.Host == "" { req.Host = req.Header.get("Host") } delete(req.Header, "Host") //把Header里面的map中的Host去掉 fixPragmaCacheControl(req.Header) //设置Cache-Control字段 err = readTransfer(req, b) if err != nil { return nil, err } req.Close = shouldClose(req.ProtoMajor, req.ProtoMinor, req.Header, false) //是否需要关闭 return req, nil }
ParseHTTPVersion(解析HTTP的ProtoMajor 和 ProtoMinor)
func ParseHTTPVersion(vers string) (major, minor int, ok bool) { const Big = 1000000 // arbitrary upper bound switch vers { case "HTTP/1.1": return 1, 1, true case "HTTP/1.0": return 1, 0, true } if !strings.HasPrefix(vers, "HTTP/") { return 0, 0, false } dot := strings.Index(vers, ".") if dot < 0 { return 0, 0, false } major, err := strconv.Atoi(vers[5:dot]) if err != nil || major < 0 || major > Big { return 0, 0, false } minor, err = strconv.Atoi(vers[dot+1:]) if err != nil || minor < 0 || minor > Big { return 0, 0, false } return major, minor, true }
设置 Cache-Control字段
func fixPragmaCacheControl(header Header) { if hp, ok := header["Pragma"]; ok && len(hp) > 0 && hp[0] == "no-cache" { if _, presentcc := header["Cache-Control"]; !presentcc { header["Cache-Control"] = []string{"no-cache"} } } }
是否需要关闭
func shouldClose(major, minor int, header Header, removeCloseHeader bool) bool { if major < 1 { return true } else if major == 1 && minor == 0 { //这种情况判断长连接 if !strings.Contains(strings.ToLower(header.get("Connection")), "keep-alive") { return true } return false } else { // TODO: Should split on commas, toss surrounding white space, // and check each field. //判断Connection字段 if strings.ToLower(header.get("Connection")) == "close" { if removeCloseHeader { header.Del("Connection") } return true } } return false }
比较长的区分Request和Response的readTransfer方法
// msg is *Request or *Response. func readTransfer(msg interface{}, r *bufio.Reader) (err error) { t := &transferReader{RequestMethod: "GET"} // Unify input isResponse := false switch rr := msg.(type) { case *Response: t.Header = rr.Header t.StatusCode = rr.StatusCode t.ProtoMajor = rr.ProtoMajor t.ProtoMinor = rr.ProtoMinor t.Close = shouldClose(t.ProtoMajor, t.ProtoMinor, t.Header, true) isResponse = true if rr.Request != nil { t.RequestMethod = rr.Request.Method } case *Request: t.Header = rr.Header t.ProtoMajor = rr.ProtoMajor t.ProtoMinor = rr.ProtoMinor // Transfer semantics for Requests are exactly like those for // Responses with status code 200, responding to a GET method t.StatusCode = 200 default: panic("unexpected type") } // Default to HTTP/1.1 if t.ProtoMajor == 0 && t.ProtoMinor == 0 { t.ProtoMajor, t.ProtoMinor = 1, 1 } // Transfer encoding, content length t.TransferEncoding, err = fixTransferEncoding(t.RequestMethod, t.Header) if err != nil { return err } realLength, err := fixLength(isResponse, t.StatusCode, t.RequestMethod, t.Header, t.TransferEncoding) if err != nil { return err } if isResponse && t.RequestMethod == "HEAD" { if n, err := parseContentLength(t.Header.get("Content-Length")); err != nil { return err } else { t.ContentLength = n } } else { t.ContentLength = realLength } // Trailer t.Trailer, err = fixTrailer(t.Header, t.TransferEncoding) if err != nil { return err } // If there is no Content-Length or chunked Transfer-Encoding on a *Response // and the status is not 1xx, 204 or 304, then the body is unbounded. // See RFC2616, section 4.4. switch msg.(type) { case *Response: if realLength == -1 && !chunked(t.TransferEncoding) && bodyAllowedForStatus(t.StatusCode) { // Unbounded body. t.Close = true } } // Prepare body reader. ContentLength < 0 means chunked encoding // or close connection when finished, since multipart is not supported yet //注意这里ContentLength < 0 表示 chunked encoding 或者已经接受完数据,关闭连接 switch { case chunked(t.TransferEncoding): if noBodyExpected(t.RequestMethod) { t.Body = eofReader } else { t.Body = &body{src: internal.NewChunkedReader(r), hdr: msg, r: r, closing: t.Close} } case realLength == 0: t.Body = eofReader case realLength > 0: t.Body = &body{src: io.LimitReader(r, realLength), closing: t.Close} default: // realLength < 0, i.e. "Content-Length" not mentioned in header if t.Close { // Close semantics (i.e. HTTP/1.0) t.Body = &body{src: r, closing: t.Close} } else { // Persistent connection (i.e. HTTP/1.1) t.Body = eofReader } } // Unify output switch rr := msg.(type) { case *Request: rr.Body = t.Body rr.ContentLength = t.ContentLength rr.TransferEncoding = t.TransferEncoding rr.Close = t.Close rr.Trailer = t.Trailer case *Response: rr.Body = t.Body rr.ContentLength = t.ContentLength rr.TransferEncoding = t.TransferEncoding rr.Close = t.Close rr.Trailer = t.Trailer } return nil }
总结:如果自己想动手写HTTP服务器,这些一些基础性的处理工作可以借鉴一下