通过dial "operation was canceled"错误来看go http client获取连接conn过程

下面是演示代码(不一定能复现):
client端演示代码:

package main

import (
    "context"
    "fmt"
    "io"
    "math"
    "net"
    "net/http"
    "sync"
    "time"
)

// main runs the client half of the demo. A pool of goroutines shares one
// http.Client and issues HEAD requests in a tight loop so that dials for new
// connections race with connections being returned to the idle pool. The
// custom DialContext hook prints every dial error together with how long the
// dial attempt took.
func main() {
    dialer := &net.Dialer{
        Timeout:   10 * time.Second,
        KeepAlive: 15 * time.Second,
    }

    tr := &http.Transport{
        // Effectively no per-host cap on idle connections kept for reuse.
        MaxIdleConnsPerHost: math.MaxInt,
        DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
            before := time.Now()
            conn, err := dialer.DialContext(ctx, network, addr)
            if err != nil {
                // The "operation was canceled" error shows up here.
                fmt.Printf("dial err=%v,cost=%v[ms]\n", err, time.Since(before).Milliseconds())
            }
            return conn, err
        },
    }

    httpClient := &http.Client{
        Transport: tr,
        Timeout:   10 * time.Second,
    }

    const (
        maxGoroutines = 100    // number of concurrent request loops
        eachRunTimes  = 100000 // requests issued by each loop
    )

    var wg sync.WaitGroup
    for i := 0; i < maxGoroutines; i++ {
        wg.Add(1)
        go func() {
            // Deferred so the WaitGroup is released on every exit path.
            defer wg.Done()
            for j := 0; j < eachRunTimes; j++ {
                head(httpClient)
            }
        }()
    }
    wg.Wait()
}

// head sends a single HEAD request to the demo endpoint using client and
// drains the response body before closing it. Any error is printed rather
// than returned, since the demo only cares about what gets reported.
func head(client *http.Client) {
    resp, err := client.Head("http://localhost:9080/nop")
    if err != nil {
        // In the demo run no error was reported here, even while the
        // DialContext hook printed "operation was canceled".
        fmt.Printf("httpClient.Head err=%v\n", err)
        return
    }
    // Register the close right after the error check so the body is closed
    // on every path out of this function.
    defer resp.Body.Close()

    // Read the body to EOF before closing it.
    if _, err := io.Copy(io.Discard, resp.Body); err != nil {
        fmt.Printf("drain response body err=%v\n", err)
    }
}

server端演示代码:

package main

import (
    "net/http"
)

// main runs the server half of the demo: a single /nop endpoint that replies
// with an empty 200 response, served on port 9080.
func main() {
    http.HandleFunc("/nop", func(writer http.ResponseWriter, request *http.Request) {
        writer.WriteHeader(http.StatusOK)
    })

    // ListenAndServe only returns when the server could not start or has
    // stopped; surface that instead of exiting silently (e.g. when the port
    // is already in use).
    if err := http.ListenAndServe(":9080", nil); err != nil {
        panic(err)
    }
}

然后我们发现Dial报错operation was canceled,但是client.Head(其实就是client.Do)却没有返回错误,也就是说这个错误并没有抛给外部的http client。这是为什么呢?我们通过源码来分析,顺便引出client每次获取连接的过程:
通过dial

我们先来看下DialContext的注释:其中提到DialContext与RoundTrip是并发执行的,也就是说一次RoundTrip会同时尝试从idle连接池获取连接和dial新连接;如果在DialContext完成之前已经有连接变为idle,就会直接使用这个idleConn:

    // DialContext specifies the dial function for creating unencrypted TCP connections.
    // If DialContext is nil (and the deprecated Dial below is also nil),
    // then the transport dials using package net.
    //
    // DialContext runs concurrently with calls to RoundTrip.
    // A RoundTrip call that initiates a dial may end up using
    // a connection dialed previously when the earlier connection
    // becomes idle before the later DialContext completes.
    DialContext func(ctx context.Context, network, addr string) (net.Conn, error)

getConn用来获取连接:

// getConn dials and creates a new persistConn to the target as
// specified in the connectMethod. This includes doing a proxy CONNECT
// and/or setting up TLS.  If this doesn't return an error, the persistConn
// is ready to write requests to.
//
// NOTE: the article truncates this excerpt; the function continues past the
// queueForDial call shown at the end.
func (t *Transport) getConn(treq *transportRequest, cm connectMethod) (pc *persistConn, err error) {
    req := treq.Request
    trace := treq.trace
    ctx := req.Context()
    if trace != nil && trace.GetConn != nil {
        trace.GetConn(cm.addr())
    }

    // w describes this request's wish for a connection; it carries the
    // request's context.
    w := &wantConn{
        cm:         cm,
        key:        cm.key(),
        ctx:        ctx,
        ready:      make(chan struct{}, 1),
        beforeDial: testHookPrePendingDial,
        afterDial:  testHookPostPendingDial,
    }
    // If getConn returns an error, cancel the pending want.
    defer func() {
        if err != nil {
            w.cancel(t, err)
        }
    }()

    // Article note: first try to obtain a connection from the idle pool.
    // Queue for idle connection.
    if delivered := t.queueForIdleConn(w); delivered {
        pc := w.pc
        // Trace only for HTTP/1.
        // HTTP/2 calls trace.GotConn itself.
        if pc.alt == nil && trace != nil && trace.GotConn != nil {
            trace.GotConn(pc.gotIdleConnTrace(pc.idleAt))
        }
        // set request canceler to some non-nil function so we
        // can detect whether it was cleared between now and when
        // we enter roundTrip
        t.setReqCanceler(treq.cancelKey, func(error) {})
        return pc, nil
    }

    cancelc := make(chan error, 1)
    t.setReqCanceler(treq.cancelKey, func(err error) { cancelc <- err })

    // Article note: no idle connection was delivered immediately, so w is
    // additionally queued for a new dial.
    // Queue for permission to dial.
    t.queueForDial(w)

重点看下idle怎么获取连接的

// queueForIdleConn queues w to receive the next idle connection for w.cm.
// As an optimization hint to the caller, queueForIdleConn reports whether
// it successfully delivered an already-idle connection.
func (t *Transport) queueForIdleConn(w *wantConn) (delivered bool) {
    if t.DisableKeepAlives {
        return false
    }

    t.idleMu.Lock()
    defer t.idleMu.Unlock()

    // Stop closing connections that become idle - we might want one.
    // (That is, undo the effect of t.CloseIdleConnections.)
    t.closeIdle = false

    if w == nil {
        // Happens in test hook.
        return false
    }

    // If IdleConnTimeout is set, calculate the oldest
    // persistConn.idleAt time we're willing to use a cached idle
    // conn.
    var oldTime time.Time
    if t.IdleConnTimeout > 0 {
        oldTime = time.Now().Add(-t.IdleConnTimeout)
    }

    // Article note: check idleConn for a connection that is already
    // available for this key.
    // Look for most recently-used idle connection.
    if list, ok := t.idleConn[w.key]; ok {
        stop := false
        delivered := false
        for len(list) > 0 && !stop {
            pconn := list[len(list)-1]

            // See whether this connection has been idle too long, considering
            // only the wall time (the Round(0)), in case this is a laptop or VM
            // coming out of suspend with previously cached idle connections.
            tooOld := !oldTime.IsZero() && pconn.idleAt.Round(0).Before(oldTime)
            if tooOld {
                // Async cleanup. Launch in its own goroutine (as if a
                // time.AfterFunc called it); it acquires idleMu, which we're
                // holding, and does a synchronous net.Conn.Close.
                go pconn.closeConnIfStillIdle()
            }
            if pconn.isBroken() || tooOld {
                // If either persistConn.readLoop has marked the connection
                // broken, but Transport.removeIdleConn has not yet removed it
                // from the idle list, or if this persistConn is too old (it was
                // idle too long), then ignore it and look for another. In both
                // cases it's already in the process of being closed.
                list = list[:len(list)-1]
                continue
            }
            delivered = w.tryDeliver(pconn, nil)
            if delivered {
                if pconn.alt != nil {
                    // HTTP/2: multiple clients can share pconn.
                    // Leave it in the list.
                } else {
                    // HTTP/1: only one client can use pconn.
                    // Remove it from the list.
                    t.idleLRU.remove(pconn)
                    list = list[:len(list)-1]
                }
            }
            stop = true
        }
        if len(list) > 0 {
            t.idleConn[w.key] = list
        } else {
            delete(t.idleConn, w.key)
        }
        if stop {
            return delivered
        }
    }

    // Article note: this is the key step. w is appended to idleConnWait, so
    // the next connection that is returned to the idle pool (per the article,
    // via tryPutIdleConn) is handed to a waiting wantConn first.
    // Register to receive next connection that becomes idle.
    if t.idleConnWait == nil {
        t.idleConnWait = make(map[connectMethodKey]wantConnQueue)
    }
    q := t.idleConnWait[w.key]
    q.cleanFront()
    q.pushBack(w)
    t.idleConnWait[w.key] = q
    return false
}
// cancelTimerBody is an io.ReadCloser that wraps rc with two features:
// 1) On Read error or close, the stop func is called.
// 2) On Read failure, if reqDidTimeout is true, the error is wrapped and
//    marked as net.Error that hit its timeout.
type cancelTimerBody struct {
    stop          func()        // stops the time.Timer waiting to cancel the request
    rc            io.ReadCloser // the underlying body being wrapped
    reqDidTimeout func() bool   // consulted on Read failure to decide whether to mark the error as a timeout
}
// dialSerial connects to a list of addresses in sequence, returning
// either the first successful connection, or the first error.
//
// NOTE: the article truncates this excerpt at the "..." marker.
func (sd *sysDialer) dialSerial(ctx context.Context, ras addrList) (Conn, error) {
    var firstErr error // The error from the first address is most relevant.

    for i, ra := range ras {
        select {
        case <-ctx.Done():
            // Article note: the "operation was canceled" error seen in the
            // demo client's DialContext hook is returned from here, once the
            // dial's context has been cancelled.
            return nil, &OpError{Op: "dial", Net: sd.network, Source: sd.LocalAddr, Addr: ra, Err: mapErr(ctx.Err())}
        default:
        }

        dialCtx := ctx
        if deadline, hasDeadline := ctx.Deadline(); hasDeadline {
            // Compute this attempt's deadline from the overall deadline and
            // the number of addresses still to try.
            partialDeadline, err := partialDeadline(time.Now(), deadline, len(ras)-i)
            if err != nil {
                // Ran out of time.
                if firstErr == nil {
                    firstErr = &OpError{Op: "dial", Net: sd.network, Source: sd.LocalAddr, Addr: ra, Err: err}
                }
                break
            }
            if partialDeadline.Before(deadline) {
                var cancel context.CancelFunc
                dialCtx, cancel = context.WithDeadline(ctx, partialDeadline)
                defer cancel()
            }
      ...
// Article note: the <-ctx.Done() case in dialSerial fires when the stopTimer
// func returned by setRequestCancel is invoked. The body is elided by the
// article at the "..." marker.
//
// setRequestCancel sets req.Cancel and adds a deadline context to req
// if deadline is non-zero. The RoundTripper's type is used to
// determine whether the legacy CancelRequest behavior should be used.
//
// As background, there are three ways to cancel a request:
// First was Transport.CancelRequest. (deprecated)
// Second was Request.Cancel.
// Third was Request.Context.
// This function populates the second and third, and uses the first if it really needs to.
func setRequestCancel(req *Request, rt RoundTripper, deadline time.Time) (stopTimer func(), didTimeout func() bool) {
    // No deadline: nothing to arm, so return no-op helpers.
    if deadline.IsZero() {
        return nop, alwaysFalse
    }
    knownTransport := knownRoundTripperImpl(rt, req)
    oldCtx := req.Context()
  ...
}

大致的流程如下:
通过dial

总结

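dial报错却没有抛出给http client的原因是:这里的dial报错并不是建立连接本身失败,而是在dial还没完成的时候,请求已经通过另一个变为idle的连接处理成功;请求结束后stopTimer被调用,请求的ctx随之被取消,仍在进行中的dial在dialSerial里收到ctx.Done()后返回operation was canceled。此时请求本身已经成功返回,所以这个错误只会出现在DialContext里,并不会抛出给最外层的http client。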

你可能感兴趣的:(go)