使用cgo,由于内存释放导致内存无效,引起的http crash

程序基本框架:在C代码中申请内存并放入数据,然后用Go代码通过http将数据发送到对端,发送完成(成功或失败)后释放内存。

遇到的问题:当发送成功的时候,一切正常;当发送失败的时候,偶尔会crash。
//C代码
// send_data allocates a data_len-byte buffer and passes it to goHttpSendData,
// which is implemented on the Go side. The Go side is responsible for
// releasing the buffer (it calls free_buf once the send has finished).
// NOTE(review): buf and data_len are not declared in this excerpt; presumably
// they are declared elsewhere in the real program - confirm.
void send_data()
{
    buf = malloc(data_len);
    if (buf == NULL) {
        // Allocation failed: there is no buffer to send, and a NULL pointer
        // must not be handed to the Go side.
        return;
    }
    goHttpSendData(buf, data_len); // defined in the Go code
}
// free_buf releases a buffer that was obtained from malloc.
// Passing NULL is allowed (free(NULL) is a no-op).
// buf is received by value, so this function cannot reset the caller's
// pointer: after the call the caller's copy is dangling and must not be
// dereferenced or freed again.
void free_buf(void *buf)
{
    free(buf);
}
//go 代码
func goHttpSendData(buf *C.char, len uint64_t) {
    b := (*[1<<20]bytes)(unsafe.Pointer(buf))
    data := b[:len] //大概逻辑,别去纠结大小
    body := io.MultiReader(bytes.NewReader(data))
    c := &http.Client{Timeout: timeout}
    req, err := http.NewRequest("POST", url, body)
    ......
    rsp, err := c.Do(req)
    ......
    C.free_buf(buf)
}

go crash stack如下。
unexpected fault address 0x7f43cc426ec6
fatal error: fault
[signal SIGSEGV: segmentation violation code=0x1 addr=0x7f43cc426ec6 pc=0x620fbb]

goroutine 279721 [running]:
runtime.throw(0xa43886, 0x5)
        /usr/local/go/src/runtime/panic.go:596 +0x95 fp=0xc4219b99c8 sp=0xc4219b99a8
runtime.sigpanic()
        /usr/local/go/src/runtime/signal_unix.go:297 +0x28c fp=0xc4219b9a18 sp=0xc4219b99c8
runtime.memmove(0xc42104d000, 0x7f43cc425f46, 0x1000)
        /usr/local/go/src/runtime/memmove_amd64.s:329 +0x24b fp=0xc4219b9a20 sp=0xc4219b9a18
bytes.(*Reader).Read(0xc420edf950, 0xc42104d000, 0x1000, 0x1000, 0x0, 0x0, 0xc4207f01d0)
        /usr/local/go/src/bytes/reader.go:43 +0x91 fp=0xc4219b9a50 sp=0xc4219b9a20
io.(*multiReader).Read(0xc421065080, 0xc42104d000, 0x1000, 0x1000, 0xc4219b9b30, 0x6bb505, 0xc4211865a0)
        /usr/local/go/src/io/multi.go:26 +0xcf fp=0xc4219b9ab0 sp=0xc4219b9a50
io/ioutil.(*nopCloser).Read(0xc420a4de40, 0xc42104d000, 0x1000, 0x1000, 0x1000, 0x0, 0x0)
        :1 +0x6b fp=0xc4219b9af8 sp=0xc4219b9ab0
io.(*LimitedReader).Read(0xc4210651e0, 0xc42104d000, 0x1000, 0x1000, 0x1000, 0x0, 0x0)
        /usr/local/go/src/io/io.go:436 +0x6c fp=0xc4219b9b40 sp=0xc4219b9af8
bufio.(*Writer).ReadFrom(0xc420a7a600, 0xe21240, 0xc4210651e0, 0x7f4834b16348, 0xc420a7a600, 0x1)
        /usr/local/go/src/bufio/bufio.go:696 +0xcd fp=0xc4219b9bb0 sp=0xc4219b9b40
io.copyBuffer(0xe20c80, 0xc420a7a600, 0xe21240, 0xc4210651e0, 0x0, 0x0, 0x0, 0x9eac40, 0x1, 0xc4210651e0)
        /usr/local/go/src/io/io.go:384 +0x2cb fp=0xc4219b9c18 sp=0xc4219b9bb0
io.Copy(0xe20c80, 0xc420a7a600, 0xe21240, 0xc4210651e0, 0xf, 0xc420a4dea0, 0x1)
        /usr/local/go/src/io/io.go:360 +0x68 fp=0xc4219b9c78 sp=0xc4219b9c18
net/http.(*transferWriter).WriteBody(0xc420edd000, 0xe20c80, 0xc420a7a600, 0x2, 0x2)
        /usr/local/go/src/net/http/transfer.go:319 +0x675 fp=0xc4219b9d48 sp=0xc4219b9c78
net/http.(*Request).write(0xc420093300, 0xe20c80, 0xc420a7a600, 0xc420e55f00, 0xc420edfad0, 0x0, 0x0, 0x0)
        /usr/local/go/src/net/http/request.go:622 +0x6e9 fp=0xc4219b9e88 sp=0xc4219b9d48
net/http.(*persistConn).writeLoop(0xc4211865a0)
        /usr/local/go/src/net/http/transport.go:1707 +0x1ad fp=0xc4219b9fd8 sp=0xc4219b9e88
runtime.goexit()
        /usr/local/go/src/runtime/asm_amd64.s:2197 +0x1 fp=0xc4219b9fe0 sp=0xc4219b9fd8
created by net/http.(*Transport).dialConn
        /usr/local/go/src/net/http/transport.go:1118 +0xa5a
        
问题分析:很明显是访问无效内存导致的,可能原因:
1、代码逻辑问题:梳理了代码逻辑,内存的释放是在完成发送(无论成功还是失败)后才释放的,觉得没有问题,但是为什么偏偏在发送失败的时候会crash呢?
2、go http包的bug:网上搜索也没发现什么
3、老大提醒,可能和go的 GC 有关,内存被释放后,可能go还在引用这个内存,故:
    a 将内存拷贝一份到go代码中,内存释放交给GC,copy(data_copy, data):跑了20小时后,内存消耗了30+GB;
    b 观察GC频率:一般每2分钟一次;当内存消耗较多时,实际GC间隔会小于2分钟。
以上证明:释放内存的时候,Go还在引用这块内存,故翻看http发送的源码:

// net/http/client.go
// Simplified call chain, written as pseudo-code rather than valid Go:
// Do calls c.send, which in turn calls the package-level send function with
// the round tripper returned by c.transport().
Do()
{
    c.send() {
        resp, didTimeout, err = send(req, c.transport(), deadline)
    }
}
// transport returns the RoundTripper used by the client: c.Transport when it
// is non-nil, DefaultTransport otherwise.
func (c *Client) transport() RoundTripper {
    if c.Transport != nil {
        return c.Transport
    }
    return DefaultTransport
}
// send (excerpt; "..." marks elided code) performs the request by calling
// rt.RoundTrip and returns its response and error.
func send(ireq *Request, rt RoundTripper, deadline time.Time) (resp *Response, didTimeout func() bool, err error) {
    ...
    resp, err = rt.RoundTrip(req)
    ...
}
// net/http/transport.go
// RoundTrip (excerpt; "..." marks elided code). The part shown starts a
// goroutine that dials the connection via t.dialConn and delivers the result
// on the dialc channel.
// NOTE(review): in the actual Go source this goroutine appears to live in a
// helper called by RoundTrip rather than in RoundTrip itself - confirm
// against the Go version in use.
func (t *Transport) RoundTrip(req *Request) (*Response, error) {
    ...
    go func() {
        pc, err := t.dialConn(ctx, cm)
        dialc <- dialRes{pc, err}
    }()
    ...
}
// dialConn (excerpt; "..." marks elided code). The part shown starts the
// connection's read loop and write loop, each in its own goroutine.
func (t *Transport) dialConn(ctx context.Context, cm connectMethod) (*persistConn, error) {
    ...
    go pconn.readLoop()
    // writeLoop runs in its own goroutine and reads the request body
    // asynchronously. If the request has already been reported as failed and
    // the buffer has been freed while this read is still in progress, the
    // process crashes.
    go pconn.writeLoop()
    ...
}

// bytes/reader.go
// Read implements the io.Reader interface.
// It copies the unread part of r.s into b, advances the read offset r.i by
// the number of bytes copied, and returns io.EOF once r.i has reached the
// end of r.s.
func (r *Reader) Read(b []byte) (n int, err error) {
    if r.i >= int64(len(r.s)) {
        return 0, io.EOF
    }
    r.prevRune = -1
    // This is where the crash happens: in the program discussed in this
    // article, r.s aliases the C buffer, so this copy reads memory that has
    // already been freed.
    n = copy(b, r.s[r.i:])
    r.i += int64(n)
    return
}

解决方法:
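自己实现一个multiReader:Do()返回后(尤其是失败时),在释放内存之前先调用MarkInvalid()将内存标记为无效;之后Reader在读取内存时发现已无效,就直接返回错误。由于Read()和MarkInvalid()使用同一把锁,MarkInvalid()会等待正在进行中的Read()结束后才返回,因此在它返回之后再释放内存是安全的;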
// myMultiReader wraps an io.MultiReader with a validity flag so that reads
// can be refused once the underlying memory is no longer usable.
// Intended use, per this article: call MarkInvalid before freeing the C
// buffer that backs the wrapped readers.
type myMultiReader struct {
	// The embedded mutex serializes Read and MarkInvalid.
	sync.Mutex
	// isValid is true while the wrapped readers may still be read.
	isValid bool
	// myReader is the io.MultiReader over the readers given at construction.
	myReader io.Reader
}

// NewmyMultiReader returns a valid myMultiReader that reads the given readers
// one after another, as io.MultiReader does.
func NewmyMultiReader(readers ...io.Reader) *myMultiReader {
	wrapped := &myMultiReader{isValid: true}
	wrapped.myReader = io.MultiReader(readers...)
	return wrapped
}

// MarkInvalid flags the reader as invalid. Because it takes the same lock as
// Read, it does not return while a Read is in progress; every Read that starts
// afterwards fails without touching the wrapped readers.
func (m *myMultiReader) MarkInvalid() {
	m.Lock()
	m.isValid = false
	m.Unlock()
}

// Read implements io.Reader. While the reader is valid it forwards to the
// wrapped readers; once MarkInvalid has been called it returns 0 and a
// "memory is invalid" error.
func (m *myMultiReader) Read(p []byte) (n int, err error) {
	m.Lock()
	// Deferred so the lock is released even if the wrapped Read panics.
	defer m.Unlock()
	if m.isValid {
		return m.myReader.Read(p)
	}
	return 0, fmt.Errorf("memory is invalid")
}

 

最后: C 和 Go的内存管理机制不同,这个要特别注意。

你可能感兴趣的:(Golang)