程序基本框架:在C代码中申请内存并放入数据,然后用Go代码通过http将数据发送到对端,发送完成(成功或失败)后释放内存。
遇到的问题:当发送成功的时候,一切正常;当发送失败的时候,偶尔会crash。
//C代码
// send_data allocates data_len bytes and hands the buffer to goHttpSendData,
// which is implemented on the Go side. This function does not free the buffer
// itself. If the allocation fails, nothing is sent.
void send_data()
{
    buf = malloc(data_len);
    if (buf == NULL) {
        // Allocation failed: never pass a NULL buffer across to Go.
        return;
    }
    goHttpSendData(buf, data_len); // defined in the Go code
}
// free_buf releases a buffer previously obtained from malloc.
//
// buf is passed by value, so this function cannot reset the caller's pointer:
// assigning NULL to the parameter would only change the local copy. After this
// call the caller (including the Go side) must treat its own pointer as
// dangling and must not read through it again. Passing NULL is a no-op.
void free_buf(void *buf)
{
    free(buf);
}
//go 代码
// goHttpSendData views the C buffer buf as a Go byte slice of length len
// without copying it, POSTs that slice as the HTTP request body, and finally
// releases the buffer through C.free_buf.
//
// NOTE(review): data aliases C-owned memory. The crash trace in this write-up
// shows net/http's writeLoop goroutine still reading the request body, so the
// buffer can apparently be freed while a read is in flight when c.Do returns
// with an error — confirm before relying on this ordering.
// NOTE(review): `bytes` in the array type and `uint64_t` in the signature look
// like sketch typos (presumably `byte` and `C.uint64_t`) — verify against the
// real code.
func goHttpSendData(buf *C.char, len uint64_t) {
b := (*[1<<20]bytes)(unsafe.Pointer(buf))
data := b[:len] // rough logic only; don't dwell on the array size
body := io.MultiReader(bytes.NewReader(data))
c := &http.Client{Timeout: timeout}
req, err := http.NewRequest("POST", url, body)
......
rsp, err := c.Do(req)
......
// Per the write-up, this is reached on both the success and the failure path.
C.free_buf(buf)
}
go crash stack如下。
unexpected fault address 0x7f43cc426ec6
fatal error: fault
[signal SIGSEGV: segmentation violation code=0x1 addr=0x7f43cc426ec6 pc=0x620fbb]
goroutine 279721 [running]:
runtime.throw(0xa43886, 0x5)
/usr/local/go/src/runtime/panic.go:596 +0x95 fp=0xc4219b99c8 sp=0xc4219b99a8
runtime.sigpanic()
/usr/local/go/src/runtime/signal_unix.go:297 +0x28c fp=0xc4219b9a18 sp=0xc4219b99c8
runtime.memmove(0xc42104d000, 0x7f43cc425f46, 0x1000)
/usr/local/go/src/runtime/memmove_amd64.s:329 +0x24b fp=0xc4219b9a20 sp=0xc4219b9a18
bytes.(*Reader).Read(0xc420edf950, 0xc42104d000, 0x1000, 0x1000, 0x0, 0x0, 0xc4207f01d0)
/usr/local/go/src/bytes/reader.go:43 +0x91 fp=0xc4219b9a50 sp=0xc4219b9a20
io.(*multiReader).Read(0xc421065080, 0xc42104d000, 0x1000, 0x1000, 0xc4219b9b30, 0x6bb505, 0xc4211865a0)
/usr/local/go/src/io/multi.go:26 +0xcf fp=0xc4219b9ab0 sp=0xc4219b9a50
io/ioutil.(*nopCloser).Read(0xc420a4de40, 0xc42104d000, 0x1000, 0x1000, 0x1000, 0x0, 0x0)
io.(*LimitedReader).Read(0xc4210651e0, 0xc42104d000, 0x1000, 0x1000, 0x1000, 0x0, 0x0)
/usr/local/go/src/io/io.go:436 +0x6c fp=0xc4219b9b40 sp=0xc4219b9af8
bufio.(*Writer).ReadFrom(0xc420a7a600, 0xe21240, 0xc4210651e0, 0x7f4834b16348, 0xc420a7a600, 0x1)
/usr/local/go/src/bufio/bufio.go:696 +0xcd fp=0xc4219b9bb0 sp=0xc4219b9b40
io.copyBuffer(0xe20c80, 0xc420a7a600, 0xe21240, 0xc4210651e0, 0x0, 0x0, 0x0, 0x9eac40, 0x1, 0xc4210651e0)
/usr/local/go/src/io/io.go:384 +0x2cb fp=0xc4219b9c18 sp=0xc4219b9bb0
io.Copy(0xe20c80, 0xc420a7a600, 0xe21240, 0xc4210651e0, 0xf, 0xc420a4dea0, 0x1)
/usr/local/go/src/io/io.go:360 +0x68 fp=0xc4219b9c78 sp=0xc4219b9c18
net/http.(*transferWriter).WriteBody(0xc420edd000, 0xe20c80, 0xc420a7a600, 0x2, 0x2)
/usr/local/go/src/net/http/transfer.go:319 +0x675 fp=0xc4219b9d48 sp=0xc4219b9c78
net/http.(*Request).write(0xc420093300, 0xe20c80, 0xc420a7a600, 0xc420e55f00, 0xc420edfad0, 0x0, 0x0, 0x0)
/usr/local/go/src/net/http/request.go:622 +0x6e9 fp=0xc4219b9e88 sp=0xc4219b9d48
net/http.(*persistConn).writeLoop(0xc4211865a0)
/usr/local/go/src/net/http/transport.go:1707 +0x1ad fp=0xc4219b9fd8 sp=0xc4219b9e88
runtime.goexit()
/usr/local/go/src/runtime/asm_amd64.s:2197 +0x1 fp=0xc4219b9fe0 sp=0xc4219b9fd8
created by net/http.(*Transport).dialConn
/usr/local/go/src/net/http/transport.go:1118 +0xa5a
问题分析:很明显是访问无效内存导致的,可能原因:
1、代码逻辑问题:梳理了代码逻辑,内存的释放是在完成发送(无论成功还是失败)后才释放的,觉得没有问题,但是为什么偏偏在发送失败的时候会crash呢?
2、go http包的bug:网上搜索也没发现什么
3、老大提醒,可能和go的 GC 有关,内存被释放后,可能go还在引用这个内存,故:
a 将内存拷贝一份到go代码中,内存释放交给GC,copy(data_copy, data):跑了20小时后,内存消耗了30+GB;
b 观察GC频率:一般每2分钟一次,当内存消耗多的时候实际GC间隔会小于2分钟,
以上证明:释放内存的时候,Go还在引用,故翻看http发送源码,
// net/http/client.go
// Simplified call chain (pseudo-code, not compilable): Do calls c.send, which
// calls the package-level send with the RoundTripper returned by c.transport().
Do()
{
c.send() {
resp, didTimeout, err = send(req, c.transport(), deadline)
}
}
// transport returns the RoundTripper used by this client: c.Transport when it
// is set, otherwise the package-level DefaultTransport.
func (c *Client) transport() RoundTripper {
if c.Transport != nil {
return c.Transport
}
return DefaultTransport
}
// send (abridged excerpt) hands the request to the given RoundTripper via
// rt.RoundTrip; the surrounding logic is elided here.
func send(ireq *Request, rt RoundTripper, deadline time.Time) (resp *Response, didTimeout func() bool, err error) {
...
resp, err = rt.RoundTrip(req)
...
}
// net/http/transport.go
// RoundTrip (abridged excerpt): the part shown starts a goroutine that calls
// t.dialConn and delivers the resulting connection or error on dialc.
func (t *Transport) RoundTrip(req *Request) (*Response, error) {
...
go func() {
pc, err := t.dialConn(ctx, cm)
dialc <- dialRes{pc, err}
}()
...
}
// dialConn (abridged excerpt): the part shown starts the connection's
// readLoop and writeLoop, each in its own goroutine.
func (t *Transport) dialConn(ctx context.Context, cm connectMethod) (*persistConn, error) {
...
go pconn.readLoop()
go pconn.writeLoop() // A goroutine reads the request body asynchronously, so if Do returns an error and the memory is freed while that read is still in progress, the process crashes.
...
}
// bytes/reader.go
// Read implements the io.Reader interface.
// It copies bytes from r.s, starting at offset r.i, into b and advances r.i by
// the number of bytes copied; once r.i reaches the end of r.s it returns
// io.EOF.
func (r *Reader) Read(b []byte) (n int, err error) {
if r.i >= int64(len(r.s)) {
return 0, io.EOF
}
r.prevRune = -1
n = copy(b, r.s[r.i:]) // crash site: this copy reads from r.s, which in this scenario aliases the already-freed C buffer
r.i += int64(n)
return
}
解决方法:
自己实现一个multiReader:当Do()失败后,先调用MarkInvalid()将内存标记为无效,然后再释放内存;Reader在读取内存时,若发现已被标记为无效,则直接返回错误,不再访问这块内存。由于MarkInvalid()和Read()共用同一把锁,MarkInvalid()返回时可以确定没有正在进行中的读取:
// myMultiReader is an io.Reader that can be switched off. While it is valid,
// Read is forwarded to the wrapped reader; once MarkInvalid has been called,
// every Read fails without touching the wrapped reader.
//
// The embedded mutex serializes Read and MarkInvalid, so MarkInvalid does not
// return while a Read call is still running.
type myMultiReader struct {
	sync.Mutex
	isValid  bool      // false once the underlying data must no longer be read
	myReader io.Reader // reader that Read delegates to while isValid is true
}

// MarkInvalid switches the reader off. Every Read that starts afterwards
// returns an error instead of reading from the wrapped reader.
func (mr *myMultiReader) MarkInvalid() {
	mr.Lock()
	mr.isValid = false
	mr.Unlock()
}

// Read implements io.Reader. It delegates to the wrapped reader while the
// reader is valid and returns (0, error) after MarkInvalid has been called.
func (mr *myMultiReader) Read(p []byte) (n int, err error) {
	mr.Lock()
	defer mr.Unlock()

	if mr.isValid {
		return mr.myReader.Read(p)
	}
	return 0, fmt.Errorf("memory is invalid")
}

// NewmyMultiReader returns a valid myMultiReader that reads the given readers
// one after another, as io.MultiReader does.
func NewmyMultiReader(readers ...io.Reader) *myMultiReader {
	mr := new(myMultiReader)
	mr.myReader = io.MultiReader(readers...)
	mr.isValid = true
	return mr
}
最后: C 和 Go的内存管理机制不同,这个要特别注意。