还记得当初学习 TP(ThinkPHP)的时候,为了搞清楚框架的运行流程,我们做了各种断点调试和代码阅读。现在,我同样想搞清楚:Go 程序的启动流程是什么?
启动流程分析
请注意这篇博客中会有很多汇编代码,你需要提前了解一下这方面的知识(Go 汇编器快速入门请参考这里)。让我们开始吧!
当前的Go版本为:go1.11.5 darwin/amd64
package main
import "fmt"
// main prints a single fixed line to standard output via fmt.Println.
// It is the minimal program whose compiled binary is disassembled below.
func main() {
fmt.Println("Hell asm!")
}
然后,编译并链接:
go build
生成可运行程序
objdump -d 可运行程序 > asm.txt
得到的 asm.txt 大概是一个 10 万多行的文件,我们要在这里面寻找程序的入口。
针对不同的平台都有各自的特定的汇编文件,我这里通过rt0_darwin_amd64.s定位到runtime.rt0_go方法。先来看下asm_amd64.s的源码。文件很大,省略部分代码,留下初始化过程的重要步骤。
// 源码 src/runtime/rt0_darwin_amd64.s
__rt0_amd64_darwin:
1052290: e9 eb c6 ff ff jmp -14613 <__rt0_amd64>
// 源码 src/runtime/asm_amd64.s
// _rt0_amd64是大多数amd64系统使用时的常用启动代码
__rt0_amd64:
...
104e989: e9 02 00 00 00 jmp 2
runtime.rt0_go:
// 1.查询cpu信息
// 2.如果有cgo,初始化cgo; 调用setg_gcc(g0),然后更新stackguard。
// 3.设置tls
104ea42: e8 09 3c 00 00 callq 15369
// 4.校验TLS是否可用,校验失败则调用 runtime.abort 终止程序 src/runtime/stubs.go
104ea63: e8 18 1a 00 00 callq 6680
// 5.做一些运算检测 src/runtime/runtime1.go
104ea87: e8 34 71 fe ff callq -102092
// 6.把二进制文件的绝对路径找出来 src/runtime/runtime1.go
104ea9d: e8 ce 6b fe ff callq -103474
// 7.获取CPU核数与内存页大小 src/runtime/os_darwin.go
104eaa2: e8 49 62 fd ff callq -171447
// 8.命令行参数、环境变量、gc、栈空间、内存管理、所有P实例、HASH算法等初始化 src/runtime/proc.go
104eaa7: e8 34 ae fd ff callq -152012
// 9.新建一个goroutine,该goroutine绑定runtime.main,放在P的本地队列,等待调度 src/runtime/proc.go
104eab6: e8 65 17 fe ff callq -125083
// 10.启动M,开始调度goroutine src/runtime/proc.go
104eabd: e8 7e cc fd ff callq -144258
执行流程总结
按顺序总结下runtime.rt0_go里几件重要的事:
- 1.检查运行平台的CPU,设置好程序运行需要的相关标志。
- 2.TLS的初始化。
- 3.runtime.args、runtime.osinit、runtime.schedinit 三个方法初始化好程序运行需要的各种变量与调度器。
- 4.runtime.newproc 创建新的goroutine用于绑定runtime.main(再由它调用用户写的main.main)。
- 5.runtime.mstart 开始goroutine的调度。
具体源码
下面接着针对上面几个runtime函数,粗略探索下干了什么事情。我们也只看一层代码,有兴趣的同学可以顺着这个顺序深入看下
runtime.args
就是把二进制文件的绝对路径找出来,并存在os.executablePath里。
// args stores the argument count c and argument vector v in the argc and
// argv variables, then forwards both to sysargs.
func args(c int32, v **byte) {
argc = c
argv = v
sysargs(c, v)
}
// executablePath holds the path of the running binary; it is filled in by
// sysargs. The go:linkname directive below binds it to os.executablePath.
//
//go:linkname executablePath os.executablePath
var executablePath string
// sysargs locates the executable path that follows argv and envv in the
// process argument area and stores it in executablePath.
func sysargs(argc int32, argv **byte) {
	// Start just past argv's terminating nil and advance until the nil
	// that terminates envv; the string after that nil is the path.
	idx := argc + 1
	for {
		if argv_index(argv, idx) == nil {
			break
		}
		idx++
	}
	path := gostringnocopy(argv_index(argv, idx+1))

	// Since OS X 10.11 the entry may carry an "executable_path=" prefix;
	// drop it when present (and when something follows it).
	const prefix = "executable_path="
	if len(path) > len(prefix) && path[:len(prefix)] == prefix {
		path = path[len(prefix):]
	}
	executablePath = path
}
runtime.osinit
获取CPU核数与内存页大小。按照本文的测试工程:
// BSD interface for threading.
//
// osinit stores the result of getncpu in ncpu and the result of
// getPageSize in physPageSize.
func osinit() {
// pthread_create delayed until end of goenvs so that we
// can look at the environment first.
ncpu = getncpu()
physPageSize = getPageSize()
}
// sysctl MIB identifiers used by getncpu and getPageSize.
const (
_CTL_HW = 6 // first MIB element in both lookups (the "hw" group)
_HW_NCPU = 3 // second MIB element for hw.ncpu
_HW_PAGESIZE = 7 // second MIB element for hw.pagesize
)
// getncpu returns the CPU count reported by the hw.ncpu sysctl.
// It returns 1 when the sysctl call fails or reports a non-positive value.
func getncpu() int32 {
	var count uint32
	length := unsafe.Sizeof(count)
	name := [2]uint32{_CTL_HW, _HW_NCPU}

	rc := sysctl(&name[0], 2, (*byte)(unsafe.Pointer(&count)), &length, nil, 0)
	if rc < 0 || int32(count) <= 0 {
		// Fall back to a single CPU.
		return 1
	}
	return int32(count)
}
// getPageSize returns the page size reported by the hw.pagesize sysctl.
// It returns 0 when the sysctl call fails or reports a non-positive value.
func getPageSize() uintptr {
	var pageSize uint32
	length := unsafe.Sizeof(pageSize)
	name := [2]uint32{_CTL_HW, _HW_PAGESIZE}

	rc := sysctl(&name[0], 2, (*byte)(unsafe.Pointer(&pageSize)), &length, nil, 0)
	if rc < 0 || int32(pageSize) <= 0 {
		return 0
	}
	return uintptr(pageSize)
}
runtime.schedinit
初始化程序运行需要环境
// The bootstrap sequence is:
//
// call osinit
// call schedinit
// make & queue new G
// call runtime·mstart
//
// The new G calls runtime·main.
//
// schedinit runs the runtime's one-time initialization steps in a fixed
// order and finishes by sizing the set of Ps with procresize. It throws if
// procresize reports a runnable goroutine.
func schedinit() {
// raceinit must be the first call to race detector.
// In particular, it must be done before mallocinit below calls racemapshadow.
// Fetch the current g.
_g_ := getg()
if raceenabled {
_g_.racectx, raceprocctx0 = raceinit()
}
sched.maxmcount = 10000 // upper bound on the global number of threads
tracebackinit() // record the PCs of a set of functions, used by traceback
moduledataverify() // verify that the linker symbols are correct
stackinit() // stack initialization
mallocinit() // memory allocator initialization
mcommoninit(_g_.m)
cpuinit() // must run before alginit
alginit() // maps must not be used before this call
modulesinit() // provides activeModules
typelinksinit() // uses maps, activeModules
itabsinit() // uses activeModules
msigsave(_g_.m)
initSigmask = _g_.m.sigmask
goargs() // fetch the command-line arguments
goenvs() // fetch all environment variables
parsedebugvars() // GODEBUG settings
gcinit() // GC initialization
sched.lastpoll = uint64(nanotime())
procs := ncpu // number of Ps defaults to ncpu
if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 { // a positive GOMAXPROCS value overrides the default
procs = n
}
if procresize(procs) != nil { // initialize all Ps
throw("unknown runnable goroutine during bootstrap")
}
// For cgocheck > 1, we turn on the write barrier at all times
// and check all pointer writes. We can't do this until after
// procresize because the write barrier needs a P.
if debug.cgocheck > 1 {
writeBarrier.cgo = true
writeBarrier.enabled = true
for _, p := range allp {
p.wbBuf.reset()
}
}
if buildVersion == "" {
// Condition should never trigger. This code just serves
// to ensure runtime·buildVersion is kept in the resulting binary.
buildVersion = "unknown"
}
}
runtime.newproc
newproc() 比较简单,只是获取参数的起始地址与相关寄存器。真正干活的是newproc1()。
runtime.newproc1()
newproc1() 就比较长了,这儿概括下它做了的事情:
- 从TLS拿到当前运行的G实例,并且使绑定到当前线程的M实例不可抢占。
- 从M实例上取到P实例,如果P实例本地上有free goroutine就拿过去,没有就到全局调度器那儿偷一些过来。这两个地方都没有,就按照最低栈大小2K new一个G实例(即goroutine)。
- 然后设置好G实例上的各种寄存器的信息,SP、PC等。
- 将G实例的状态变更为Grunnable,放到P实例的本地可运行队列里等待调度执行,若队列满了,就把一半的G移到全局调度器下。
- 释放M实例的不可抢占状态。返回新的G实例。
如果是程序刚启动,经由runtime.rt0_go调用newproc1时,实质干的事情就是创建一个G,把runtime.main(也包含main.main)放进去。在执行mstart时,触发调度。所以main实际是在一个新的G里运行的,而不是g0。
// Create a new g running fn with siz bytes of arguments.
// Put it on the queue of g's waiting to run.
// The compiler turns a go statement into a call to this.
// Cannot split the stack because it assumes that the arguments
// are available sequentially after &fn; they would not be
// copied if a stack split occurred.
//
// newproc itself only gathers the inputs (argument pointer, current g and
// caller pc) and then calls newproc1 through systemstack.
//go:nosplit
func newproc(siz int32, fn *funcval) {
// The arguments start one pointer-size past the address of fn.
argp := add(unsafe.Pointer(&fn), sys.PtrSize)
gp := getg()
pc := getcallerpc()
systemstack(func() {
newproc1(fn, (*uint8)(argp), siz, gp, pc)
})
}
// Create a new g running fn with narg bytes of arguments starting
// at argp. callerpc is the address of the go statement that created
// this. The new g is put on the queue of g's waiting to run.
//
// callergp is the g that newproc obtained from getg() before making this
// call. Only the signature is reproduced here; the body is omitted.
func newproc1(fn *funcval, argp *uint8, narg int32, callergp *g, callerpc uintptr)
runtime.mstart
启动M
// Called to start an M.
//
// mstart makes sure the current g has stack bounds and stack guards set,
// calls mstart1, and then calls mexit.
//
// This must not split the stack because we may not even have stack
// bounds set up yet.
//
// May run during STW (because it doesn't have a P yet), so write
// barriers are not allowed.
//
//go:nosplit
//go:nowritebarrierrec
func mstart() {
_g_ := getg()
// A zero stack.lo means the bounds still have to be derived below.
osStack := _g_.stack.lo == 0
if osStack {
// Initialize stack bounds from system stack.
// Cgo may have left stack size in stack.hi.
// minit may update the stack bounds.
size := _g_.stack.hi
if size == 0 {
size = 8192 * sys.StackGuardMultiplier
}
// The address of the local variable size becomes the upper bound;
// the lower bound is size bytes below it, plus 1024.
_g_.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
_g_.stack.lo = _g_.stack.hi - size + 1024
}
// Initialize stack guards so that we can start calling
// both Go and C functions with stack growth prologues.
_g_.stackguard0 = _g_.stack.lo + _StackGuard
_g_.stackguard1 = _g_.stackguard0
mstart1()
// Exit this thread.
if GOOS == "windows" || GOOS == "solaris" || GOOS == "plan9" || GOOS == "darwin" {
// Windows, Solaris, Darwin and Plan 9 always system-allocate
// the stack, but put it in _g_.stack before mstart,
// so the logic above hasn't set osStack yet.
osStack = true
}
mexit(osStack)
}
// mstart1 is called by mstart. It throws unless the current g is its M's
// g0, saves the caller's pc and sp, runs asminit and minit, runs mstartm0
// when the M is m0, runs the M's mstartfn if one is set, and finally calls
// schedule.
func mstart1() {
_g_ := getg()
if _g_ != _g_.m.g0 {
throw("bad runtime·mstart")
}
// Record the caller for use as the top of stack in mcall and
// for terminating the thread.
// We're never coming back to mstart1 after we call schedule,
// so other calls can reuse the current frame.
save(getcallerpc(), getcallersp())
asminit()
minit() // initialize the new M
// Install signal handlers; after minit so that minit can
// prepare the thread to be able to handle the signals.
if _g_.m == &m0 {
mstartm0()
}
// Run the M's start function, if one was provided.
if fn := _g_.m.mstartfn; fn != nil {
fn()
}
// An M with helpgc set clears the flag and calls stopm; otherwise any M
// other than m0 acquires the P stored in nextp and clears nextp.
if _g_.m.helpgc != 0 {
_g_.m.helpgc = 0
stopm()
} else if _g_.m != &m0 {
acquirep(_g_.m.nextp.ptr())
_g_.m.nextp = 0
}
schedule()
}
参考资料
- Golang the Runtime Bootstrap Process
- Golang Bootstrapping and Memory Allocator Initialization
- 探索golang程序启动过程
- golang 启动流程