关于GoLang服务的平滑重启

引出问题

我们在用go开发的过程中,如果修改了代码,都是control+c杀死运行的进程,然后再go run 或者是 go build之后运行,而当我们的项目上线后,直接杀死进程会导致线上服务中断,在生产环境中是绝对不允许的

 

解决思路

更改代码之后,重新编译,重启进程时,当前主进程fork出一个子进程,让它运行改动后的程序。

 

实现细节

如何通知进程进行平滑重启呢?答案是注册SIGHUP信号(注意是信号,而不是信号量),在信号处理方法(handleSignals)中进行处理。那么在fork出子进程时都做了什么,又是如何处理正在执行的请求的呢?我们知道所有的连接都是通过socket文件描述符来进行通信的,所以只要把父进程监听socket的文件描述符传递给新fork出来的子进程,子进程就可以在同一个端口上继续监听,此后新的请求都可以由子进程来处理。子进程启动并拿到监听socket之后,会向父进程发送SIGTERM信号,父进程收到后执行shutdown,处理完当前请求后退出。此时子进程由于没有了父进程,变为了孤儿进程(而不是僵尸进程),由1号系统进程接管。大致代码如下:

用endless框架去注册信号处理方法(go get github.com/fvbock/endless)

// Build a gin engine exposing a single /ping route.
server := gin.New()
group := server.Group("")
group.GET("/ping", func(c *gin.Context) {
	payload := gin.H{
		"errno":    0,
		"errmsg":   "success",
		"data":     "",
		"user_msg": "",
	}
	c.JSON(http.StatusOK, payload)
})

// Wrap the engine in an endless server listening on Port.
listenAddr := fmt.Sprintf(":%s", strconv.Itoa(Port))
tmpServer := endless.NewServer(listenAddr, server)

// BeforeBegin is a hook; here it only logs the PID of the current process.
tmpServer.BeforeBegin = func(_ string) {
	log.Printf("Actual pid is %d", syscall.Getpid())
}

// ListenAndServe blocks while serving; log whatever error it returns.
err := tmpServer.ListenAndServe()
if err != nil {
	log.Printf("Server err: %v", err)
}

下面是endless的源码

ListenAndServe:

// ListenAndServe obtains a listener for srv.Addr (falling back to ":http"
// when it is empty) and then calls Serve.
//
// Signal handling is started in a background goroutine before the listener
// is obtained. When the server runs as a child (srv.isChild), the parent
// process is sent SIGTERM once the listener is in place.
//
// It returns the error from getListener if the listener cannot be obtained,
// otherwise whatever Serve returns.
func (srv *endlessServer) ListenAndServe() (err error) {
	addr := srv.Addr
	if addr == "" {
		addr = ":http"
	}

	// Register the signal handlers in their own goroutine.
	go srv.handleSignals()

	l, err := srv.getListener(addr)
	if err != nil {
		log.Println(err)
		return err
	}

	srv.EndlessListener = newEndlessListener(l, srv)

	if srv.isChild {
		// The child now holds the listener, so tell the parent to terminate.
		// A failure is logged rather than silently dropped: if the signal is
		// not delivered, the parent is never asked to shut down.
		if killErr := syscall.Kill(syscall.Getppid(), syscall.SIGTERM); killErr != nil {
			log.Println("Failed to send SIGTERM to parent process:", killErr)
		}
	}

	// Note: the hook receives srv.Addr as configured, not the defaulted addr.
	srv.BeforeBegin(srv.Addr)

	return srv.Serve()
}

handleSignals:

// handleSignals subscribes srv.sigChan to hookableSignals and then loops
// forever, handling one signal per iteration. The PRE_SIGNAL hooks run before
// and the POST_SIGNAL hooks run after the built-in handling of each signal:
//
//	SIGHUP          fork a new process
//	SIGUSR2         call hammerTime with a zero duration
//	SIGINT/SIGTERM  shut the server down
//	SIGUSR1/SIGTSTP logged only
//
// Any other signal is logged as not handled.
func (srv *endlessServer) handleSignals() {
	signal.Notify(srv.sigChan, hookableSignals...)

	self := syscall.Getpid()
	for {
		received := <-srv.sigChan
		srv.signalHooks(PRE_SIGNAL, received)

		switch received {
		case syscall.SIGHUP:
			log.Println(self, "Received SIGHUP. forking.")
			// Start the replacement process; a failure is only logged.
			if forkErr := srv.fork(); forkErr != nil {
				log.Println("Fork err:", forkErr)
			}

		case syscall.SIGUSR1:
			log.Println(self, "Received SIGUSR1.")

		case syscall.SIGUSR2:
			log.Println(self, "Received SIGUSR2.")
			srv.hammerTime(0 * time.Second)

		case syscall.SIGINT:
			log.Println(self, "Received SIGINT.")
			srv.shutdown()

		case syscall.SIGTERM:
			log.Println(self, "Received SIGTERM.")
			srv.shutdown()

		case syscall.SIGTSTP:
			log.Println(self, "Received SIGTSTP.")

		default:
			log.Printf("Received %v: nothing i care about...\n", received)
		}

		srv.signalHooks(POST_SIGNAL, received)
	}
}

fork:把socket文件描述符传递给子进程:

// fork starts a new instance of the running program (same executable path
// and arguments) and hands it the listening socket files of all running
// servers through cmd.ExtraFiles.
//
// The child's environment is the current environment plus
// ENDLESS_CONTINUE=1 and, when more than one server is running,
// ENDLESS_SOCKET_ORDER listing the server addresses in the same order as the
// passed files.
//
// Only one fork may be in progress: if runningServersForked is already set an
// error is returned. If the child cannot be started, the error is returned
// and the flag is cleared so that a later attempt can retry. The current
// process is deliberately NOT terminated in that case, since it is still the
// one serving requests.
func (srv *endlessServer) fork() (err error) {
	runningServerReg.Lock()
	defer runningServerReg.Unlock()

	// only one server instance should fork!
	if runningServersForked {
		return errors.New("Another process already forked. Ignoring this one.")
	}

	runningServersForked = true

	files := make([]*os.File, len(runningServers))
	orderArgs := make([]string, len(runningServers))
	// get the accessor socket fds for _all_ server instances
	for _, srvPtr := range runningServers {
		addr := srvPtr.Server.Addr
		// Slot of this server's socket, so files and orderArgs stay aligned.
		idx := socketPtrOffsetMap[addr]

		switch l := srvPtr.EndlessListener.(type) {
		case *endlessListener:
			// normal listener
			files[idx] = l.File()
		default:
			// tls listener
			files[idx] = srvPtr.tlsInnerListener.File()
		}
		orderArgs[idx] = addr
	}

	env := append(
		os.Environ(),
		"ENDLESS_CONTINUE=1",
	)
	if len(runningServers) > 1 {
		env = append(env, fmt.Sprintf(`ENDLESS_SOCKET_ORDER=%s`, strings.Join(orderArgs, ",")))
	}

	// Re-run the same executable with the same arguments.
	cmd := exec.Command(os.Args[0], os.Args[1:]...)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.ExtraFiles = files
	cmd.Env = env

	// cmd.SysProcAttr = &syscall.SysProcAttr{
	// 	Setsid:  true,
	// 	Setctty: true,
	// 	Ctty:    ,
	// }

	if err = cmd.Start(); err != nil {
		// Let a later SIGHUP try again instead of being rejected as
		// "already forked".
		runningServersForked = false
		return fmt.Errorf("restart: failed to launch: %w", err)
	}

	return nil
}

运行结果

1、编译并启动程序

localhost:go why$ go build main.go 
localhost:go why$ ./main
[GIN-debug] [WARNING] Running in "debug" mode. Switch to "release" mode in production.
 - using env:   export GIN_MODE=release
 - using code:  gin.SetMode(gin.ReleaseMode)

[GIN-debug] GET    /ping                     --> main.main.func1 (1 handlers)
2020/02/08 17:52:26 Actual pid is 16333

2、调用接口

curl -XGET localhost:777/ping

{
    "data": "",
    "errmsg": "success",
    "errno": 0,
    "user_msg": ""
}

3、改动代码

// Same response as in the /ping handler; only the "data" value is changed to "new data".
c.JSON(http.StatusOK, gin.H{
	"errno": 0,
	"errmsg": "success",
	"data":	"new data",
	"user_msg": "",
})

4、重新编译

go build main.go

5、平滑重启,可以看到新进程(PID为16350)的父进程PID(PPID)为1

whydeMacBook-Pro:go why$ kill -1 16333
whydeMacBook-Pro:go why$ lsof -i:777
COMMAND   PID USER   FD   TYPE             DEVICE SIZE/OFF NODE NAME
main    16350  why    3u  IPv6 0x8dbd126b94b08875      0t0  TCP *:multiling-http (LISTEN)
main    16350  why    6u  IPv6 0x8dbd126b94b08875      0t0  TCP *:multiling-http (LISTEN)
whydeMacBook-Pro:go why$ ps -ef | grep 16350
  501 16350     1   0  5:53下午 ttys004    0:00.03 /var/folders/_s/jfrm6_712w58sytpc753pmr40000gn/T/go-build001868412/b001/exe/main
  501 16395 34106   0  5:56下午 ttys007    0:00.00 grep 16350
whydeMacBook-Pro:go why$

6、查看结果

{
    "data": "new data",
    "errmsg": "success",
    "errno": 0,
    "user_msg": ""
}

 

你可能感兴趣的:(go)