containerd is a binary; it runs in the background when the docker service starts, with a command line like this:
containerd -l unix:///var/run/docker/libcontainerd/docker-containerd.sock --shim containerd-shim --metrics-interval=0 --start-timeout 2m --state-dir /var/run/docker/libcontainerd/containerd --runtime runc
max@max-VirtualBox:~$ containerd -h
NAME:
containerd - High performance container daemon
USAGE:
containerd [global options] command [command options] [arguments...]
VERSION:
0.2.3
COMMANDS:
help, h Shows a list of commands or help for one command
GLOBAL OPTIONS:
--debug enable debug output in the logs
--state-dir "/run/containerd" runtime state directory
--metrics-interval "5m0s" interval for flushing metrics to the store
--listen, -l "unix:///run/containerd/containerd.sock" proto://address on which the GRPC API will listen
--runtime, -r "runc" name or path of the OCI compliant runtime to use when executing containers
--runtime-args [--runtime-args option --runtime-args option] specify additional runtime args
--shim "containerd-shim" Name or path of shim
--pprof-address http address to listen for pprof events
--start-timeout "15s" timeout duration for waiting on a container to start before it is killed
--retain-count "500" number of past events to keep in the event log
--graphite-address Address of graphite server
--help, -h show help
--version, -v print the version
// Supervisor represents a container supervisor.
// It carries the runtime configuration used to create containers, the table of
// known containers, and the two channels (tasks and startTasks) through which
// work flows from the API to the supervisor loop and on to the workers.
type Supervisor struct {
// stateDir is the directory on the system to store container runtime state information.
stateDir string
// runtime is the name of the OCI compatible runtime used to execute containers.
runtime string
// runtimeArgs are the default runtime arguments; start() uses them unless the
// StartTask names its own runtime.
runtimeArgs []string
// shim is handed to every new container as its Shim option.
shim string
// containers is keyed by container ID; start() adds an entry per created container.
containers map[string]*containerInfo
// startTasks is written by start() and drained by the worker goroutines.
startTasks chan *startTask
// we need a lock around the subscribers map only because additions and deletions from
// the map are via the API so we cannot really control the concurrency
subscriberLock sync.RWMutex
subscribers map[chan Event]struct{}
// machine supplies the Memory and Cpus values logged when the supervisor starts.
machine Machine
// tasks is written by SendTask and drained by the loop started in Start().
tasks chan Task
monitor *Monitor
eventLog []Event
// NOTE(review): presumably guards eventLog — confirm against the code that uses it.
eventLock sync.Mutex
// timeout is passed to new containers as their Timeout option.
timeout time.Duration
}
这里最重要的就是两个 channel,也就是上面 Supervisor 中的 tasks 和 startTasks;另外还有两类重要的 goroutine:supervisor goroutine 和 worker goroutine。
---put tasks---->task chan-------task chan to starttasks----->startTasks--------->get starttask--------
(SendTask) in supervisor go routine (handleTask)
首先分析一下这两个 goroutine 的初始化代码:
// main sets up the CLI application (most of the setup is elided in this
// excerpt). The action shown runs the daemon and terminates the program via
// logrus.Fatal if the daemon returns an error.
func main() {
......
app.Action = func(context *cli.Context) {
// Run the containerd daemon; any error it returns is fatal.
if err := daemon(context); err != nil {
logrus.Fatal(err)
}
}
......
}
// daemon launches the worker goroutines and then the supervisor task loop.
// Parts of the original function are elided in this excerpt ("......").
func daemon(context *cli.Context) error {
......
// Start 10 worker goroutines; each one is added to the WaitGroup before it is launched.
wg := &sync.WaitGroup{}
for i := 0; i < 10; i++ {
wg.Add(1)
w := supervisor.NewWorker(sv, wg)
go w.Start()
}
// Start the supervisor's task loop. Supervisor.Start spawns the loop in a
// goroutine and returns, so this call does not block.
if err := sv.Start(); err != nil {
return err
}
......
}
1. 在daemon函数中首先起了10个worker 协程。
// NewWorker returns a new worker bound to the given supervisor and wait
// group, exposed through the Worker interface.
func NewWorker(s *Supervisor, wg *sync.WaitGroup) Worker {
	w := new(worker)
	w.s = s
	w.wg = wg
	return w
}
// Worker is the interface returned by NewWorker.
// Start is the worker's entry point; the daemon launches it in its own
// goroutine ("go w.Start()").
type Worker interface {
Start()
}
首先看看startTasks中有没有startTask
// Start runs a loop in charge of starting new containers.
// This is an abridged excerpt; elided parts are marked with "......".
func (w *worker) Start() {
defer w.wg.Done()
for t := range w.s.startTasks {
started := time.Now()
// Calls container.Start (runtime/container.go), which launches the shim process.
process, err := t.Container.Start(t.CheckpointPath, runtime.NewStdio(t.Stdin, t.Stdout, t.Stderr))
......
if t.CheckpointPath == "" {
// Calls process.Start (runtime/process.go), which executes
// "<runtime> start <container id>".
if err := process.Start(); err != nil {
......
}
t.Container.Start()
// Start creates the container's init process and launches the shim for it.
//
// It creates the init process directory <root>/<id>/<InitProcessID>, reads
// the container spec, builds the process object from it and finally hands the
// shim command to createCmd. checkpointPath and s are stored in the process
// configuration unchanged. On any failure the error is returned as is,
// together with a nil Process.
func (c *container) Start(checkpointPath string, s Stdio) (Process, error) {
	initDir := filepath.Join(c.root, c.id, InitProcessID)
	if err := os.Mkdir(initDir, 0755); err != nil {
		return nil, err
	}

	// Shim command line: <shim> <container id> <bundle> <runtime>, for example:
	//   docker-containerd-shim 817c43b3f579... /var/run/docker/libcontainerd/817c43b3f579... docker-runc
	shimCmd := exec.Command(c.shim, c.id, c.bundle, c.runtime)
	shimCmd.Dir = initDir
	shimCmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	spec, err := c.readSpec()
	if err != nil {
		return nil, err
	}

	initProc, err := newProcess(&processConfig{
		checkpoint:  checkpointPath,
		root:        initDir,
		id:          InitProcessID,
		c:           c,
		stdio:       s,
		spec:        spec,
		processSpec: specs.ProcessSpec(spec.Process),
	})
	if err != nil {
		return nil, err
	}

	// Run the shim command; from this point the work continues in the shim
	// and the OCI runtime rather than in containerd itself.
	if err := c.createCmd(InitProcessID, shimCmd, initProc); err != nil {
		return nil, err
	}
	return initProc, nil
}
D:\study\go\containerd-docker-v1.12.x\runtime\process.go
// Start unblocks the associated container init process.
// This should only be called on the process with ID "init"; for any other
// process it does nothing and returns nil.
//
// It executes "<runtime> [runtimeArgs...] start <container id>" in a helper
// goroutine and then waits for whichever happens first:
//
//   - the runtime command finishes: its error (wrapped with the combined
//     output) or nil is returned;
//   - cmdDoneCh fires: if cmdSuccess is false the runtime command is killed
//     and ErrShimExited is returned, otherwise the result of the runtime
//     command is awaited and returned.
func (p *process) Start() error {
	if p.ID() != InitProcessID {
		return nil
	}

	// Build the argument list in a fresh slice so that append can never write
	// into the backing array of the container's shared runtimeArgs.
	args := make([]string, 0, len(p.container.runtimeArgs)+2)
	args = append(args, p.container.runtimeArgs...)
	args = append(args, "start", p.container.id)
	cmd := exec.Command(p.container.runtime, args...)

	// errC has capacity 1 and the goroutine sends exactly once, so it can
	// always finish even when nobody reads the result (the ErrShimExited
	// path below). Sending twice on failure would leave it blocked forever.
	errC := make(chan error, 1)
	go func() {
		out, err := cmd.CombinedOutput()
		if err != nil {
			errC <- fmt.Errorf("%s: %q", err.Error(), out)
			return
		}
		errC <- nil
	}()

	select {
	case err := <-errC:
		return err
	case <-p.cmdDoneCh:
		if !p.cmdSuccess {
			// Best-effort cleanup of the runtime command; errors are
			// deliberately ignored because ErrShimExited is what matters.
			// NOTE(review): cmd.Wait runs concurrently with CombinedOutput
			// in the goroutine above; os/exec does not document Cmd as safe
			// for that — confirm before relying on it.
			if cmd.Process != nil {
				cmd.Process.Kill()
			}
			cmd.Wait()
			return ErrShimExited
		}
		return <-errC
	}
}
2. sv.start()
// Start is a non-blocking call that runs the supervisor for monitoring
// container processes and executing new containers.
//
// It logs the supervisor configuration at debug level, spawns the event loop
// in its own goroutine and returns nil immediately. The event loop receives
// every Task from s.tasks and passes it to handleTask, one at a time.
//
// This event loop is the only thing that is allowed to modify state of
// containers and processes, therefore it is safe to do operations in the
// handlers that modify state of the system or state of the Supervisor.
func (s *Supervisor) Start() error {
	fields := logrus.Fields{
		"stateDir":    s.stateDir,
		"runtime":     s.runtime,
		"runtimeArgs": s.runtimeArgs,
		"memory":      s.machine.Memory,
		"cpus":        s.machine.Cpus,
	}
	logrus.WithFields(fields).Debug("containerd: supervisor running")

	eventLoop := func() {
		for task := range s.tasks {
			s.handleTask(task)
		}
	}
	go eventLoop()
	return nil
}
// handleTask dispatches a task to the handler matching its concrete type and
// reports the handler's result on the task's error channel.
//
// Unknown task types yield ErrUnknownTask. When a handler returns
// errDeferredResponse nothing is sent and the channel is left open: the
// handler has passed responsibility for answering to someone else (start()
// hands the channel to a worker through the startTask it queues).
func (s *Supervisor) handleTask(i Task) {
	var result error
	switch task := i.(type) {
	case *AddProcessTask:
		result = s.addProcess(task)
	case *CreateCheckpointTask:
		result = s.createCheckpoint(task)
	case *DeleteCheckpointTask:
		result = s.deleteCheckpoint(task)
	case *StartTask:
		// Container start requests take this branch.
		result = s.start(task)
	case *DeleteTask:
		result = s.delete(task)
	case *ExitTask:
		result = s.exit(task)
	case *GetContainersTask:
		result = s.getContainers(task)
	case *SignalTask:
		result = s.signal(task)
	case *StatsTask:
		result = s.stats(task)
	case *UpdateTask:
		result = s.updateContainer(task)
	case *UpdateProcessTask:
		result = s.updateProcess(task)
	case *OOMTask:
		result = s.oom(task)
	default:
		result = ErrUnknownTask
	}

	if result == errDeferredResponse {
		return
	}
	i.ErrorCh() <- result
	close(i.ErrorCh())
}
// start handles a StartTask on the supervisor loop: it creates the container
// object, records it in s.containers and queues a startTask on s.startTasks
// for the worker goroutines.
//
// The supervisor's default runtime and runtime arguments are used unless the
// task names its own runtime, in which case the task's Runtime and
// RuntimeArgs are used together. When the task carries a checkpoint, the
// queued startTask gets CheckpointPath = CheckpointDir/Checkpoint.Name.
//
// An error from runtime.New is returned unchanged. Otherwise the function
// returns errDeferredResponse: the task's error channel travels with the
// queued startTask (its Err field), so the reply is sent by whoever processes
// that startTask rather than by handleTask.
func (s *Supervisor) start(t *StartTask) error {
	began := time.Now()

	rtName, rtArgs := s.runtime, s.runtimeArgs
	if t.Runtime != "" {
		rtName, rtArgs = t.Runtime, t.RuntimeArgs
	}

	opts := runtime.ContainerOpts{
		Root:        s.stateDir,
		ID:          t.ID,
		Bundle:      t.BundlePath,
		Runtime:     rtName,
		RuntimeArgs: rtArgs,
		Shim:        s.shim,
		Labels:      t.Labels,
		NoPivotRoot: t.NoPivotRoot,
		Timeout:     s.timeout,
	}
	ctr, err := runtime.New(opts)
	if err != nil {
		return err
	}

	s.containers[t.ID] = &containerInfo{container: ctr}
	ContainersCounter.Inc(1)

	pending := &startTask{
		Err:           t.ErrorCh(),
		Container:     ctr,
		StartResponse: t.StartResponse,
		Stdin:         t.Stdin,
		Stdout:        t.Stdout,
		Stderr:        t.Stderr,
	}
	if cp := t.Checkpoint; cp != nil {
		pending.CheckpointPath = filepath.Join(t.CheckpointDir, cp.Name)
	}

	// Hand the new startTask to the startTasks channel. worker.Start
	// (supervisor/worker.go) reads from that channel and calls the Start
	// method of the runtime.Container interface.
	s.startTasks <- pending
	ContainerCreateTimer.UpdateSince(began)
	return errDeferredResponse
}
源码的分析可以分为三部分,
1. put Task to tasks chan
2.tasks chan---> startTasks
3. get startTask from startTasks chan and handle it
D:\study\go\containerd-docker-v1.12.x\api\grpc\server\server.go
// CreateContainer handles the create-container API request.
//
// It rejects requests without a bundle path, copies the request fields into a
// supervisor.StartTask, submits the task to the supervisor with SendTask and
// waits for the outcome: first the task's error channel, then (on success)
// the StartResponse channel. The started container is converted with
// createAPIContainer and returned in the response.
//
// NOTE(review): ctx is not consulted while waiting on the channels.
func (s *apiServer) CreateContainer(ctx context.Context, c *types.CreateContainerRequest) (*types.CreateContainerResponse, error) {
	if c.BundlePath == "" {
		return nil, errors.New("empty bundle path")
	}

	// Build the StartTask for the create request.
	task := &supervisor.StartTask{}
	task.ID = c.Id
	task.BundlePath = c.BundlePath
	task.Stdin = c.Stdin
	task.Stdout = c.Stdout
	task.Stderr = c.Stderr
	task.Labels = c.Labels
	task.NoPivotRoot = c.NoPivotRoot
	task.Runtime = c.Runtime
	task.RuntimeArgs = c.RuntimeArgs
	task.StartResponse = make(chan supervisor.StartResponse, 1)
	if name := c.Checkpoint; name != "" {
		task.CheckpointDir = c.CheckpointDir
		task.Checkpoint = &runtime.Checkpoint{Name: name}
	}

	// SendTask puts the task on the supervisor's tasks channel.
	s.sv.SendTask(task)
	if err := <-task.ErrorCh(); err != nil {
		return nil, err
	}

	started := <-task.StartResponse
	container, err := createAPIContainer(started.Container, false)
	if err != nil {
		return nil, err
	}
	resp := &types.CreateContainerResponse{Container: container}
	return resp, nil
}
// SendTask sends the provided task to the supervisor's main event loop.
// It increments TasksCounter and then sends evt on s.tasks, the channel the
// loop started by Supervisor.Start receives from.
// NOTE(review): whether s.tasks is buffered is not visible here; if it is
// not, this call blocks until the event loop receives the task.
func (s *Supervisor) SendTask(evt Task) {
TasksCounter.Inc(1)
s.tasks <- evt
}
// start handles a StartTask on the supervisor loop: it creates the container
// object, records it in s.containers and queues a startTask on s.startTasks
// for the worker goroutines.
//
// The supervisor's default runtime and runtime arguments are used unless the
// task names its own runtime, in which case the task's Runtime and
// RuntimeArgs are used together. When the task carries a checkpoint, the
// queued startTask gets CheckpointPath = CheckpointDir/Checkpoint.Name.
//
// An error from runtime.New is returned unchanged. Otherwise the function
// returns errDeferredResponse: the task's error channel travels with the
// queued startTask (its Err field), so the reply is sent by whoever processes
// that startTask rather than by handleTask.
func (s *Supervisor) start(t *StartTask) error {
	began := time.Now()

	rtName, rtArgs := s.runtime, s.runtimeArgs
	if t.Runtime != "" {
		rtName, rtArgs = t.Runtime, t.RuntimeArgs
	}

	opts := runtime.ContainerOpts{
		Root:        s.stateDir,
		ID:          t.ID,
		Bundle:      t.BundlePath,
		Runtime:     rtName,
		RuntimeArgs: rtArgs,
		Shim:        s.shim,
		Labels:      t.Labels,
		NoPivotRoot: t.NoPivotRoot,
		Timeout:     s.timeout,
	}
	ctr, err := runtime.New(opts)
	if err != nil {
		return err
	}

	s.containers[t.ID] = &containerInfo{container: ctr}
	ContainersCounter.Inc(1)

	pending := &startTask{
		Err:           t.ErrorCh(),
		Container:     ctr,
		StartResponse: t.StartResponse,
		Stdin:         t.Stdin,
		Stdout:        t.Stdout,
		Stderr:        t.Stderr,
	}
	if cp := t.Checkpoint; cp != nil {
		pending.CheckpointPath = filepath.Join(t.CheckpointDir, cp.Name)
	}

	// Hand the new startTask to the startTasks channel. worker.Start
	// (supervisor/worker.go) reads from that channel and calls the Start
	// method of the runtime.Container interface.
	s.startTasks <- pending
	ContainerCreateTimer.UpdateSince(began)
	return errDeferredResponse
}
这个主要是 worker go 协程处理
// Start runs a loop in charge of starting new containers.
//
// For every startTask received from the supervisor's startTasks channel it:
//
//  1. calls Container.Start (runtime/container.go) to create the init process;
//  2. registers the container for OOM notifications (a failure here is only
//     logged, never fatal);
//  3. adds the process to the supervisor's monitor;
//  4. unless restoring from a checkpoint, calls process.Start
//     (runtime/process.go) to run "<runtime> start <container id>";
//  5. reports success on t.Err and t.StartResponse and notifies subscribers
//     with a StateStart event.
//
// If step 1, 3 or 4 fails, the error is logged and sent on t.Err, a
// DeleteTask (NoEvent set) for the container is submitted to the supervisor,
// and the loop moves on to the next task. The worker marks its WaitGroup as
// done when the channel is closed and the loop ends.
func (w *worker) Start() {
	defer w.wg.Done()
	for t := range w.s.startTasks {
		began := time.Now()
		stdio := runtime.NewStdio(t.Stdin, t.Stdout, t.Stderr)
		process, err := t.Container.Start(t.CheckpointPath, stdio)

		// abort sends cause to the requester and submits a DeleteTask for the
		// container to the supervisor.
		abort := func(cause error) {
			t.Err <- cause
			w.s.SendTask(&DeleteTask{
				ID:      t.Container.ID(),
				NoEvent: true,
				Process: process,
			})
		}

		if err != nil {
			logrus.WithFields(logrus.Fields{
				"error": err,
				"id":    t.Container.ID(),
			}).Error("containerd: start container")
			abort(err)
			continue
		}

		// An OOM registration failure is logged unless the container has
		// already exited or the process is already stopped.
		oomErr := w.s.monitor.MonitorOOM(t.Container)
		if oomErr != nil && oomErr != runtime.ErrContainerExited && process.State() != runtime.Stopped {
			logrus.WithField("error", oomErr).Error("containerd: notify OOM events")
		}

		if err := w.s.monitorProcess(process); err != nil {
			logrus.WithField("error", err).Error("containerd: add process to monitor")
			abort(err)
			continue
		}

		// Only call process start if we aren't restoring from a checkpoint;
		// a process restored from a checkpoint is already started.
		if t.CheckpointPath == "" {
			if err := process.Start(); err != nil {
				logrus.WithField("error", err).Error("containerd: start init process")
				abort(err)
				continue
			}
		}

		ContainerStartTimer.UpdateSince(began)
		t.Err <- nil
		t.StartResponse <- StartResponse{Container: t.Container}
		w.s.notifySubscribers(Event{
			Timestamp: time.Now(),
			ID:        t.Container.ID(),
			Type:      StateStart,
		})
	}
}