4. docker container——创建与启动

docker client创建容器过程

在docker-ce项目中可以看到docker client的API,在cli/command/container/cmd.go中列出了NewContainerCommand的列表,NewCreateCommand就在其中。由于run其实被拆分为create和start两步,而client只负责parse flag,发送HTTP request,并不涉及什么heavy的业务逻辑,所以这里只分析create。

// NewCreateCommand creates a new cobra.Command for `docker create`.
//
// The command's positional arguments are validated with
// cli.RequiresMinArgs(1). Its RunE handler stores args[0] as the image and any
// remaining arguments as the container command, then delegates to runCreate.
//
// NOTE(review): this excerpt shows no closing braces/parentheses; they appear
// to have been lost when the snippet was extracted.
func NewCreateCommand(dockerCli command.Cli) *cobra.Command {
    var opts createOptions
    // copts is only assigned further down (copts = addFlags(flags)); RunE
    // dereferences it later, when the command actually runs.
    var copts *containerOptions

    cmd := &cobra.Command{
        Use:   "create [OPTIONS] IMAGE [COMMAND] [ARG...]",
        Short: "Create a new container",
        Args:  cli.RequiresMinArgs(1),
        RunE: func(cmd *cobra.Command, args []string) error {
            // First positional argument is the image; the rest (if any) is
            // the command to run in the container.
            copts.Image = args[0]
            if len(args) > 1 {
                copts.Args = args[1:]
            return runCreate(dockerCli, cmd.Flags(), &opts, copts)

    flags := cmd.Flags()

    flags.StringVar(&opts.name, "name", "", "Assign a name to the container")

    // Add an explicit help that doesn't have a `-h` to prevent the conflict
    // with hostname
    flags.Bool("help", false, "Print usage")

    command.AddPlatformFlag(flags, &opts.platform)
    command.AddTrustVerificationFlags(flags, &opts.untrusted, dockerCli.ContentTrustEnabled())
    // Register the remaining container flags and keep the options struct that
    // RunE fills in and passes to runCreate.
    copts = addFlags(flags)
    return cmd

抛开parse flags不说,注册了runCreate()函数,其中又调用了createContainer()

  • 其中包含了networkingconfig,是用户指定的网络设置。
  • newCIDFile()可以将container ID写入指定文件中
  • ContainerCreate()创建容器
  • 检查是否错误,如果错误类型为镜像找不到,调用pullImage拉取镜像,并再次执行ContainerCreate()
// runCreate turns the parsed flags into a container configuration, asks
// createContainer to create the container and prints the ID from the response
// to dockerCli.Out().
//
// A failure in parse is reported through reportError and returned as a
// cli.StatusError with StatusCode 125; an error from createContainer is
// returned unchanged.
//
// NOTE(review): closing braces are missing from this excerpt.
func runCreate(dockerCli command.Cli, flags *pflag.FlagSet, opts *createOptions, copts *containerOptions) error {
    containerConfig, err := parse(flags, copts)
    if err != nil {
        reportError(dockerCli.Err(), "create", err.Error(), true)
        return cli.StatusError{StatusCode: 125}
    response, err := createContainer(context.Background(), dockerCli, containerConfig, opts)
    if err != nil {
        return err
    // Only the container ID is written to stdout.
    fmt.Fprintln(dockerCli.Out(), response.ID)
    return nil

// createContainer creates a container through the API client.
//
// When the first ContainerCreate call fails with a not-found error and the
// image reference is a named one, the image is pulled via pullImage and the
// create call is retried once. Warnings in the response are printed to
// dockerCli.Err(), and the container ID is written to the CID file obtained
// from newCIDFile(hostConfig.ContainerIDFile).
//
// It returns a pointer to the create response together with the error (if
// any) from writing the CID file.
//
// NOTE(review): closing braces/parentheses are missing from this excerpt.
func createContainer(ctx context.Context, dockerCli command.Cli, containerConfig *containerConfig, opts *createOptions) (*container.ContainerCreateCreatedBody, error) {
    config := containerConfig.Config
    hostConfig := containerConfig.HostConfig
    networkingConfig := containerConfig.NetworkingConfig
    stderr := dockerCli.Err()

    var (
        trustedRef reference.Canonical
        namedRef   reference.Named

    containerIDFile, err := newCIDFile(hostConfig.ContainerIDFile)
    if err != nil {
        return nil, err
    defer containerIDFile.Close()

    ref, err := reference.ParseAnyReference(config.Image)
    if err != nil {
        return nil, err
    // Only named references are normalized with TagNameOnly. When the result
    // is tagged and opts.untrusted is false, config.Image is replaced by the
    // reference returned from image.TrustedReference.
    if named, ok := ref.(reference.Named); ok {
        namedRef = reference.TagNameOnly(named)

        if taggedRef, ok := namedRef.(reference.NamedTagged); ok && !opts.untrusted {
            var err error
            trustedRef, err = image.TrustedReference(ctx, dockerCli, taggedRef, nil)
            if err != nil {
                return nil, err
            config.Image = reference.FamiliarString(trustedRef)

    //create the container
    response, err := dockerCli.Client().ContainerCreate(ctx, config, hostConfig, networkingConfig, opts.name)

    //if image not found try to pull it
    if err != nil {
        if apiclient.IsErrNotFound(err) && namedRef != nil {
            fmt.Fprintf(stderr, "Unable to find image '%s' locally\n", reference.FamiliarString(namedRef))

            // we don't want to write to stdout anything apart from container.ID
            if err := pullImage(ctx, dockerCli, config.Image, opts.platform, stderr); err != nil {
                return nil, err
            // After a successful pull, tag the trusted reference if one was
            // resolved above.
            if taggedRef, ok := namedRef.(reference.NamedTagged); ok && trustedRef != nil {
                if err := image.TagTrusted(ctx, dockerCli, trustedRef, taggedRef); err != nil {
                    return nil, err
            // Retry
            var retryErr error
            response, retryErr = dockerCli.Client().ContainerCreate(ctx, config, hostConfig, networkingConfig, opts.name)
            if retryErr != nil {
                return nil, retryErr
        } else {
            return nil, err

    for _, warning := range response.Warnings {
        fmt.Fprintf(stderr, "WARNING: %s\n", warning)
    // The error from writing the CID file is returned alongside the response.
    err = containerIDFile.Write(response.ID)
    return &response, err


  • 检查client版本是否符合要求。
  • 通过cli.post(ctx, "/containers/create", query, body, nil)将请求发送给daemon,可以看到path指定了/containers/create,而post中调用了sendRequest()负责发送请求。
// ContainerCreate creates a new container based in the given configuration.
// It can be associated with a name, but it's not mandatory.
//
// The request is a POST to "/containers/create" with the optional name as a
// query parameter and config, hostConfig and networkingConfig wrapped in a
// configWrapper body. A 404 response whose error text contains
// "No such image" is turned into an objectNotFoundError for config.Image;
// otherwise the response body is JSON-decoded into the returned value.
//
// NOTE(review): closing braces are missing from this excerpt, and no statement
// that closes serverResp's body is visible here — confirm against upstream.
func (cli *Client) ContainerCreate(ctx context.Context, config *container.Config, hostConfig *container.HostConfig, networkingConfig *network.NetworkingConfig, containerName string) (container.ContainerCreateCreatedBody, error) {
    var response container.ContainerCreateCreatedBody

    // The version error is only returned when the caller actually set
    // config.StopTimeout.
    if err := cli.NewVersionError("1.25", "stop timeout"); config != nil && config.StopTimeout != nil && err != nil {
        return response, err

    // When using API 1.24 and under, the client is responsible for removing the container
    if hostConfig != nil && versions.LessThan(cli.ClientVersion(), "1.25") {
        hostConfig.AutoRemove = false

    query := url.Values{}
    if containerName != "" {
        query.Set("name", containerName)

    body := configWrapper{
        Config:           config,
        HostConfig:       hostConfig,
        NetworkingConfig: networkingConfig,

    serverResp, err := cli.post(ctx, "/containers/create", query, body, nil)
    if err != nil {
        // NOTE(review): serverResp is read on the error path, so this relies
        // on cli.post returning a usable value alongside the error — confirm.
        if serverResp.statusCode == 404 && strings.Contains(err.Error(), "No such image") {
            return response, objectNotFoundError{object: "image", id: config.Image}
        return response, err

    err = json.NewDecoder(serverResp.body).Decode(&response)
    return response, err

docker daemon创建容器过程


router.NewPostRoute("/containers/create", r.postContainersCreate),

首先进入handler postContainersCreate()

  • ParseForm()、CheckForJSON()都是做一些参数检查的工作
  • DecodeConfig()解析出config, host config和networking config
  • VersionFromContext()解析出版本号,对CPU shares做调整
  • 进入ContainerCreate()函数
  • 返回结果的json序列
// postContainersCreate is the HTTP handler behind POST /containers/create.
//
// It parses the form, checks the request with httputils.CheckForJSON, decodes
// config / hostConfig / networkingConfig from the body and forwards them,
// together with the "name" form value, to s.backend.ContainerCreate. On
// success the backend's result is written as JSON with http.StatusCreated.
//
// NOTE(review): closing braces/parentheses are missing from this excerpt.
func (s *containerRouter) postContainersCreate(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
    if err := httputils.ParseForm(r); err != nil {
        return err
    if err := httputils.CheckForJSON(r); err != nil {
        return err

    name := r.Form.Get("name")

    config, hostConfig, networkingConfig, err := s.decoder.DecodeConfig(r.Body)
    if err != nil {
        return err
    version := httputils.VersionFromContext(ctx)
    // Requests from API versions below 1.19 ask the backend to adjust CPU
    // shares (passed on as AdjustCPUShares).
    adjustCPUShares := versions.LessThan(version, "1.19")

    // When using API 1.24 and under, the client is responsible for removing the container
    if hostConfig != nil && versions.LessThan(version, "1.25") {
        hostConfig.AutoRemove = false

    ccr, err := s.backend.ContainerCreate(types.ContainerCreateConfig{
        Name:             name,
        Config:           config,
        HostConfig:       hostConfig,
        NetworkingConfig: networkingConfig,
        AdjustCPUShares:  adjustCPUShares,
    if err != nil {
        return err

    return httputils.WriteJSON(w, http.StatusCreated, ccr)


  • GetImage()根据image信息在imageService中获取镜像。
  • verifyContainerSettings()校验HostConfig和Config包含的容器设定是否合法
  • verifyNetworkingConfig()检查网络设置是否合法,容器是否能接入到指定的网络中
  • adaptContainerSettings()调整HostConfig中CPU、内存、安全性等相关的一些设定
  • daemon.create()创建容器
// containerCreate validates a create request and then delegates to
// daemon.create.
//
// It returns the new container's ID together with the warnings collected by
// verifyContainerSettings. Failures from the verify/adapt steps are wrapped
// with errdefs.InvalidParameter; an error from daemon.create is returned as
// is. A nil params.Config is rejected up front.
//
// NOTE(review): closing braces are missing from this excerpt, and start is not
// used by any statement visible here.
func (daemon *Daemon) containerCreate(params types.ContainerCreateConfig, managed bool) (containertypes.ContainerCreateCreatedBody, error) {
    start := time.Now()
    if params.Config == nil {
        return containertypes.ContainerCreateCreatedBody{}, errdefs.InvalidParameter(errors.New("Config cannot be empty in order to create a container"))

    // os defaults to the host OS and is replaced by the image's OS when the
    // image lookup succeeds; a failed lookup is ignored at this point.
    os := runtime.GOOS
    if params.Config.Image != "" {
        img, err := daemon.imageService.GetImage(params.Config.Image)
        if err == nil {
            os = img.OS
    } else {
        // This mean scratch. On Windows, we can safely assume that this is a linux
        // container. On other platforms, it's the host OS (which it already is)
        if runtime.GOOS == "windows" && system.LCOWSupported() {
            os = "linux"

    warnings, err := daemon.verifyContainerSettings(os, params.HostConfig, params.Config, false)
    if err != nil {
        return containertypes.ContainerCreateCreatedBody{Warnings: warnings}, errdefs.InvalidParameter(err)

    err = verifyNetworkingConfig(params.NetworkingConfig)
    if err != nil {
        return containertypes.ContainerCreateCreatedBody{Warnings: warnings}, errdefs.InvalidParameter(err)

    // Make sure HostConfig is non-nil before it is handed to
    // adaptContainerSettings.
    if params.HostConfig == nil {
        params.HostConfig = &containertypes.HostConfig{}
    err = daemon.adaptContainerSettings(params.HostConfig, params.AdjustCPUShares)
    if err != nil {
        return containertypes.ContainerCreateCreatedBody{Warnings: warnings}, errdefs.InvalidParameter(err)

    container, err := daemon.create(params, managed)
    if err != nil {
        return containertypes.ContainerCreateCreatedBody{Warnings: warnings}, err

    return containertypes.ContainerCreateCreatedBody{ID: container.ID, Warnings: warnings}, nil


  • 获取镜像信息(似乎和上面的步骤有重复的地方?都从imageService上获取镜像信息并校验操作系统信息)
  • mergeAndVerifyConfig()、mergeAndVerifyLogConfig()对配置信息做合并和检查
  • daemon.newContainer()创建容器:
    • generateIDAndName()生成容器的ID和名称,如果用户没有指定name则生成随机名称,指定则保留当前命名
    • 检查网络模式是否是Host模式,是则获取当前系统的hostname,否则从配置文件获取hostname
    • getEntrypointAndArgs()获取entry point(即容器启动时执行的入口程序)以及进入容器后要执行的命令
    • newBaseContainer()根据ID创建容器,其中调用了container.NewBaseContainer(),生成了容器实例,之后对容器做一些基本的配置
  • setSecurityOptions()设定容器的安全性,调用了parseSecurityOpt(),根据hostconfig配置容器,用户在创建容器的时候可以指定--security-opt,docker根据安全等级设定相应的安全机制
  • 设置容器的storage option,挂载点
  • CreateLayer()创建容器的文件系统
  • MkdirAndChown()设置容器的文件权限为700
  • setHostConfig()设置挂载点、网络模式,并checkpoint到本地磁盘(意味着容器创建的时候就会有一个checkpoint)
  • createContainerOSSpecificSettings()挂载容器,设定容器在host上的目录和volume容器卷
  • updateContainerNetworkSettings()更新网络设置:如果禁用网络,直接返回,否则区分是默认模式还是用户指定了网络。对各种情况做不同的设定之后返回
  • Register()标志容器在daemon中可用
  • set(container.ID, "stopped")初始化已经完成,容器标记为stopped状态
// create builds a new container object from params and registers it with the
// daemon.
//
// Visible steps, in order: resolve the image and target OS, merge/verify the
// container and log configuration, allocate the container via newContainer,
// apply security and storage options, create the RW layer, create the
// container root and checkpoint directories (mode 0700), apply the host
// config and OS-specific settings, update network settings, register the
// container and log a "create" event.
//
// If any step after newContainer fails (retErr != nil), the deferred function
// calls daemon.cleanupContainer on the half-built container.
//
// NOTE(review): closing braces/parentheses are missing from this excerpt, and
// the statement that should follow the "Make sure NetworkMode has an
// acceptable value" comment is not visible — confirm against upstream.
func (daemon *Daemon) create(params types.ContainerCreateConfig, managed bool) (retC *container.Container, retErr error) {
    var (
        container *container.Container
        img       *image.Image
        imgID     image.ID
        err       error

    // os defaults to the host OS; a non-empty img.OS overrides it.
    os := runtime.GOOS
    if params.Config.Image != "" {
        img, err = daemon.imageService.GetImage(params.Config.Image)
        if err != nil {
            return nil, err
        if img.OS != "" {
            os = img.OS
        } else {
            // default to the host OS except on Windows with LCOW
            if runtime.GOOS == "windows" && system.LCOWSupported() {
                os = "linux"
        imgID = img.ID()

        if runtime.GOOS == "windows" && img.OS == "linux" && !system.LCOWSupported() {
            return nil, errors.New("operating system on which parent image was created is not Windows")
    } else {
        if runtime.GOOS == "windows" {
            os = "linux" // 'scratch' case.

    if err := daemon.mergeAndVerifyConfig(params.Config, img); err != nil {
        return nil, errdefs.InvalidParameter(err)

    // The argument is the address of the log config inside params.HostConfig
    // (the excerpt previously showed a mis-encoded "¶ms" here).
    if err := daemon.mergeAndVerifyLogConfig(&params.HostConfig.LogConfig); err != nil {
        return nil, errdefs.InvalidParameter(err)

    if container, err = daemon.newContainer(params.Name, os, params.Config, params.HostConfig, imgID, managed); err != nil {
        return nil, err
    // From here on, any returned error triggers cleanup of the new container.
    defer func() {
        if retErr != nil {
            if err := daemon.cleanupContainer(container, true, true); err != nil {
                logrus.Errorf("failed to cleanup container on create error: %v", err)

    if err := daemon.setSecurityOptions(container, params.HostConfig); err != nil {
        return nil, err

    container.HostConfig.StorageOpt = params.HostConfig.StorageOpt

    // Fixes: https://github.com/moby/moby/issues/34074 and
    // https://github.com/docker/for-win/issues/999.
    // Merge the daemon's storage options if they aren't already present. We only
    // do this on Windows as there's no effective sandbox size limit other than
    // physical on Linux.
    if runtime.GOOS == "windows" {
        if container.HostConfig.StorageOpt == nil {
            container.HostConfig.StorageOpt = make(map[string]string)
        for _, v := range daemon.configStore.GraphOptions {
            opt := strings.SplitN(v, "=", 2)
            if _, ok := container.HostConfig.StorageOpt[opt[0]]; !ok {
                container.HostConfig.StorageOpt[opt[0]] = opt[1]

    // Set RWLayer for container after mount labels have been set
    rwLayer, err := daemon.imageService.CreateLayer(container, setupInitLayer(daemon.idMappings))
    if err != nil {
        return nil, errdefs.System(err)
    container.RWLayer = rwLayer

    // Both the container root and its checkpoint directory are created with
    // mode 0700 and owned by the mapped root pair.
    rootIDs := daemon.idMappings.RootPair()
    if err := idtools.MkdirAndChown(container.Root, 0700, rootIDs); err != nil {
        return nil, err
    if err := idtools.MkdirAndChown(container.CheckpointDir(), 0700, rootIDs); err != nil {
        return nil, err

    if err := daemon.setHostConfig(container, params.HostConfig); err != nil {
        return nil, err

    if err := daemon.createContainerOSSpecificSettings(container, params.Config, params.HostConfig); err != nil {
        return nil, err

    var endpointsConfigs map[string]*networktypes.EndpointSettings
    if params.NetworkingConfig != nil {
        endpointsConfigs = params.NetworkingConfig.EndpointsConfig
    // Make sure NetworkMode has an acceptable value. We do this to ensure
    // backwards API compatibility.

    daemon.updateContainerNetworkSettings(container, endpointsConfigs)
    if err := daemon.Register(container); err != nil {
        return nil, err
    stateCtr.set(container.ID, "stopped")
    daemon.LogContainerEvent(container, "create")
    return container, nil

docker daemon启动容器过程

start容器时,同样首先进入router postContainersStart(),有类似的步骤:

  • 获取version信息,和r.ContentLength一起解析出hostconfig
  • ParseForm()确保request正确的解析,保存在r.Form中
  • 从r.Form中获取checkpoint和checkpoint-dir,恢复容器现场。docker将CRIU集成之后,命名为checkpoint,可以将容器上下文保存、恢复,用户可以docker start --checkpoint xxx来使用这个功能
  • 进入ContainerStart()
// postContainersStart is the HTTP handler for starting a container.
//
// A request body is only decoded (as a HostConfig) for API versions below
// 1.24; for 1.24 and above a body that passes the length check is rejected
// with bodyOnStartError. The container identified by vars["name"] is then
// started through s.backend.ContainerStart, passing along the "checkpoint"
// and "checkpoint-dir" form values.
//
// NOTE(review): closing braces are missing from this excerpt.
func (s *containerRouter) postContainersStart(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
    // If contentLength is -1, we can assumed chunked encoding
    // or more technically that the length is unknown
    // https://golang.org/src/pkg/net/http/request.go#L139
    // net/http otherwise seems to swallow any headers related to chunked encoding
    // including r.TransferEncoding
    // allow a nil body for backwards compatibility

    version := httputils.VersionFromContext(ctx)
    var hostConfig *container.HostConfig
    // A non-nil json object is at least 7 characters.
    if r.ContentLength > 7 || r.ContentLength == -1 {
        if versions.GreaterThanOrEqualTo(version, "1.24") {
            return bodyOnStartError{}

        if err := httputils.CheckForJSON(r); err != nil {
            return err

        c, err := s.decoder.DecodeHostConfig(r.Body)
        if err != nil {
            return err
        hostConfig = c

    if err := httputils.ParseForm(r); err != nil {
        return err

    // Checkpoint parameters come from the parsed form values.
    checkpoint := r.Form.Get("checkpoint")
    checkpointDir := r.Form.Get("checkpoint-dir")
    if err := s.backend.ContainerStart(vars["name"], hostConfig, checkpoint, checkpointDir); err != nil {
        return err

    return nil


  • 由于checkpoint模式目前只是experimental的,首先判断是否使用了checkpoint flag,如果使用了是否启用了experimental功能。
  • daemon.GetContainer()从name或者ID获取容器,完整的ID可以唯一的对应容器,container name也可以通过GetByName()映射为ID,如果只传递了部分的ID,将判断是否查找到唯一的容器
  • validateState()检查容器状态:将容器加锁后,检查容器状态。容器状态可以是paused, running, stopped, removalInProgress等等,start只能够处理stopped状态的容器
  • verifyContainerSettings()当cgroups变化的时候,可能需要调整容器的设置
  • adaptContainerSettings()如果docker更新了,旧版本的容器同样需要更新
  • 进入daemon.containerStart()
// ContainerStart starts a container.
//
// It rejects a non-empty checkpoint unless daemon.HasExperimental() is true,
// looks the container up by name, refuses paused, running, removal-in-progress
// or dead containers, applies the deprecated start-time hostConfig (non-Windows
// only; Windows rejects it), re-verifies the container's settings and finally
// hands over to daemon.containerStart.
//
// NOTE(review): closing braces are missing from this excerpt, and validateState
// defers container.Unlock() without a visible matching Lock() call — the lock
// statement was presumably lost in extraction; confirm against upstream.
func (daemon *Daemon) ContainerStart(name string, hostConfig *containertypes.HostConfig, checkpoint string, checkpointDir string) error {
    if checkpoint != "" && !daemon.HasExperimental() {
        return errdefs.InvalidParameter(errors.New("checkpoint is only supported in experimental mode"))

    container, err := daemon.GetContainer(name)
    if err != nil {
        return err

    // validateState reports why the container cannot be started, or nil when
    // none of the blocking states is set.
    validateState := func() error {
        defer container.Unlock()

        if container.Paused {
            return errdefs.Conflict(errors.New("cannot start a paused container, try unpause instead"))

        if container.Running {
            return containerNotModifiedError{running: true}

        if container.RemovalInProgress || container.Dead {
            return errdefs.Conflict(errors.New("container is marked for removal and cannot be started"))
        return nil

    if err := validateState(); err != nil {
        return err

    // Windows does not have the backwards compatibility issue here.
    if runtime.GOOS != "windows" {
        // This is kept for backward compatibility - hostconfig should be passed when
        // creating a container, not during start.
        if hostConfig != nil {
            logrus.Warn("DEPRECATED: Setting host configuration options when the container starts is deprecated and has been removed in Docker 1.12")
            oldNetworkMode := container.HostConfig.NetworkMode
            if err := daemon.setSecurityOptions(container, hostConfig); err != nil {
                return errdefs.InvalidParameter(err)
            if err := daemon.mergeAndVerifyLogConfig(&hostConfig.LogConfig); err != nil {
                return errdefs.InvalidParameter(err)
            if err := daemon.setHostConfig(container, hostConfig); err != nil {
                return errdefs.InvalidParameter(err)
            newNetworkMode := container.HostConfig.NetworkMode
            if string(oldNetworkMode) != string(newNetworkMode) {
                // if user has change the network mode on starting, clean up the
                // old networks. It is a deprecated feature and has been removed in Docker 1.12
                container.NetworkSettings.Networks = nil
                if err := container.CheckpointTo(daemon.containersReplica); err != nil {
                    return errdefs.System(err)
    } else {
        if hostConfig != nil {
            return errdefs.InvalidParameter(errors.New("Supplying a hostconfig on start is not supported. It should be supplied on create"))

    // check if hostConfig is in line with the current system settings.
    // It may happen cgroups are umounted or the like.
    if _, err = daemon.verifyContainerSettings(container.OS, container.HostConfig, nil, false); err != nil {
        return errdefs.InvalidParameter(err)
    // Adapt for old containers in case we have updates in this function and
    // old containers never have chance to call the new function in create stage.
    if hostConfig != nil {
        if err := daemon.adaptContainerSettings(container.HostConfig, false); err != nil {
            return errdefs.InvalidParameter(err)
    // The final argument (resetRestartManager) is always true on this path.
    return daemon.containerStart(container, checkpoint, checkpointDir, true)


  • 检查容器状态,比较trivial
  • 设置failover mechanism:
    • CheckpointTo()保存出错现场
    • 解锁容器
    • cleanup()清理容器
    • 当autoRemove启用时,ContainerRm()删除容器
  • conditionalMountOnStart()与挂载有关
  • initializeNetworking()网络
  • createSpec()文件系统
  • saveApparmorConfig()安全
  • containerd.Create()、containerd.Start()交给containerd
  • setStateCounter()、initHealthMonitor()监控、心跳


// containerStart prepares the container to run by setting up everything the
// container needs, such as storage and networking, as well as links
// between containers. The container is left waiting for a signal to
// begin running.
//
// Visible steps, in order: state checks, mount and network setup, spec
// creation, AppArmor config, checkpoint directory resolution, then
// containerd Create and Start. On success the container is marked running,
// checkpointed to daemon.containersReplica and a "start" event is logged.
//
// A non-empty checkpointDir is rejected with a Forbidden error. If the function
// returns a non-nil err, the deferred handler checkpoints the container state
// and, when HostConfig.AutoRemove is set, removes the container.
//
// NOTE(review): closing braces/parentheses are missing from this excerpt, and
// several statements appear to have been lost in extraction: the Lock() that
// pairs with the deferred Unlock(), and the bodies of
// `if container.ExitCode() == 0 {` and `if resetRestartManager {` — confirm
// against upstream. start is not used by any statement visible here.
func (daemon *Daemon) containerStart(container *container.Container, checkpoint string, checkpointDir string, resetRestartManager bool) (err error) {
    start := time.Now()
    defer container.Unlock()

    if resetRestartManager && container.Running { // skip this check if already in restarting step and resetRestartManager==false
        return nil

    if container.RemovalInProgress || container.Dead {
        return errdefs.Conflict(errors.New("container is marked for removal and cannot be started"))

    if checkpointDir != "" {
        // TODO(mlaventure): how would we support that?
        return errdefs.Forbidden(errors.New("custom checkpointdir is not supported"))

    // if we encounter an error during start we need to ensure that any other
    // setup has been cleaned up properly
    defer func() {
        if err != nil {
            // if no one else has set it, make sure we don't leave it at zero
            if container.ExitCode() == 0 {
            if err := container.CheckpointTo(daemon.containersReplica); err != nil {
                logrus.Errorf("%s: failed saving state on start failure: %v", container.ID, err)

            // if containers AutoRemove flag is set, remove it after clean up
            if container.HostConfig.AutoRemove {
                if err := daemon.ContainerRm(container.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err != nil {
                    logrus.Errorf("can't remove container %s: %v", container.ID, err)

    if err := daemon.conditionalMountOnStart(container); err != nil {
        return err

    if err := daemon.initializeNetworking(container); err != nil {
        return err

    spec, err := daemon.createSpec(container)
    if err != nil {
        return errdefs.System(err)

    if resetRestartManager {

    // Capture the call's own error. Without the assignment the result was
    // discarded and the condition re-tested the err left over from createSpec,
    // which is always nil at this point.
    if err := daemon.saveApparmorConfig(container); err != nil {
        return err

    if checkpoint != "" {
        checkpointDir, err = getCheckpointDir(checkpointDir, checkpoint, container.Name, container.ID, container.CheckpointDir(), false)
        if err != nil {
            return err

    createOptions, err := daemon.getLibcontainerdCreateOptions(container)
    if err != nil {
        return err

    err = daemon.containerd.Create(context.Background(), container.ID, spec, createOptions)
    if err != nil {
        return translateContainerdStartErr(container.Path, container.SetExitCode, err)

    // TODO(mlaventure): we need to specify checkpoint options here
    pid, err := daemon.containerd.Start(context.Background(), container.ID, checkpointDir,
        container.StreamConfig.Stdin() != nil || container.Config.Tty,
    if err != nil {
        // Start failed: delete what Create set up in containerd. A delete
        // failure is only logged; the start error is what gets returned.
        if err := daemon.containerd.Delete(context.Background(), container.ID); err != nil {
            logrus.WithError(err).WithField("container", container.ID).
                Error("failed to delete failed start container")
        return translateContainerdStartErr(container.Path, container.SetExitCode, err)

    container.SetRunning(pid, true)
    container.HasBeenManuallyStopped = false
    container.HasBeenStartedBefore = true


    // A failure to persist the running state is logged but does not fail the
    // start.
    if err := container.CheckpointTo(daemon.containersReplica); err != nil {
        logrus.WithError(err).WithField("container", container.ID).
            Errorf("failed to store container")

    daemon.LogContainerEvent(container, "start")

    return nil

