docker client创建容器过程
在docker-ce项目中可以看到docker client的API,在cli/command/container/cmd.go中列出了NewContainerCommand的列表,NewCreateCommand
就在其中。由于run其实被拆分为create和start两部,而client只负责parse flag,发送HTTP request,并不涉及什么heavy的业务逻辑,所以这里只分析create。
// NewCreateCommand creates a new cobra.Command for `docker create`
func NewCreateCommand(dockerCli command.Cli) *cobra.Command {
var opts createOptions
var copts *containerOptions
cmd := &cobra.Command{
Use: "create [OPTIONS] IMAGE [COMMAND] [ARG...]",
Short: "Create a new container",
Args: cli.RequiresMinArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
copts.Image = args[0]
if len(args) > 1 {
copts.Args = args[1:]
}
return runCreate(dockerCli, cmd.Flags(), &opts, copts)
},
}
flags := cmd.Flags()
flags.SetInterspersed(false)
flags.StringVar(&opts.name, "name", "", "Assign a name to the container")
// Add an explicit help that doesn't have a `-h` to prevent the conflict
// with hostname
flags.Bool("help", false, "Print usage")
command.AddPlatformFlag(flags, &opts.platform)
command.AddTrustVerificationFlags(flags, &opts.untrusted, dockerCli.ContentTrustEnabled())
copts = addFlags(flags)
return cmd
}
抛开parse flags不说,注册了runCreate()
函数,其中又调用了createContainer()
。
- 其中包含了networkingconfig,是用户指定的网络设置。
-
newCIDFile()
可以将container ID写入指定文件中 -
ContainerCreate()
创建容器 - 检查是否错误,如果错误类型为镜像找不到,调用
pullImage
拉去镜像,并再次执行ContainerCreate()
func runCreate(dockerCli command.Cli, flags *pflag.FlagSet, opts *createOptions, copts *containerOptions) error {
containerConfig, err := parse(flags, copts)
if err != nil {
reportError(dockerCli.Err(), "create", err.Error(), true)
return cli.StatusError{StatusCode: 125}
}
response, err := createContainer(context.Background(), dockerCli, containerConfig, opts)
if err != nil {
return err
}
fmt.Fprintln(dockerCli.Out(), response.ID)
return nil
}
func createContainer(ctx context.Context, dockerCli command.Cli, containerConfig *containerConfig, opts *createOptions) (*container.ContainerCreateCreatedBody, error) {
config := containerConfig.Config
hostConfig := containerConfig.HostConfig
networkingConfig := containerConfig.NetworkingConfig
stderr := dockerCli.Err()
var (
trustedRef reference.Canonical
namedRef reference.Named
)
containerIDFile, err := newCIDFile(hostConfig.ContainerIDFile)
if err != nil {
return nil, err
}
defer containerIDFile.Close()
ref, err := reference.ParseAnyReference(config.Image)
if err != nil {
return nil, err
}
if named, ok := ref.(reference.Named); ok {
namedRef = reference.TagNameOnly(named)
if taggedRef, ok := namedRef.(reference.NamedTagged); ok && !opts.untrusted {
var err error
trustedRef, err = image.TrustedReference(ctx, dockerCli, taggedRef, nil)
if err != nil {
return nil, err
}
config.Image = reference.FamiliarString(trustedRef)
}
}
//create the container
response, err := dockerCli.Client().ContainerCreate(ctx, config, hostConfig, networkingConfig, opts.name)
//if image not found try to pull it
if err != nil {
if apiclient.IsErrNotFound(err) && namedRef != nil {
fmt.Fprintf(stderr, "Unable to find image '%s' locally\n", reference.FamiliarString(namedRef))
// we don't want to write to stdout anything apart from container.ID
if err := pullImage(ctx, dockerCli, config.Image, opts.platform, stderr); err != nil {
return nil, err
}
if taggedRef, ok := namedRef.(reference.NamedTagged); ok && trustedRef != nil {
if err := image.TagTrusted(ctx, dockerCli, trustedRef, taggedRef); err != nil {
return nil, err
}
}
// Retry
var retryErr error
response, retryErr = dockerCli.Client().ContainerCreate(ctx, config, hostConfig, networkingConfig, opts.name)
if retryErr != nil {
return nil, retryErr
}
} else {
return nil, err
}
}
for _, warning := range response.Warnings {
fmt.Fprintf(stderr, "WARNING: %s\n", warning)
}
err = containerIDFile.Write(response.ID)
return &response, err
}
而client端执行的ContainerCreate()
最终将请求发送到daemon。
- 检查client版本是否符合要求。
- 通过
cli.post(ctx, "/containers/create", query, body, nil)
将请求发送给daemon,可以看到path指定了/containers/create,而post中调用了sendRequest()
负责发送请求。
// ContainerCreate creates a new container based in the given configuration.
// It can be associated with a name, but it's not mandatory.
func (cli *Client) ContainerCreate(ctx context.Context, config *container.Config, hostConfig *container.HostConfig, networkingConfig *network.NetworkingConfig, containerName string) (container.ContainerCreateCreatedBody, error) {
var response container.ContainerCreateCreatedBody
if err := cli.NewVersionError("1.25", "stop timeout"); config != nil && config.StopTimeout != nil && err != nil {
return response, err
}
// When using API 1.24 and under, the client is responsible for removing the container
if hostConfig != nil && versions.LessThan(cli.ClientVersion(), "1.25") {
hostConfig.AutoRemove = false
}
query := url.Values{}
if containerName != "" {
query.Set("name", containerName)
}
body := configWrapper{
Config: config,
HostConfig: hostConfig,
NetworkingConfig: networkingConfig,
}
serverResp, err := cli.post(ctx, "/containers/create", query, body, nil)
if err != nil {
if serverResp.statusCode == 404 && strings.Contains(err.Error(), "No such image") {
return response, objectNotFoundError{object: "image", id: config.Image}
}
return response, err
}
err = json.NewDecoder(serverResp.body).Decode(&response)
ensureReaderClosed(serverResp)
return response, err
}
docker daemon创建容器过程
在daemon中,由于router中已经设定了收到请求调用的router,故进入handler。create命令的router定义在api/server/router/container/container.go中
router.NewPostRoute("/containers/create", r.postContainersCreate),
首先进入handler postContainerCreate()
-
ParseForm()
和CheckForJSON()
都是做一些参数检查的工作 -
DecodeConfig()
解析出config, host config和networking config -
VersionFromContext()
解析出版本号,对CPU shares做调整 - 进入
ContainerCreate()
函数 - 返回结果的json序列
func (s *containerRouter) postContainersCreate(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
if err := httputils.ParseForm(r); err != nil {
return err
}
if err := httputils.CheckForJSON(r); err != nil {
return err
}
name := r.Form.Get("name")
config, hostConfig, networkingConfig, err := s.decoder.DecodeConfig(r.Body)
if err != nil {
return err
}
version := httputils.VersionFromContext(ctx)
adjustCPUShares := versions.LessThan(version, "1.19")
// When using API 1.24 and under, the client is responsible for removing the container
if hostConfig != nil && versions.LessThan(version, "1.25") {
hostConfig.AutoRemove = false
}
ccr, err := s.backend.ContainerCreate(types.ContainerCreateConfig{
Name: name,
Config: config,
HostConfig: hostConfig,
NetworkingConfig: networkingConfig,
AdjustCPUShares: adjustCPUShares,
})
if err != nil {
return err
}
return httputils.WriteJSON(w, http.StatusCreated, ccr)
}
ContainerCreate()
会调用daemon.containerCreate()
-
GetImage()
根据image信息在imageService中获取镜像。 -
verifyContainerSettings()
校验HostConfig和Config包含的容器设定是否合法 -
verifyNetworkingConfig()
检查网络设置是否合法,容器是否能介入到指定的网络中 -
adaptContainerSettings()
调整HostConfig中CPU、内存、安全性等相关的一些设定 -
daemon.create()
创建容器
func (daemon *Daemon) containerCreate(params types.ContainerCreateConfig, managed bool) (containertypes.ContainerCreateCreatedBody, error) {
start := time.Now()
if params.Config == nil {
return containertypes.ContainerCreateCreatedBody{}, errdefs.InvalidParameter(errors.New("Config cannot be empty in order to create a container"))
}
os := runtime.GOOS
if params.Config.Image != "" {
img, err := daemon.imageService.GetImage(params.Config.Image)
if err == nil {
os = img.OS
}
} else {
// This mean scratch. On Windows, we can safely assume that this is a linux
// container. On other platforms, it's the host OS (which it already is)
if runtime.GOOS == "windows" && system.LCOWSupported() {
os = "linux"
}
}
warnings, err := daemon.verifyContainerSettings(os, params.HostConfig, params.Config, false)
if err != nil {
return containertypes.ContainerCreateCreatedBody{Warnings: warnings}, errdefs.InvalidParameter(err)
}
err = verifyNetworkingConfig(params.NetworkingConfig)
if err != nil {
return containertypes.ContainerCreateCreatedBody{Warnings: warnings}, errdefs.InvalidParameter(err)
}
if params.HostConfig == nil {
params.HostConfig = &containertypes.HostConfig{}
}
err = daemon.adaptContainerSettings(params.HostConfig, params.AdjustCPUShares)
if err != nil {
return containertypes.ContainerCreateCreatedBody{Warnings: warnings}, errdefs.InvalidParameter(err)
}
container, err := daemon.create(params, managed)
if err != nil {
return containertypes.ContainerCreateCreatedBody{Warnings: warnings}, err
}
containerActions.WithValues("create").UpdateSince(start)
return containertypes.ContainerCreateCreatedBody{ID: container.ID, Warnings: warnings}, nil
}
daemon.create()
的逻辑:
- 获取镜像信息(似乎和上面的步骤有重复的地方?都从imageService上获取景象信息并校验操作系统信息)
-
mergeAndVerifyConfig()
和mergeAndVerifyLogConfig()
对配置信息做合并和检查 -
daemon.newContainer()
创建容器:-
generateIDAndName()
生成容器的ID和名称,如果用户没有指定name则生成随机名称,指定则保留当前命名 - 检查网络模式是否是Host模式,是则获取当前系统的hostname,否则从配置文件获取hostname
-
getEntrypointAndArgs()
获取entry point(是什么?)已经进入创建人容器后要执行的命令 -
newBaseContainer()
根据ID创建容器,其中调用了container.NewBaseContainer()
,生成了容器实例,之后对容器做一些基本的配置
-
-
setSecurityOptions()
设定容器的安全性,调用了parseSecurityOpt()
,根据hostconfig配置容器,用户在创建容器的时候可以指定--security-opt,docker根据安全等级设定相应的安全机制 - 设置容器的storage option,挂载点
-
CreateLayer()
创建容器的文件系统 -
MkdirAndChown()
设置容器的文件权限为700 -
setHostConfig()
设置挂载点、网络模式,并checkpoint到本地磁盘(意味着容器创建的时候就会有一个checkpoint) -
createContainerOSSpecificSettings()
挂载容器,设定容器在host上的目录和volumn容器卷 -
updateContainerNetworkSettings()
更新网络设置:如果禁用网络,直接返回,否则区分是默认模式还是用户指定了网络。对各种情况做不同的设定之后返回 -
Register()
标志容器在daemon中可用 -
set(container.ID, "stopped")
初始化已经完成,容器标记为stopped状态
func (daemon *Daemon) create(params types.ContainerCreateConfig, managed bool) (retC *container.Container, retErr error) {
var (
container *container.Container
img *image.Image
imgID image.ID
err error
)
os := runtime.GOOS
if params.Config.Image != "" {
img, err = daemon.imageService.GetImage(params.Config.Image)
if err != nil {
return nil, err
}
if img.OS != "" {
os = img.OS
} else {
// default to the host OS except on Windows with LCOW
if runtime.GOOS == "windows" && system.LCOWSupported() {
os = "linux"
}
}
imgID = img.ID()
if runtime.GOOS == "windows" && img.OS == "linux" && !system.LCOWSupported() {
return nil, errors.New("operating system on which parent image was created is not Windows")
}
} else {
if runtime.GOOS == "windows" {
os = "linux" // 'scratch' case.
}
}
if err := daemon.mergeAndVerifyConfig(params.Config, img); err != nil {
return nil, errdefs.InvalidParameter(err)
}
if err := daemon.mergeAndVerifyLogConfig(¶ms.HostConfig.LogConfig); err != nil {
return nil, errdefs.InvalidParameter(err)
}
if container, err = daemon.newContainer(params.Name, os, params.Config, params.HostConfig, imgID, managed); err != nil {
return nil, err
}
defer func() {
if retErr != nil {
if err := daemon.cleanupContainer(container, true, true); err != nil {
logrus.Errorf("failed to cleanup container on create error: %v", err)
}
}
}()
if err := daemon.setSecurityOptions(container, params.HostConfig); err != nil {
return nil, err
}
container.HostConfig.StorageOpt = params.HostConfig.StorageOpt
// Fixes: https://github.com/moby/moby/issues/34074 and
// https://github.com/docker/for-win/issues/999.
// Merge the daemon's storage options if they aren't already present. We only
// do this on Windows as there's no effective sandbox size limit other than
// physical on Linux.
if runtime.GOOS == "windows" {
if container.HostConfig.StorageOpt == nil {
container.HostConfig.StorageOpt = make(map[string]string)
}
for _, v := range daemon.configStore.GraphOptions {
opt := strings.SplitN(v, "=", 2)
if _, ok := container.HostConfig.StorageOpt[opt[0]]; !ok {
container.HostConfig.StorageOpt[opt[0]] = opt[1]
}
}
}
// Set RWLayer for container after mount labels have been set
rwLayer, err := daemon.imageService.CreateLayer(container, setupInitLayer(daemon.idMappings))
if err != nil {
return nil, errdefs.System(err)
}
container.RWLayer = rwLayer
rootIDs := daemon.idMappings.RootPair()
if err := idtools.MkdirAndChown(container.Root, 0700, rootIDs); err != nil {
return nil, err
}
if err := idtools.MkdirAndChown(container.CheckpointDir(), 0700, rootIDs); err != nil {
return nil, err
}
if err := daemon.setHostConfig(container, params.HostConfig); err != nil {
return nil, err
}
if err := daemon.createContainerOSSpecificSettings(container, params.Config, params.HostConfig); err != nil {
return nil, err
}
var endpointsConfigs map[string]*networktypes.EndpointSettings
if params.NetworkingConfig != nil {
endpointsConfigs = params.NetworkingConfig.EndpointsConfig
}
// Make sure NetworkMode has an acceptable value. We do this to ensure
// backwards API compatibility.
runconfig.SetDefaultNetModeIfBlank(container.HostConfig)
daemon.updateContainerNetworkSettings(container, endpointsConfigs)
if err := daemon.Register(container); err != nil {
return nil, err
}
stateCtr.set(container.ID, "stopped")
daemon.LogContainerEvent(container, "create")
return container, nil
}
docker daemon启动容器过程
start容器时,同样首先进入router postContainerStart()
,有类似的步骤:
- 获取version信息,和r.ContentLength一起解析出hostconfig
-
ParseForm()
确保request正确的解析,保存在r.Form中 - 从r.Form中获取checkpoint和checkpoint-dir,恢复容器现场。docker将CRIU集成之后,命名为checkpoint,可以将容器上下文保存、恢复,用户可以docker start --checkpoint xxx来使用这个功能
- 进入
ContainerStart()
func (s *containerRouter) postContainersStart(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
// If contentLength is -1, we can assumed chunked encoding
// or more technically that the length is unknown
// https://golang.org/src/pkg/net/http/request.go#L139
// net/http otherwise seems to swallow any headers related to chunked encoding
// including r.TransferEncoding
// allow a nil body for backwards compatibility
version := httputils.VersionFromContext(ctx)
var hostConfig *container.HostConfig
// A non-nil json object is at least 7 characters.
if r.ContentLength > 7 || r.ContentLength == -1 {
if versions.GreaterThanOrEqualTo(version, "1.24") {
return bodyOnStartError{}
}
if err := httputils.CheckForJSON(r); err != nil {
return err
}
c, err := s.decoder.DecodeHostConfig(r.Body)
if err != nil {
return err
}
hostConfig = c
}
if err := httputils.ParseForm(r); err != nil {
return err
}
checkpoint := r.Form.Get("checkpoint")
checkpointDir := r.Form.Get("checkpoint-dir")
if err := s.backend.ContainerStart(vars["name"], hostConfig, checkpoint, checkpointDir); err != nil {
return err
}
w.WriteHeader(http.StatusNoContent)
return nil
}
进入containerStart后:
- 由于checkpoint模式目前只是experimental的,首先判断是否使用了checkpoint flag,如果使用了是否启用了experimental功能。
-
daemon.GetContainer()
从name或者ID获取容器,完整的ID可以唯一的对应容器,container name也可以通过GetByName()
映射为ID,如果只传递了部分的ID,将判断是否查找到唯一的容器 -
validateState()
检查容器状态:将容器加锁后,检查容器状态。容器状态可以是paused, running, stopped, removalInProgress等等,start只能够处理stopped状态的容器 -
verifyContainerSettings()
当cgroups变化的时候,可能需要调整容器的设置 -
adaptContainerSettings()
如果docker更新了,旧版本的容器同样需要更新 - 进入
daemon.containerStart()
// ContainerStart starts a container.
func (daemon *Daemon) ContainerStart(name string, hostConfig *containertypes.HostConfig, checkpoint string, checkpointDir string) error {
if checkpoint != "" && !daemon.HasExperimental() {
return errdefs.InvalidParameter(errors.New("checkpoint is only supported in experimental mode"))
}
container, err := daemon.GetContainer(name)
if err != nil {
return err
}
validateState := func() error {
container.Lock()
defer container.Unlock()
if container.Paused {
return errdefs.Conflict(errors.New("cannot start a paused container, try unpause instead"))
}
if container.Running {
return containerNotModifiedError{running: true}
}
if container.RemovalInProgress || container.Dead {
return errdefs.Conflict(errors.New("container is marked for removal and cannot be started"))
}
return nil
}
if err := validateState(); err != nil {
return err
}
// Windows does not have the backwards compatibility issue here.
if runtime.GOOS != "windows" {
// This is kept for backward compatibility - hostconfig should be passed when
// creating a container, not during start.
if hostConfig != nil {
logrus.Warn("DEPRECATED: Setting host configuration options when the container starts is deprecated and has been removed in Docker 1.12")
oldNetworkMode := container.HostConfig.NetworkMode
if err := daemon.setSecurityOptions(container, hostConfig); err != nil {
return errdefs.InvalidParameter(err)
}
if err := daemon.mergeAndVerifyLogConfig(&hostConfig.LogConfig); err != nil {
return errdefs.InvalidParameter(err)
}
if err := daemon.setHostConfig(container, hostConfig); err != nil {
return errdefs.InvalidParameter(err)
}
newNetworkMode := container.HostConfig.NetworkMode
if string(oldNetworkMode) != string(newNetworkMode) {
// if user has change the network mode on starting, clean up the
// old networks. It is a deprecated feature and has been removed in Docker 1.12
container.NetworkSettings.Networks = nil
if err := container.CheckpointTo(daemon.containersReplica); err != nil {
return errdefs.System(err)
}
}
container.InitDNSHostConfig()
}
} else {
if hostConfig != nil {
return errdefs.InvalidParameter(errors.New("Supplying a hostconfig on start is not supported. It should be supplied on create"))
}
}
// check if hostConfig is in line with the current system settings.
// It may happen cgroups are umounted or the like.
if _, err = daemon.verifyContainerSettings(container.OS, container.HostConfig, nil, false); err != nil {
return errdefs.InvalidParameter(err)
}
// Adapt for old containers in case we have updates in this function and
// old containers never have chance to call the new function in create stage.
if hostConfig != nil {
if err := daemon.adaptContainerSettings(container.HostConfig, false); err != nil {
return errdefs.InvalidParameter(err)
}
}
return daemon.containerStart(container, checkpoint, checkpointDir, true)
}
daemon.containerStart()
的逻辑:
- 检查容器状态,比较trivial
- 设置failover mechanism:
-
CheckpointTo()
保存出错现场 - 解锁容器
-
cleanup()
清理容器 - 当autoRemove启用时,
ContainerRm()
删除容器
-
-
conditionalMountOnStart()
与挂载有关 -
initializeNetworking()
网络 -
createSpec()
文件系统 -
saveApparmorConfig()
安全 -
containerd.Create()
和containerd.Start()
交给containerd
管理容器 -
setStateCounter()
和initHealthMonitor
监控、心跳
之后会逐个深入。
// containerStart prepares the container to run by setting up everything the
// container needs, such as storage and networking, as well as links
// between containers. The container is left waiting for a signal to
// begin running.
func (daemon *Daemon) containerStart(container *container.Container, checkpoint string, checkpointDir string, resetRestartManager bool) (err error) {
start := time.Now()
container.Lock()
defer container.Unlock()
if resetRestartManager && container.Running { // skip this check if already in restarting step and resetRestartManager==false
return nil
}
if container.RemovalInProgress || container.Dead {
return errdefs.Conflict(errors.New("container is marked for removal and cannot be started"))
}
if checkpointDir != "" {
// TODO(mlaventure): how would we support that?
return errdefs.Forbidden(errors.New("custom checkpointdir is not supported"))
}
// if we encounter an error during start we need to ensure that any other
// setup has been cleaned up properly
defer func() {
if err != nil {
container.SetError(err)
// if no one else has set it, make sure we don't leave it at zero
if container.ExitCode() == 0 {
container.SetExitCode(128)
}
if err := container.CheckpointTo(daemon.containersReplica); err != nil {
logrus.Errorf("%s: failed saving state on start failure: %v", container.ID, err)
}
container.Reset(false)
daemon.Cleanup(container)
// if containers AutoRemove flag is set, remove it after clean up
if container.HostConfig.AutoRemove {
container.Unlock()
if err := daemon.ContainerRm(container.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err != nil {
logrus.Errorf("can't remove container %s: %v", container.ID, err)
}
container.Lock()
}
}
}()
if err := daemon.conditionalMountOnStart(container); err != nil {
return err
}
if err := daemon.initializeNetworking(container); err != nil {
return err
}
spec, err := daemon.createSpec(container)
if err != nil {
return errdefs.System(err)
}
if resetRestartManager {
container.ResetRestartManager(true)
}
if daemon.saveApparmorConfig(container); err != nil {
return err
}
if checkpoint != "" {
checkpointDir, err = getCheckpointDir(checkpointDir, checkpoint, container.Name, container.ID, container.CheckpointDir(), false)
if err != nil {
return err
}
}
createOptions, err := daemon.getLibcontainerdCreateOptions(container)
if err != nil {
return err
}
err = daemon.containerd.Create(context.Background(), container.ID, spec, createOptions)
if err != nil {
return translateContainerdStartErr(container.Path, container.SetExitCode, err)
}
// TODO(mlaventure): we need to specify checkpoint options here
pid, err := daemon.containerd.Start(context.Background(), container.ID, checkpointDir,
container.StreamConfig.Stdin() != nil || container.Config.Tty,
container.InitializeStdio)
if err != nil {
if err := daemon.containerd.Delete(context.Background(), container.ID); err != nil {
logrus.WithError(err).WithField("container", container.ID).
Error("failed to delete failed start container")
}
return translateContainerdStartErr(container.Path, container.SetExitCode, err)
}
container.SetRunning(pid, true)
container.HasBeenManuallyStopped = false
container.HasBeenStartedBefore = true
daemon.setStateCounter(container)
daemon.initHealthMonitor(container)
if err := container.CheckpointTo(daemon.containersReplica); err != nil {
logrus.WithError(err).WithField("container", container.ID).
Errorf("failed to store container")
}
daemon.LogContainerEvent(container, "start")
containerActions.WithValues("start").UpdateSince(start)
return nil
}