【kubernetes/k8s概念】CNI plugin calico源码分析

https://github.com/projectcalico/cni-plugin,release-v3.8

       calico 解决不同物理机上容器之间的通信,而 calico-plugin 是在 k8s 创建 Pod 时为 Pod 设置虚拟网卡(容器中的 eth0和 lo 网卡),calico-plugin 是由两个静态的二进制文件组成,由 kubelet 以命令行的形式调用,这两个二进制的作用如下:

  • calico-ipam:分配维护IP,依赖etcd
  • calico:系统调用API来修改namespace中的网卡信息

calico插件配置

# cat /etc/cni/net.d/10-calico.conf 
{
    "name": "calico-k8s-network",
    "cniVersion": "0.1.0",
    "type": "calico",
    "etcd_endpoints": "https://node1:2379,https://node2:2379,https://node3:2379",
    "etcd_key_file": "/etc/calico/ssl/calico-key.pem",
    "etcd_cert_file": "/etc/calico/ssl/calico.pem",
    "etcd_ca_cert_file": "/etc/calico/ssl/ca.pem",
    "log_level": "info",
    "mtu": 1500,
    "ipam": {
        "type": "calico-ipam"
    },
    "policy": {
        "type": "k8s"
    },
    "kubernetes": {
        "kubeconfig": "/root/.kube/config"
    }
}

{
  "name": "k8s-pod-network",
  "cniVersion": "0.3.0",
  "plugins": [
    {
      "type": "calico",
      "log_level": "info",
      "datastore_type": "kubernetes",
      "nodename": "master-node",
      "mtu": 1440,
      "ipam": {
          "type": "calico-ipam"
      },
      "policy": {
          "type": "k8s"
      },
      "kubernetes": {
          "kubeconfig": "/etc/cni/net.d/calico-kubeconfig"
      }
    },
    {
      "type": "portmap",
      "snat": true,
      "capabilities": {"portMappings": true}
    }
  ]
}

 

calico-plugin工作原理

       kubelet在创建一个Pod时,首先启动pause容器,然后为pause容器设置网络,也就是添加网卡,这里会通过CNI调起文件系统中的/opt/cni/bin/calico,并将Pod信息通过标准输入(stdin)传递给 calico 进程,calico 通过修改系统中 Namespace 里的网卡信息来完成 Pod 的网络配置

 

1、main函数

  • 与其他plugin代码一样的套路,主要调用skel.PluginMain函数,主要分析cmdAdd函数
func main() {
	// Configure logrus: a custom output formatter plus a hook that adds
	// file/line number information to every entry.
	logrus.SetFormatter(&logutils.Formatter{})
	logrus.AddHook(&logutils.ContextHook{})

	// Parse the command line with a private flag set so that we never clash
	// with libraries that register flags on the global "flag" package.
	flags := flag.NewFlagSet("Calico", flag.ExitOnError)
	showVersion := flags.Bool("v", false, "Display version")
	if parseErr := flags.Parse(os.Args[1:]); parseErr != nil {
		fmt.Println(parseErr)
		os.Exit(1)
	}

	// "-v" only prints the version and exits successfully.
	if *showVersion {
		fmt.Println(VERSION)
		os.Exit(0)
	}

	if argsErr := utils.AddIgnoreUnknownArgs(); argsErr != nil {
		os.Exit(1)
	}

	// Everything else is delegated to the CNI skeleton, which is handed the
	// ADD and DEL handlers together with the supported spec versions.
	skel.PluginMain(cmdAdd, cmdDel, cniSpecVersion.All)
}

 

2、cmdAdd函数

      从标准输入(stdin)读取配置信息,获取 nodename

func cmdAdd(args *skel.CmdArgs) error {
	// Unmarshal the network config, and perform validation
	conf := types.NetConf{}
	if err := json.Unmarshal(args.StdinData, &conf); err != nil {
		return fmt.Errorf("failed to load netconf: %v", err)
	}

	utils.ConfigureLogging(conf.LogLevel)

	if !conf.NodenameFileOptional {
		// Configured to wait for the nodename file - don't start until it exists.
		if _, err := os.Stat("/var/lib/calico/nodename"); err != nil {
			s := "%s: check that the calico/node container is running and has mounted /var/lib/calico/"
			return fmt.Errorf(s, err)
		}
		logrus.Debug("/var/lib/calico/nodename exists")
	}

	// Determine which node name to use.
	nodename := utils.DetermineNodename(conf)

    2.1 CreateClient 函数

      配置从标准输入(stdin)传入,解析一大堆 etcd 配置参数,kube config 配置文件,后端存储 type等

      LoadClientConfig 函数加载客户端配置:如果指定了文件则从该文件读取,未指定文件(默认情况)则从环境变量读取配置

// LoadClientConfig builds a CalicoAPIConfig. When filename is non-empty the
// configuration is read from that file; otherwise it is loaded from
// environment variables.
//
// A read failure is returned unchanged; a failure to decode the file contents
// is reported as a "syntax error" that names the file.
func LoadClientConfig(filename string) (*CalicoAPIConfig, error) {
	// No file given: fall back to the environment.
	if filename == "" {
		return LoadClientConfigFromEnvironment()
	}

	raw, readErr := ioutil.ReadFile(filename)
	if readErr != nil {
		return nil, readErr
	}

	cfg, parseErr := LoadClientConfigFromBytes(raw)
	if parseErr != nil {
		return nil, fmt.Errorf("syntax error in %s: %v", filename, parseErr)
	}
	return cfg, nil
}

      2.1.1 创建一个客户端连接,可以为 etcd 或者 kubernetes

// New builds a client on top of a freshly created backend datastore client.
// The config can be assembled by hand, or obtained from a config file or
// environment variables through LoadClientConfig().
//
// Any error from creating the backend client is returned as-is.
func New(config apiconfig.CalicoAPIConfig) (Interface, error) {
	backendClient, err := backend.NewClient(config)
	if err != nil {
		return nil, err
	}

	// The same backend client is shared by the client itself and by its
	// resources helper.
	c := client{
		config:    config,
		backend:   backendClient,
		resources: &resources{backend: backendClient},
	}
	return c, nil
}

     2.1.2 NewClient 创建客户端连接,使用 etcdv3 或者 kubernetes

// NewClient creates the backend datastore client selected by
// config.Spec.DatastoreType: an etcdv3 client for apiconfig.EtcdV3 or a
// Kubernetes client for apiconfig.Kubernetes. Any other datastore type
// yields a nil client and an "Unknown datastore type" error.
func NewClient(config apiconfig.CalicoAPIConfig) (c bapi.Client, err error) {
	dsType := config.Spec.DatastoreType
	log.Debugf("Using datastore type '%s'", dsType)

	switch dsType {
	case apiconfig.EtcdV3:
		return etcdv3.NewEtcdV3Client(&config.Spec.EtcdConfig)
	case apiconfig.Kubernetes:
		return k8s.NewKubeClient(&config.Spec)
	}

	return nil, errors.New(fmt.Sprintf("Unknown datastore type: %v",
		dsType))
}

    2.2 查询 default 的 ClusterInformation 资源,而且 datastoreReady 值必须为 true

apiVersion: crd.projectcalico.org/v1
kind: ClusterInformation
metadata:
  annotations:
    projectcalico.org/metadata: '{"uid":"f87c290b-b0f5-11e9-8106-080027603363","creationTimestamp":"2019-07-28T05:10:09Z"}'
  creationTimestamp: "2019-07-28T05:10:09Z"
  generation: 1
  name: default
  resourceVersion: "2444411"
  selfLink: /apis/crd.projectcalico.org/v1/clusterinformations/default
  uid: f87c8c27-b0f5-11e9-a59b-080027603363
spec:
  calicoVersion: v3.7.4
  clusterGUID: 063ddadf474343059e1e446326172d97
  clusterType: k8s,bgp,kdd
  datastoreReady: true

ctx := context.Background()
ci, err := calicoClient.ClusterInformation().Get(ctx, "default", options.GetOptions{})
if err != nil {
	return fmt.Errorf("error getting ClusterInformation: %v", err)
}
if *ci.Spec.DatastoreReady != true {
	logrus.Info("Upgrade may be in progress, ready flag is not set")
	return fmt.Errorf("Calico is currently not ready to process requests")
}

    2.3 查询已有的 endpoints,如果存在名称匹配的 WorkloadEndpoint,则沿用原来的 name 

if len(endpoints.Items) > 0 {
	logger.Debugf("List of WorkloadEndpoints %v", endpoints.Items)
	for _, ep := range endpoints.Items {
		match, err := wepIDs.WorkloadEndpointIdentifiers.NameMatches(ep.Name)
		if err != nil {
			// We should never hit this error, because it should have already been
			// caught by CalculateWorkloadEndpointName.
			return fmt.Errorf("invalid WorkloadEndpoint identifiers: %v", wepIDs.WorkloadEndpointIdentifiers)
		}

		if match {
			logger.Debugf("Found a match for WorkloadEndpoint: %v", ep)
			endpoint = &ep
			// Assign the WEP name to wepIDs' WEPName field.
			wepIDs.WEPName = endpoint.Name
			// Put the endpoint name from the matched WEP in the identifiers.
			wepIDs.Endpoint = ep.Spec.Endpoint
			logger.Infof("Calico CNI found existing endpoint: %v", endpoint)
			break
		}
	}
}

 

3 k8s.CmdAddK8s函数

// CmdAddK8s performs the "ADD" operation on a kubernetes pod
// Having kubernetes code in its own file avoids polluting the mainline code. It's expected that the kubernetes case will
// more special casing than the mainline code.
func CmdAddK8s(ctx context.Context, args *skel.CmdArgs, conf types.NetConf, epIDs utils.WEPIdentifiers, calicoClient calicoclient.Interface, endpoint *api.WorkloadEndpoint) (*current.Result, error) {
	var err error
	var result *current.Result

	utils.ConfigureLogging(conf.LogLevel)

	logger := logrus.WithFields(logrus.Fields{
		"WorkloadEndpoint": epIDs.WEPName,
		"ContainerID":      epIDs.ContainerID,
		"Pod":              epIDs.Pod,
		"Namespace":        epIDs.Namespace,
	})

    3.1 IPAM 类型为 host-local

      提取配置参数,包括 ipam route,如果未设置路由则使用默认路由,0.0.0.0/0

    3.2 policy type 为 k8s

【kubernetes/k8s概念】CNI plugin calico源码分析_第1张图片

    getK8sNSInfo 提取 namespace 的注解

    getK8sPodInfo 提取 pod 的 label 注解 ports profiles

label
projectcalico.org/namespace
label
projectcalico.org/serviceaccount
label
projectcalico.org/orchestrator
注解
cni.projectcalico.org/floatingIPs
// Only attempt to fetch the labels and annotations from Kubernetes
// if the policy type has been set to "k8s". This allows users to
// run the plugin under Kubernetes without needing it to access the
// Kubernetes API
if conf.Policy.PolicyType == "k8s" {
	var err error

	annotNS, err = getK8sNSInfo(client, epIDs.Namespace)
	if err != nil {
		return nil, err
	}
	logger.WithField("NS Annotations", annotNS).Debug("Fetched K8s namespace annotations")

	labels, annot, ports, profiles, generateName, err = getK8sPodInfo(client, epIDs.Pod, epIDs.Namespace)

    3.3 如果 IPAM 类型为 calico-ipam 插件

      提取注解中的 ippool,根据 key 为 ipv4 cni.projectcalico.org/ipv4pools,ipv6 为 cni.projectcalico.org/ipv6pools

// Check for calico IPAM specific annotations and set them if needed.
if conf.IPAM.Type == "calico-ipam" {

	var v4pools, v6pools string

	// Sets  the Namespace annotation for IP pools as default
	v4pools = annotNS["cni.projectcalico.org/ipv4pools"]
	v6pools = annotNS["cni.projectcalico.org/ipv6pools"]

	// Gets the POD annotation for IP Pools and overwrites Namespace annotation if it exists
	v4poolpod := annot["cni.projectcalico.org/ipv4pools"]
	if len(v4poolpod) != 0 {
		v4pools = v4poolpod
	}
	v6poolpod := annot["cni.projectcalico.org/ipv6pools"]
	if len(v6poolpod) != 0 {
		v6pools = v6poolpod
	}

    3.4 如果注解中存在 ippool,则设置 ipam 地址池

if len(v4pools) > 0 {
	if err := json.Unmarshal([]byte(v4pools), &v4PoolSlice); err != nil {
		logger.WithField("IPv4Pool", v4pools).Error("Error parsing IPv4 IPPools")
		return nil, err
	}

	if _, ok := stdinData["ipam"].(map[string]interface{}); !ok {
		logger.Fatal("Error asserting stdinData type")
		os.Exit(0)
	}
	stdinData["ipam"].(map[string]interface{})["ipv4_pools"] = v4PoolSlice
	logger.WithField("ipv4_pools", v4pools).Debug("Setting IPv4 Pools")
}

    3.5 提取注解信息,主要是提供固定 IP 

       cni.projectcalico.org/ipAddrs:指定一个要分配给 Pod 的 IPv4和/ 或 IPv6 地址列表。 请求的 IP 地址将从 Calico IPAM 分配,并且必须存在于已配置的 IP pool 中

       cni.projectcalico.org/ipAddrsNoIpam: 指定一个要分配给 Pod 的 IPv4 和/或 IPv6 地址列表,绕过 IPAM。 任何 IP 冲突和路由配置都必须由人工或其他系统处理。 Calico 仅处理那些属于 Calico IP pool  中的 IP 地址,将其路由分发到 Pod。 如果分配的 IP 地址不在 Calico IP pool 中,则须确保其他机制正确地处理该IP地址的路由

ipAddrsNoIpam := annot["cni.projectcalico.org/ipAddrsNoIpam"]
ipAddrs := annot["cni.projectcalico.org/ipAddrs"]

    3.6 如果没有指定 IP,则调用 ipam.Exec 申请 IP 地址 

      cni.projectcalico.org/ipAddrs 和  cni.projectcalico.org/ipAddrsNoIpam 不能同时设置

// Switch based on which annotations are passed or not passed.
switch {
case ipAddrs == "" && ipAddrsNoIpam == "":
	// Call the IPAM plugin.
	result, err = utils.AddIPAM(conf, args, logger)
	if err != nil {
		return nil, err
	}
case ipAddrs != "" && ipAddrsNoIpam != "":
	// Can't have both ipAddrs and ipAddrsNoIpam annotations at the same time.
	e := fmt.Errorf("can't have both annotations: 'ipAddrs' and 'ipAddrsNoIpam' in use at the same time")
	logger.Error(e)
	return nil, e

    3.7 绕过 ipam 情况

      调用 overrideIPAMResult 函数,简单的验证 ip 合法性,直接返回设置的 IP 地址

case ipAddrsNoIpam != "":
	// Validate that we're allowed to use this feature.
	if conf.IPAM.Type != "calico-ipam" {
		e := fmt.Errorf("ipAddrsNoIpam is not compatible with configured IPAM: %s", conf.IPAM.Type)
		logger.Error(e)
		return nil, e
	}
	if !conf.FeatureControl.IPAddrsNoIpam {
		e := fmt.Errorf("requested feature is not enabled: ip_addrs_no_ipam")
		logger.Error(e)
		return nil, e
	}

	// ipAddrsNoIpam annotation is set so bypass IPAM, and set the IPs manually.
	overriddenResult, err := overrideIPAMResult(ipAddrsNoIpam, logger)
	if err != nil {
		return nil, err
	}
	logger.Debugf("Bypassing IPAM to set the result to: %+v", overriddenResult)

	// Convert overridden IPAM result into current Result.
	// This method fill in all the empty fields necessory for CNI output according to spec.
	result, err = current.NewResultFromResult(overriddenResult)
	if err != nil {
		return nil, err
	}

	if len(result.IPs) == 0 {
		return nil, errors.New("failed to build result")
	}

    3.8 指定 IP 地址的情况

     如果 endpoint 已经存在时,释放先前的 IP

case ipAddrs != "":
	// Validate that we're allowed to use this feature.
	if conf.IPAM.Type != "calico-ipam" {
		e := fmt.Errorf("ipAddrs is not compatible with configured IPAM: %s", conf.IPAM.Type)
		logger.Error(e)
		return nil, e
	}

	// If the endpoint already exists, we need to attempt to release the previous IP addresses here
	// since the ADD call will fail when it tries to reallocate the same IPs. releaseIPAddrs assumes
	// that Calico IPAM is in use, which is OK here since only Calico IPAM supports the ipAddrs
	// annotation.
	if endpoint != nil {
		logger.Info("Endpoint already exists and ipAddrs is set. Release any old IPs")
		if err := releaseIPAddrs(endpoint.Spec.IPNetworks, calicoClient, logger); err != nil {
			return nil, fmt.Errorf("failed to release ipAddrs: %s", err)
		}
	}

	// When ipAddrs annotation is set, we call out to the configured IPAM plugin
	// requesting the specific IP addresses included in the annotation.
	result, err = ipAddrsResult(ipAddrs, conf, args, logger)
	if err != nil {
		return nil, err
	}
	logger.Debugf("IPAM result set to: %+v", result)
}

     3.9 配置 endpoint 为其赋值

// Configure the endpoint (creating if required).
if endpoint == nil {
	logger.Debug("Initializing new WorkloadEndpoint resource")
	endpoint = api.NewWorkloadEndpoint()
}
endpoint.Name = epIDs.WEPName
endpoint.Namespace = epIDs.Namespace
endpoint.Labels = labels
endpoint.GenerateName = generateName
endpoint.Spec.Endpoint = epIDs.Endpoint
endpoint.Spec.Node = epIDs.Node
endpoint.Spec.Orchestrator = epIDs.Orchestrator
endpoint.Spec.Pod = epIDs.Pod
endpoint.Spec.Ports = ports
endpoint.Spec.IPNetworks = []string{}

    3.10 调用 DoNetworking 为其配置网络

       创建 endpoint 的操作则是,调用 ip link add $contVethName type veth peer name $hostVethName 创建 veth pair

       创建 169.254.1.1 的默认网络路由,将 host veth 端移到 host 的 namespace

       第 6 章节详细讲解

// Whether the endpoint existed or not, the veth needs (re)creating.
hostVethName := k8sconversion.VethNameForWorkload(epIDs.Namespace, epIDs.Pod)
_, contVethMac, err := utils.DoNetworking(args, conf, result, logger, hostVethName, routes)
if err != nil {
	logger.WithError(err).Error("Error setting up networking")
	releaseIPAM()
	return nil, err
}

 

    分析 cni 的操作,不支持配置 "feature_control": { "ip_addrs_no_ipam": true }

// Default CNI behavior
// Validate enabled features
if conf.FeatureControl.IPAddrsNoIpam {
	return errors.New("requested feature is not supported for this runtime: ip_addrs_no_ipam")
}

 

4. 如果存在相同的 namespace name 的 endpoint 

     不需要再创建 endpoint,也不需要创建 veth pair,只需要更新 profile

     CreateResultFromEndpoint 函数从 workloadEndpoint 中抽出 IP 信息,作为返回的 IP 地址

endpointAlreadyExisted := endpoint != nil
if endpointAlreadyExisted {
	// There is an existing endpoint - no need to create another.
	// This occurs when adding an existing container to a new CNI network
	// Find the IP address from the endpoint and use that in the response.
	// Don't create the veth or do any networking.
	// Just update the profile on the endpoint. The profile will be created if needed during the
	// profile processing step.
	foundProfile := false
	for _, p := range endpoint.Spec.Profiles {
		if p == profileID {
			logger.Infof("Calico CNI endpoint already has profile: %s\n", profileID)
			foundProfile = true
			break
		}
	}
	if !foundProfile {
		logger.Infof("Calico CNI appending profile: %s\n", profileID)
		endpoint.Spec.Profiles = append(endpoint.Spec.Profiles, profileID)
	}
	result, err = utils.CreateResultFromEndpoint(endpoint)
	logger.WithField("result", result).Debug("Created result from endpoint")
	if err != nil {
		return err
	}
}

    4.1 CreateResultFromEndpoint 函数

    从 workloadEndpoint 中抽出 IP 信息,作为返回的 IP 地址

// CreateResultFromEndpoint takes a WorkloadEndpoint, extracts IP information
// and populates that into a CNI Result.
func CreateResultFromEndpoint(wep *api.WorkloadEndpoint) (*current.Result, error) {
	result := ¤t.Result{}
	for _, v := range wep.Spec.IPNetworks {
		parsedIPConfig := current.IPConfig{}

		ipAddr, ipNet, err := net.ParseCIDR(v)
		if err != nil {
			return nil, err
		}

		parsedIPConfig.Address = *ipNet

		if ipAddr.To4() != nil {
			parsedIPConfig.Version = "4"
		} else {
			parsedIPConfig.Version = "6"
		}

		result.IPs = append(result.IPs, &parsedIPConfig)
	}

	return result, nil
}

    

 第 5 章节分析不存在 endpoint 的情况,需要创建,请看下文分析

5. 不存在 endpoint 的情况

// There's no existing endpoint, so we need to do the following:
// 1) Call the configured IPAM plugin to get IP address(es)
// 2) Configure the Calico endpoint
// 3) Create the veth, configuring it on both the host and container namespace.

    5.1 调用 calico ipam 或者 host-local,获取 IP,本文分析调用 calico ipam的情况

// 1) Run the IPAM plugin and make sure there's an IP address returned.
logger.WithFields(logrus.Fields{"paths": os.Getenv("CNI_PATH"),
	"type": conf.IPAM.Type}).Debug("Looking for IPAM plugin in paths")
ipamResult, err := ipam.ExecAdd(conf.IPAM.Type, args.StdinData)
logger.WithField("IPAM result", ipamResult).Info("Got result from IPAM plugin")
if err != nil {
	return err
}

    5.2 把接口类型转换为 Result 结构体类型

// Convert IPAM result into current Result.
// IPAM result has a bunch of fields that are optional for an IPAM plugin
// but required for a CNI plugin, so this is to populate those fields.
// See CNI Spec doc for more details.
result, err = current.NewResultFromResult(ipamResult)
if err != nil {
	utils.ReleaseIPAllocation(logger, conf, args)
	return err
}

if len(result.IPs) == 0 {
	utils.ReleaseIPAllocation(logger, conf, args)
	return errors.New("IPAM plugin returned missing IP config")
}

    5.3 创建 workloadendpoint 对象并赋值

apiVersion: projectcalico.org/v3
kind: WorkloadEndpoint
metadata:
  creationTimestamp: 2019-08-05T07:56:42Z
  generateName: mysql-hostpath-9ff8d9676-
  labels:
    app: wordpress-hostpath
    pod-template-hash: 9ff8d9676
    projectcalico.org/namespace: default
    projectcalico.org/orchestrator: k8s
    projectcalico.org/serviceaccount: default
    tier: mysql-hostpath
  name: master--node-k8s-mysql--hostpath--9ff8d9676--8njtp-eth0
  namespace: default
  resourceVersion: "2972911"
  uid: 903386b5-b756-11e9-a1f8-080027603363
spec:
  endpoint: eth0
  interfaceName: cali40df26f67d0
  ipNetworks:
  - 192.170.77.171/32
  node: master-node
  orchestrator: k8s
  pod: mysql-hostpath-9ff8d9676-8njtp
  ports:
  - name: mysql
    port: 3306
    protocol: TCP
  profiles:
  - kns.default
  - ksa.default.default

// 2) Create the endpoint object
endpoint = api.NewWorkloadEndpoint()
endpoint.Name = wepIDs.WEPName
endpoint.Namespace = wepIDs.Namespace
endpoint.Spec.Endpoint = wepIDs.Endpoint
endpoint.Spec.Node = wepIDs.Node
endpoint.Spec.Orchestrator = wepIDs.Orchestrator
endpoint.Spec.ContainerID = wepIDs.ContainerID
endpoint.Labels = labels
endpoint.Spec.Profiles = []string{profileID}

    5.4 DoNetworking 函数创建 veth pair,返回 hostVethName 和 contVethMac

       创建 endpoint 的操作则是,调用 ip link add $contVethName type veth peer name $hostVethName 创建 veth pair

       创建 169.254.1.1 的默认网络路由,将 host veth 端移到 host 的 namespace

       第 6 章节详细讲解

// 3) Set up the veth
hostVethName, contVethMac, err := utils.DoNetworking(
	args, conf, result, logger, "", utils.DefaultRoutes)
if err != nil {
	// Cleanup IP allocation and return the error.
	utils.ReleaseIPAllocation(logger, conf, args)
	return err
}

    5.5 CreateOrUpdate 函数

     如果 workloadEndpoint 的 ResourceVersion 不为空(即先前已存在),则调用 Update 操作,否则调用 Create 操作,将 workloadEndpoint 资源信息存入后端存储中

// Write the endpoint object (either the newly created one, or the updated one with a new ProfileIDs).
if _, err := utils.CreateOrUpdate(ctx, calicoClient, endpoint); err != nil {
	if !endpointAlreadyExisted {
		// Only clean up the IP allocation if this was a new endpoint.  Otherwise,
		// we'd release the IP that is already attached to the existing endpoint.
		utils.ReleaseIPAllocation(logger, conf, args)
	}
	return err
}

 

 

6. DoNetworking 函数

  • 调用netlink.LinkAdd(veth)  netlink.LinkSetUp(hostVeth) 创建一个网卡对veth,主机端网卡名默认以 cali 开头,后面是容器 id 的前 11 位。然后就是把网卡插入容器内设置 IP 和路由。一端在Linux的物理机上,一端在容器中,用于容器与物理机之间的通信

  • ip.AddRoute(r, gw, contVeth)添加路由

  • netlink.LinkSetNsFd(hostVeth, int(hostNS.Fd()))将host端veth移到host的namespace,相当于命令ip link set $link netns $ns

// DoNetworking performs the networking for the given config and IPAM result
func DoNetworking(
	args *skel.CmdArgs,
	conf types.NetConf,
	result *current.Result,
	logger *logrus.Entry,
	desiredVethName string,
	routes []*net.IPNet,
) (hostVethName, contVethMAC string, err error) {
	// Select the first 11 characters of the containerID for the host veth.
	hostVethName = "cali" + args.ContainerID[:Min(11, len(args.ContainerID))]
	contVethName := args.IfName
	var hasIPv4, hasIPv6 bool

	// If a desired veth name was passed in, use that instead.
	if desiredVethName != "" {
		hostVethName = desiredVethName
	}

    6.1 如果 host 端 veth 已存在则清理删除

// Clean up if hostVeth exists.
if oldHostVeth, err := netlink.LinkByName(hostVethName); err == nil {
	if err = netlink.LinkDel(oldHostVeth); err != nil {
		return "", "", fmt.Errorf("failed to delete old hostVeth %v: %v", hostVethName, err)
	}
	logger.Infof("Cleaning old hostVeth: %v", hostVethName)
}

    6.2 在 namespace 中,创建 veth 

     相当于执行命令 ip link add $contVethName type veth peer name $hostVethName

err = ns.WithNetNSPath(args.Netns, func(hostNS ns.NetNS) error {
	veth := &netlink.Veth{
		LinkAttrs: netlink.LinkAttrs{
			Name:  contVethName,
			Flags: net.FlagUp,
			MTU:   conf.MTU,
		},
		PeerName: hostVethName,
	}

	if err := netlink.LinkAdd(veth); err != nil {
		logger.Errorf("Error adding veth %+v: %s", veth, err)
		return err
	}

    6.3 设置 host 端 veth mac 地址

11: cali40df26f67d0@if3: mtu 1440 qdisc noqueue state UP 
    link/ether ee:ee:ee:ee:ee:ee brd ff:ff:ff:ff:ff:ff link-netnsid 2
    inet6 fe80::ecee:eeff:feee:eeee/64 scope link 
       valid_lft forever preferred_lft forever

if mac, err := net.ParseMAC("EE:EE:EE:EE:EE:EE"); err != nil {
	logger.Infof("failed to parse MAC Address: %v. Using kernel generated MAC.", err)
} else {
	// Set the MAC address on the host side interface so the kernel does not
	// have to generate a persistent address which fails some times.
	if err = netlink.LinkSetHardwareAddr(hostVeth, mac); err != nil {
		logger.Warnf("failed to Set MAC of %q: %v. Using kernel generated MAC.", hostVethName, err)
	}
}

    6.4 设置设备 up 启动状态

      相当于 ip link set $hostVeth up

// Explicitly set the veth to UP state, because netlink doesn't always do that on all the platforms with net.FlagUp.
// veth won't get a link local address unless it's set to UP state.
if err = netlink.LinkSetUp(hostVeth); err != nil {
	return fmt.Errorf("failed to set %q up: %v", hostVethName, err)
}

    6.5 对于 ipv4 版本设置路由

Kernel IP routing table
Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
0.0.0.0         169.254.1.1     0.0.0.0         UG    0      0        0 eth0
169.254.1.1     0.0.0.0         255.255.255.255 UH    0      0        0 eth0

    添加默认网关 169.254.1.1, 容器会查询下一跳 169.254.1.1 的 MAC 地址,通过 arp proxy 和修改容器路由表来实现

    调用 ip.AddRoute 添加路由,相当于 ip route add 命令

// Do the per-IP version set-up.  Add gateway routes etc.
if hasIPv4 {
	// Add a connected route to a dummy next hop so that a default route can be set
	gw := net.IPv4(169, 254, 1, 1)
	gwNet := &net.IPNet{IP: gw, Mask: net.CIDRMask(32, 32)}
	err := netlink.RouteAdd(
		&netlink.Route{
			LinkIndex: contVeth.Attrs().Index,
			Scope:     netlink.SCOPE_LINK,
			Dst:       gwNet,
		},
	)

	if err != nil {
		return fmt.Errorf("failed to add route inside the container: %v", err)
	}

	for _, r := range routes {
		if r.IP.To4() == nil {
			logger.WithField("route", r).Debug("Skipping non-IPv4 route")
			continue
		}
		logger.WithField("route", r).Debug("Adding IPv4 route")
		if err = ip.AddRoute(r, gw, contVeth); err != nil {
			return fmt.Errorf("failed to add IPv4 route for %v via %v: %v", r, gw, err)
		}
	}
}

    6.6 为容器端 veth 配置 ip 地址

      相当于命令 ip addr add $addr dev $link

// Now add the IPs to the container side of the veth.
for _, addr := range result.IPs {
	if err = netlink.AddrAdd(contVeth, &netlink.Addr{IPNet: &addr.Address}); err != nil {
		return fmt.Errorf("failed to add IP addr to %q: %v", contVeth, err)
	}
}

    6.7 把 host veth 移到 host 的 namespace 

// Now that the everything has been successfully set up in the container, move the "host" end of the
// veth into the host namespace.
if err = netlink.LinkSetNsFd(hostVeth, int(hostNS.Fd())); err != nil {
	return fmt.Errorf("failed to move veth to host netns: %v", err)
}

 

总结:

       从标准输入(stdin)获取配置参数

       创建 endpoint 的操作则是,调用 ip link add $contVethName type veth peer name $hostVethName 创建 veth pair

       创建 169.254.1.1 的默认网络路由,将 host veth 端移到 host 的 namespace

 

注解解释

       cni.projectcalico.org/ipAddrs:指定一个要分配给Pod的IPv4和/或IPv6地址列表。 请求的IP地址将从Calico IPAM分配,并且必须存在于已配置的IP pool中

       cni.projectcalico.org/ipAddrsNoIpam: 指定一个要分配给Pod的IPv4和/或IPv6地址列表,绕过IPAM。 任何IP冲突和路由配置都必须由人工或其他系统处理。 Calico仅处理那些属于Calico IP pool 中的IP地址,将其路由分发到Pod。 如果分配的IP地址不在Calico IP pool中,则必须确保通过其他机制正确地处理该IP地址的路由。

      cni.projectcalico.org/ipv4pools:已配置的IPv4 pool列表,可从中选择Pod的地址。Calico IPAM 支持为每个命名空间或者是每个pod,指定专用的IP pool资源

参考:

    https://docs.projectcalico.org/v2.5/reference/cni-plugin/configuration

你可能感兴趣的:(kubernetes,CNI,网络)