【kubernetes/k8s概念】CNI plugin bridge源码分析




        Bridge(桥)是 Linux 上用来做 TCP/IP 二层协议交换的设备,与物理交换机功能相似。网桥是一种在链路层实现中继、对帧进行转发的技术,它根据 MAC 地址划分网段,可隔离碰撞,是将网络的多个网段在数据链路层连接起来的网络设备。Bridge 设备可以和 Linux 上其他网络设备实例连接,即 attach 一个从设备,类似于在现实世界中的交换机和一个用户终端之间连接一根网线。当有数据到达时,Bridge 会根据报文中的 MAC 信息进行广播、转发或丢弃处理,具体规则如下:



  • 收到新数据包时,记录源MAC地址和端口
  • 根据目的MAC查找本地缓存,如果能找到对应的MAC地址记录
  • 若记录对应的端口就是数据包进入的端口(即目的主机与源主机在同一网段,无需转发),直接丢弃数据包
  • 若发现记录存在对应的端口,则将数据包直接从该端口转发出去
  • 如果本地缓存中不存在任何记录,则在本网段中进行广播。




  • name (string, required): the name of the network. 
  •  type (string, required): "bridge". 
  •  bridge (string, optional): name of the bridge to use/create. Defaults to "cni0". 
  •  isGateway (boolean, optional): assign an IP address to the bridge. Defaults to false. 
  •  isDefaultGateway (boolean, optional): Sets isGateway to true and makes the assigned IP the default route. Defaults to false. 
  •  forceAddress (boolean, optional): Indicates if a new IP address should be set if the previous value has been changed. Defaults to false. 
  •  ipMasq (boolean, optional): set up IP Masquerade on the host for traffic originating from this network and destined outside of it. Defaults to false. 
  •  mtu (integer, optional): explicitly set MTU to the specified value. Defaults to the value chosen by the kernel. 
  •  hairpinMode (boolean, optional): set hairpin mode for interfaces on the bridge. Defaults to false. 
  •  ipam (dictionary, required): IPAM configuration to be used for this network. 
  •  promiscMode (boolean, optional): set promiscuous mode on the bridge. Defaults to false. 
type NetConf struct {
	BrName       string `json:"bridge"`
	IsGW         bool   `json:"isGateway"`
	IsDefaultGW  bool   `json:"isDefaultGateway"`
	ForceAddress bool   `json:"forceAddress"`
	IPMasq       bool   `json:"ipMasq"`
	MTU          int    `json:"mtu"`
	HairpinMode  bool   `json:"hairpinMode"`
	PromiscMode  bool   `json:"promiscMode"`


    "name": "mynet",
    "type": "bridge",
    "bridge": "mynet0",
    "isDefaultGateway": true,
    "forceAddress": false,
    "ipMasq": true,
    "hairpinMode": true,
    "ipam": {
        "type": "host-local",
        "subnet": ""


        main 函数通过 skel.PluginMain 注册了 cmdAdd、cmdGet、cmdDel 三个方法,这里主要分析 cmdAdd 的实现

func main() {
	// TODO: implement plugin version
	skel.PluginMain(cmdAdd, cmdGet, cmdDel, version.All, "TODO")





func loadNetConf(bytes []byte) (*NetConf, string, error) {
	n := &NetConf{
		BrName: defaultBrName,
	if err := json.Unmarshal(bytes, n); err != nil {
		return nil, "", fmt.Errorf("failed to load netconf: %v", err)
	return n, n.CNIVersion, nil


  •  setupBridge 里面调用 ensureBridge,先根据配置设置了一大堆创建网桥所需的属性参数(网桥名、MTU、混杂模式等),
  •  通过netlink.LinkAdd(br)创建网桥,相当于ip link add br-test type bridge
  •  然后通过 netlink.LinkSetUp(br)启动网桥,相当于ip link set dev br-test up
func setupBridge(n *NetConf) (*netlink.Bridge, *current.Interface, error) {
	// create bridge if necessary
	br, err := ensureBridge(n.BrName, n.MTU, n.PromiscMode)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to create bridge %q: %v", n.BrName, err)

	return br, ¤t.Interface{
		Name: br.Attrs().Name,
		Mac:  br.Attrs().HardwareAddr.String(),
	}, nil


  •  调用netlink.LinkAdd(veth)创建veth,这个是一个管道,Linux的网卡对,在容器对应的namespace下创建好虚拟网络接口,相当于ip link add test-veth0 type veth peer name test-veth1
  •  调用netlink.LinkSetUp(contVeth)启动容器端网卡,相当于ip link set dev test-veth0 up
  • 调用netlink.LinkSetNsFd(hostVeth, int(hostNS.Fd()))将veth的host端从容器namespace移入宿主机的network namespace(hostNS)中,相当于ip link set $link netns $ns
  •  调用netlink.LinkSetMaster(hostVeth, br)绑到bridge,相当于ip link set dev test-veth0 master br-test
func setupVeth(netns ns.NetNS, br *netlink.Bridge, ifName string, mtu int, hairpinMode bool) (*current.Interface, *current.Interface, error) {
	contIface := ¤t.Interface{}
	hostIface := ¤t.Interface{}

	err := netns.Do(func(hostNS ns.NetNS) error {
		// create the veth pair in the container and move host end into host netns
		hostVeth, containerVeth, err := ip.SetupVeth(ifName, mtu, hostNS)
		if err != nil {
			return err
		contIface.Name = containerVeth.Name
		contIface.Mac = containerVeth.HardwareAddr.String()
		contIface.Sandbox = netns.Path()
		hostIface.Name = hostVeth.Name
		return nil
	if err != nil {
		return nil, nil, err

	// need to lookup hostVeth again as its index has changed during ns move
	hostVeth, err := netlink.LinkByName(hostIface.Name)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to lookup %q: %v", hostIface.Name, err)
	hostIface.Mac = hostVeth.Attrs().HardwareAddr.String()

	// connect host veth end to the bridge
	if err := netlink.LinkSetMaster(hostVeth, br); err != nil {
		return nil, nil, fmt.Errorf("failed to connect %q to bridge %v: %v", hostVeth.Attrs().Name, br.Attrs().Name, err)

	// set hairpin mode
	if err = netlink.LinkSetHairpin(hostVeth, hairpinMode); err != nil {
		return nil, nil, fmt.Errorf("failed to setup hairpin mode for %v: %v", hostVeth.Attrs().Name, err)

	return hostIface, contIface, nil


  •  调用ipam.ExecAdd获取IP地址
func ExecAdd(plugin string, netconf []byte) (types.Result, error) {
	return invoke.DelegateAdd(plugin, netconf, nil)


  •   调用calcGateways根据IP地址计算对应的路由和网关
// Gather gateway information for each IP family
	gwsV4, gwsV6, err := calcGateways(result, n)
	if err != nil {
		return err


  •   调用ipam.ConfigureIface将IP地址设置到对应的虚拟网络接口上,相当于ifconfig test-veth0 <IP地址> up
  •   调用enableIPForward(gws.family)开启ip转发,路径/proc/sys/net/ipv4/ip_forward写入值1
// Configure the container hardware address and IP address(es)
	if err := netns.Do(func(_ ns.NetNS) error {
		contVeth, err := net.InterfaceByName(args.IfName)
		if err != nil {
			return err

		// Disable IPv6 DAD just in case hairpin mode is enabled on the
		// bridge. Hairpin mode causes echos of neighbor solicitation
		// packets, which causes DAD failures.
		for _, ipc := range result.IPs {
			if ipc.Version == "6" && (n.HairpinMode || n.PromiscMode) {
				if err := disableIPV6DAD(args.IfName); err != nil {
					return err

		// Add the IP to the interface
		if err := ipam.ConfigureIface(args.IfName, result); err != nil {
			return err

		// Send a gratuitous arp
		for _, ipc := range result.IPs {
			if ipc.Version == "4" {
				_ = arping.GratuitousArpOverIface(ipc.Address.IP, *contVeth)
		return nil
	}); err != nil {
		return err


  •   调用ip.SetupIPMasq建立iptables规则
if n.IPMasq {
		chain := utils.FormatChainName(n.Name, args.ContainerID)
		comment := utils.FormatComment(n.Name, args.ContainerID)
		for _, ipc := range result.IPs {
			if err = ip.SetupIPMasq(ip.Network(&ipc.Address), chain, comment); err != nil {
				return err
  • -A POSTROUTING -s <容器子网> -m comment --comment "name: \"cbr0\" id: \"7567cf04805c47d94a3d9ad1db32f7cb424fd058c497b5f450278c1ee9c07e5b\"" -j CNI-15aa38615ee8a1e7bfb519c7
  • -A CNI-15aa38615ee8a1e7bfb519c7 -d <容器子网> -m comment --comment "name: \"cbr0\" id: \"7567cf04805c47d94a3d9ad1db32f7cb424fd058c497b5f450278c1ee9c07e5b\"" -j ACCEPT
  • -A CNI-15aa38615ee8a1e7bfb519c7 ! -d 224.0.0.0/4 -m comment --comment "name: \"cbr0\" id: \"7567cf04805c47d94a3d9ad1db32f7cb424fd058c497b5f450278c1ee9c07e5b\"" -j MASQUERADE

  4.1 FormatChainName函数

  生成的链名格式为 CNI-15aa38615ee8a1e7bfb519c7:前缀 chainPrefix(示例中为 CNI-)加上对“网络名 + 容器id”做 SHA-512 后的十六进制串,并截断到 maxChainLength 个字符

// Generates a chain name to be used with iptables.
// Ensures that the generated chain name is exactly
// maxChainLength chars in length
func FormatChainName(name string, id string) string {
   chainBytes := sha512.Sum512([]byte(name + id))
   chain := fmt.Sprintf("%s%x", chainPrefix, chainBytes)
   return chain[:maxChainLength]

  4.2 SetupIPMasq函数



// SetupIPMasq installs iptables rules to masquerade traffic
// coming from ipn and going outside of it
func SetupIPMasq(ipn *net.IPNet, chain string, comment string) error {
	isV6 := ipn.IP.To4() == nil

	var ipt *iptables.IPTables
	var err error
	var multicastNet string

	if isV6 {
		ipt, err = iptables.NewWithProtocol(iptables.ProtocolIPv6)
		multicastNet = "ff00::/8"
	} else {
		ipt, err = iptables.NewWithProtocol(iptables.ProtocolIPv4)
		multicastNet = ""
	if err != nil {
		return fmt.Errorf("failed to locate iptables: %v", err)

	// Create chain if doesn't exist
	exists := false
	chains, err := ipt.ListChains("nat")
	if err != nil {
		return fmt.Errorf("failed to list chains: %v", err)
	for _, ch := range chains {
		if ch == chain {
			exists = true
	if !exists {
		if err = ipt.NewChain("nat", chain); err != nil {
			return err

	// Packets to this network should not be touched
	if err := ipt.AppendUnique("nat", chain, "-d", ipn.String(), "-j", "ACCEPT", "-m", "comment", "--comment", comment); err != nil {
		return err

	// Don't masquerade multicast - pods should be able to talk to other pods
	// on the local network via multicast.
	if err := ipt.AppendUnique("nat", chain, "!", "-d", multicastNet, "-j", "MASQUERADE", "-m", "comment", "--comment", comment); err != nil {
		return err

	return ipt.AppendUnique("nat", "POSTROUTING", "-s", ipn.String(), "-j", chain, "-m", "comment", "--comment", comment)

