1. 前言
上文 [docker 网络][flannel] 配置安装测试 利用 flannel vxlan 实现了容器跨主机相互访问. 本文将手动模拟 flannel vxlan, 看看 flannel 到底做了哪些操作才让容器可以跨主机相互访问. 关于 vxlan 的原理可以参考 VXLAN详解, 本文将会注重实际操作过程.
环境如下:
Machine 1 : 172.21.0.16 主机名:master
Machine 2 : 172.21.0.12 主机名:worker
flannel
1. [docker 网络][flannel] 配置安装测试
2. [docker 网络][flannel] 背后操作
3. [docker 网络][flannel] 源码简单分析
2. 增加vxlan节点
2.1 master(172.21.0.16)
add-vxlan.sh 脚本增加一个 vxlan 类型的设备 vxlan.1, 地址为 10.0.1.250/32.
[root@master vxlan]#
[root@master vxlan]# cat add-vxlan.sh
ip link delete vxlan.1
ip link add vxlan.1 type vxlan id 1 dev eth0 local 172.21.0.16 dstport 4789 nolearning
ip addr add 10.0.1.250/32 dev vxlan.1
ip link set vxlan.1 up
[root@master vxlan]# ./add-vxlan.sh
[root@master vxlan]# ifconfig vxlan.1
vxlan.1: flags=4163 mtu 1450
inet 10.0.1.250 netmask 255.255.255.255 broadcast 0.0.0.0
inet6 fe80::c0d4:cfff:feb5:8612 prefixlen 64 scopeid 0x20
ether c2:d4:cf:b5:86:12 txqueuelen 1000 (Ethernet)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 6 overruns 0 carrier 0 collisions 0
[root@master vxlan]#
2.2 worker(172.21.0.12)
[root@worker vxlan]# cat add-vxlan.sh
ip link delete vxlan.1
ip link add vxlan.1 type vxlan id 1 dev eth0 local 172.21.0.12 dstport 4789 nolearning
ip addr add 10.0.2.250/32 dev vxlan.1
ip link set vxlan.1 up
[root@worker vxlan]# ./add-vxlan.sh
[root@worker vxlan]# ifconfig vxlan.1
vxlan.1: flags=4163 mtu 1450
inet 10.0.2.250 netmask 255.255.255.255 broadcast 0.0.0.0
inet6 fe80::bcba:39ff:fe2e:a8ed prefixlen 64 scopeid 0x20
ether be:ba:39:2e:a8:ed txqueuelen 1000 (Ethernet)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 6 overruns 0 carrier 0 collisions 0
[root@worker vxlan]#
3. 增加fdb, neighbors 和 route
3.1 master(172.21.0.16)
需要知道 worker(172.21.0.12) 中 vxlan.1 的 mac 地址(be:ba:39:2e:a8:ed)以及 vxlan.1 的 ip 地址(10.0.2.250/32).
[root@master vxlan]# cat add-fdb-arp-route.sh
#ip route add 10.0.2.0/24 dev vxlan.1 onlink
ip route add 10.0.2.0/24 via 10.0.2.250 dev vxlan.1 onlink
bridge fdb add $1 dev vxlan.1 dst 172.21.0.12
ip neighbor add 10.0.2.250 lladdr $1 dev vxlan.1
[root@master vxlan]# ./add-fdb-arp-route.sh be:ba:39:2e:a8:ed
[root@master vxlan]# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 172.21.0.1 0.0.0.0 UG 0 0 0 eth0
10.0.2.0 10.0.2.250 255.255.255.0 UG 0 0 0 vxlan.1
169.254.0.0 0.0.0.0 255.255.0.0 U 1002 0 0 eth0
172.21.0.0 0.0.0.0 255.255.240.0 U 0 0 0 eth0
[root@master vxlan]# bridge fdb show
...
be:ba:39:2e:a8:ed dev vxlan.1 dst 172.21.0.12 self permanent
...
[root@master vxlan]# ip neighbor show
...
10.0.2.250 dev vxlan.1 lladdr be:ba:39:2e:a8:ed PERMANENT
...
[root@master vxlan]#
3.2 worker(172.21.0.12)
需要知道 master(172.21.0.16) 中 vxlan.1 的 mac 地址(c2:d4:cf:b5:86:12)以及 vxlan.1 的 ip 地址(10.0.1.250/32).
[root@worker vxlan]# cat add-fdb-arp-route.sh
ip route add 10.0.1.0/24 via 10.0.1.250 dev vxlan.1 onlink
bridge fdb add $1 dev vxlan.1 dst 172.21.0.16
ip neighbor add 10.0.1.250 lladdr $1 dev vxlan.1
[root@worker vxlan]# ./add-fdb-arp-route.sh c2:d4:cf:b5:86:12
[root@worker vxlan]# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 172.21.0.1 0.0.0.0 UG 0 0 0 eth0
10.0.1.0 10.0.1.250 255.255.255.0 UG 0 0 0 vxlan.1
169.254.0.0 0.0.0.0 255.255.0.0 U 1002 0 0 eth0
172.21.0.0 0.0.0.0 255.255.240.0 U 0 0 0 eth0
[root@worker vxlan]# bridge fdb show
...
c2:d4:cf:b5:86:12 dev vxlan.1 dst 172.21.0.16 self permanent
...
[root@worker vxlan]# ip neighbor show
...
10.0.1.250 dev vxlan.1 lladdr c2:d4:cf:b5:86:12 PERMANENT
...
[root@worker vxlan]#
3.3 测试vxlan之间相互访问
===> master vxlan.1 -> worker vxlan.1
[root@master vxlan]# ping -c 1 10.0.2.250
PING 10.0.2.250 (10.0.2.250) 56(84) bytes of data.
64 bytes from 10.0.2.250: icmp_seq=1 ttl=64 time=0.454 ms
--- 10.0.2.250 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.454/0.454/0.454/0.000 ms
[root@master vxlan]#
===> worker vxlan.1 -> master vxlan.1
[root@worker vxlan]# ping -c 1 10.0.1.250
PING 10.0.1.250 (10.0.1.250) 56(84) bytes of data.
64 bytes from 10.0.1.250: icmp_seq=1 ttl=64 time=0.437 ms
--- 10.0.1.250 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.437/0.437/0.437/0.000 ms
[root@worker vxlan]#
可以看到两个 vxlan 设备已经可以相互访问.
4. 增加network namespace (模拟docker)
4.1 master(172.21.0.16)
[root@master vxlan]# ip netns ls
[root@master vxlan]# cat add-ns.sh
ip link delete veth1 type veth
ip netns delete ns1
ip link delete docker0 type bridge
iptables -t nat -F
iptables -F
ip link add veth1 type veth peer name veth2
ip link set veth1 up
ip link add docker0 type bridge
ifconfig docker0 10.0.1.1/24
#brctl addif docker0 veth1
ip link set veth1 master docker0
ip netns add ns1
ip link set veth2 netns ns1
ip netns exec ns1 ip addr add 10.0.1.2/24 dev veth2
ip netns exec ns1 ip link set lo up
ip netns exec ns1 ip link set veth2 up
ip netns exec ns1 route add default gw 10.0.1.1
iptables -P FORWARD ACCEPT
iptables -t nat -A POSTROUTING -s 10.0.1.0/24 -o eth0 -j MASQUERADE
iptables -t filter -A FORWARD -s 10.0.0.0/16 -j ACCEPT
iptables -t filter -A FORWARD -d 10.0.0.0/16 -j ACCEPT
[root@master vxlan]# ./add-ns.sh
Cannot find device "docker0"
[root@master vxlan]# ./add-ns.sh
[root@master vxlan]# ip netns ls
ns1 (id: 0)
[root@master vxlan]# ip netns exec ns1 sh
sh-4.2# ifconfig
lo: flags=73 mtu 65536
inet 127.0.0.1 netmask 255.0.0.0
inet6 ::1 prefixlen 128 scopeid 0x10
loop txqueuelen 1000 (Local Loopback)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
veth2: flags=4163 mtu 1500
inet 10.0.1.2 netmask 255.255.255.0 broadcast 0.0.0.0
inet6 fe80::48e8:88ff:fe95:945c prefixlen 64 scopeid 0x20
ether 4a:e8:88:95:94:5c txqueuelen 1000 (Ethernet)
RX packets 14 bytes 1116 (1.0 KiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 8 bytes 648 (648.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
sh-4.2# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 10.0.1.1 0.0.0.0 UG 0 0 0 veth2
10.0.1.0 0.0.0.0 255.255.255.0 U 0 0 0 veth2
sh-4.2#
可以看到
network namespace
已经创建成功了, 并且相关配置已经设置完成.
4.2 worker(172.21.0.12)
[root@worker vxlan]# ip netns ls
[root@worker vxlan]# cat add-ns.sh
ip link delete veth1 type veth
ip netns delete ns1
ip link delete docker0 type bridge
iptables -t nat -F
iptables -F
ip link add veth1 type veth peer name veth2
ip link set veth1 up
ip link add docker0 type bridge
ifconfig docker0 10.0.2.1/24
#brctl addif docker0 veth1
ip link set veth1 master docker0
ip netns add ns1
ip link set veth2 netns ns1
ip netns exec ns1 ip addr add 10.0.2.2/24 dev veth2
ip netns exec ns1 ip link set lo up
ip netns exec ns1 ip link set veth2 up
ip netns exec ns1 route add default gw 10.0.2.1
iptables -P FORWARD ACCEPT
iptables -t nat -A POSTROUTING -s 10.0.2.0/24 -o eth0 -j MASQUERADE
iptables -t filter -A FORWARD -s 10.0.0.0/16 -j ACCEPT
iptables -t filter -A FORWARD -d 10.0.0.0/16 -j ACCEPT
[root@worker vxlan]# ./add-ns.sh
Cannot find device "veth1"
Cannot remove namespace file "/var/run/netns/ns1": No such file or directory
Cannot find device "docker0"
[root@worker vxlan]# ./add-ns.sh
[root@worker vxlan]# ip netns ls
ns1 (id: 1)
[root@worker vxlan]# ip netns exec ns1 sh
sh-4.2# ifconfig
lo: flags=73 mtu 65536
inet 127.0.0.1 netmask 255.0.0.0
inet6 ::1 prefixlen 128 scopeid 0x10
loop txqueuelen 1000 (Local Loopback)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
veth2: flags=4163 mtu 1500
inet 10.0.2.2 netmask 255.255.255.0 broadcast 0.0.0.0
inet6 fe80::6c11:71ff:feb8:3a6c prefixlen 64 scopeid 0x20
ether 6e:11:71:b8:3a:6c txqueuelen 1000 (Ethernet)
RX packets 15 bytes 1206 (1.1 KiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 8 bytes 648 (648.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
sh-4.2# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 10.0.2.1 0.0.0.0 UG 0 0 0 veth2
10.0.2.0 0.0.0.0 255.255.255.0 U 0 0 0 veth2
sh-4.2#
worker
中模拟的容器也创建成功了.
4.3 测试跨主机访问
master 中的容器访问 worker 中的容器, docker0, vxlan.1 以及主机.
[root@master vxlan]# ip netns exec ns1 sh
===> 访问worker 的ns1
sh-4.2# ping -c 1 10.0.2.2
PING 10.0.2.2 (10.0.2.2) 56(84) bytes of data.
64 bytes from 10.0.2.2: icmp_seq=1 ttl=62 time=0.411 ms
--- 10.0.2.2 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.411/0.411/0.411/0.000 ms
===> 访问worker 的docker0
sh-4.2# ping -c 1 10.0.2.1
PING 10.0.2.1 (10.0.2.1) 56(84) bytes of data.
64 bytes from 10.0.2.1: icmp_seq=1 ttl=63 time=0.389 ms
--- 10.0.2.1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.389/0.389/0.389/0.000 ms
===> 访问worker 的vxlan.1
sh-4.2# ping -c 1 10.0.2.250
PING 10.0.2.250 (10.0.2.250) 56(84) bytes of data.
64 bytes from 10.0.2.250: icmp_seq=1 ttl=63 time=0.394 ms
--- 10.0.2.250 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.394/0.394/0.394/0.000 ms
===> 访问worker
sh-4.2# ping -c 1 172.21.0.12
PING 172.21.0.12 (172.21.0.12) 56(84) bytes of data.
64 bytes from 172.21.0.12: icmp_seq=1 ttl=63 time=0.351 ms
--- 172.21.0.12 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.351/0.351/0.351/0.000 ms
worker 中的容器访问 master 中的容器, docker0, vxlan.1 以及主机.
[root@worker vxlan]# ip netns exec ns1 sh
===> 访问master的ns1
sh-4.2# ping -c 1 10.0.1.2
PING 10.0.1.2 (10.0.1.2) 56(84) bytes of data.
64 bytes from 10.0.1.2: icmp_seq=1 ttl=62 time=0.449 ms
--- 10.0.1.2 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.449/0.449/0.449/0.000 ms
===> 访问master的docker0
sh-4.2# ping -c 1 10.0.1.1
PING 10.0.1.1 (10.0.1.1) 56(84) bytes of data.
64 bytes from 10.0.1.1: icmp_seq=1 ttl=63 time=0.408 ms
--- 10.0.1.1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.408/0.408/0.408/0.000 ms
===> 访问master的vxlan.1
sh-4.2# ping -c 1 10.0.1.250
PING 10.0.1.250 (10.0.1.250) 56(84) bytes of data.
64 bytes from 10.0.1.250: icmp_seq=1 ttl=63 time=0.409 ms
--- 10.0.1.250 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.409/0.409/0.409/0.000 ms
===> 访问master
sh-4.2# ping -c 1 172.21.0.16
PING 172.21.0.16 (172.21.0.16) 56(84) bytes of data.
64 bytes from 172.21.0.16: icmp_seq=1 ttl=63 time=0.348 ms
--- 172.21.0.16 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.348/0.348/0.348/0.000 ms
可以看到两个容器(network namespace)实现了跨主机访问.
5. 在master中增加一个network namespace
[root@master vxlan]# cat add-another-ns.sh
ip link delete veth5 type veth
ip netns delete ns2
ip link add veth5 type veth peer name veth6
ip link set veth5 up
ip link set veth5 master docker0
ip netns add ns2
ip link set veth6 netns ns2
ip netns exec ns2 ip addr add 10.0.1.3/24 dev veth6
ip netns exec ns2 ip link set lo up
ip netns exec ns2 ip link set veth6 up
ip netns exec ns2 route add default gw 10.0.1.1
[root@master vxlan]# ./add-another-ns.sh
Cannot find device "veth5"
Cannot remove namespace file "/var/run/netns/ns2": No such file or directory
[root@master vxlan]# ./add-another-ns.sh
[root@master vxlan]# ip netns ls
ns2 (id: 1)
ns1 (id: 0)
[root@master vxlan]#
[root@master vxlan]# ip netns exec ns2 sh
sh-4.2# ifconfig
lo: flags=73 mtu 65536
inet 127.0.0.1 netmask 255.0.0.0
inet6 ::1 prefixlen 128 scopeid 0x10
loop txqueuelen 1000 (Local Loopback)
RX packets 0 bytes 0 (0.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 0 bytes 0 (0.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
veth6: flags=4163 mtu 1500
inet 10.0.1.3 netmask 255.255.255.0 broadcast 0.0.0.0
inet6 fe80::5ca9:72ff:fe81:24d3 prefixlen 64 scopeid 0x20
ether 5e:a9:72:81:24:d3 txqueuelen 1000 (Ethernet)
RX packets 8 bytes 648 (648.0 B)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 8 bytes 648 (648.0 B)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
sh-4.2# route -n
Kernel IP routing table
Destination Gateway Genmask Flags Metric Ref Use Iface
0.0.0.0 10.0.1.1 0.0.0.0 UG 0 0 0 veth6
10.0.1.0 0.0.0.0 255.255.255.0 U 0 0 0 veth6
===> 访问本机ns1
sh-4.2# ping -c 1 10.0.1.2
PING 10.0.1.2 (10.0.1.2) 56(84) bytes of data.
64 bytes from 10.0.1.2: icmp_seq=1 ttl=64 time=0.071 ms
--- 10.0.1.2 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.071/0.071/0.071/0.000 ms
===> 访问本机docker0
sh-4.2# ping -c 1 10.0.1.1
PING 10.0.1.1 (10.0.1.1) 56(84) bytes of data.
64 bytes from 10.0.1.1: icmp_seq=1 ttl=64 time=0.067 ms
--- 10.0.1.1 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.067/0.067/0.067/0.000 ms
===> 访问本机vxlan.1
sh-4.2# ping -c 1 10.0.1.250
PING 10.0.1.250 (10.0.1.250) 56(84) bytes of data.
64 bytes from 10.0.1.250: icmp_seq=1 ttl=64 time=0.066 ms
--- 10.0.1.250 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.066/0.066/0.066/0.000 ms
===> 访问本机
sh-4.2# ping -c 1 172.21.0.16
PING 172.21.0.16 (172.21.0.16) 56(84) bytes of data.
64 bytes from 172.21.0.16: icmp_seq=1 ttl=64 time=0.044 ms
--- 172.21.0.16 ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 0.044/0.044/0.044/0.000 ms
===> 访问互联网
sh-4.2# ping -c 1 www.baidu.com
PING www.a.shifen.com (220.181.38.149) 56(84) bytes of data.
64 bytes from 220.181.38.149 (220.181.38.149): icmp_seq=1 ttl=249 time=6.13 ms
--- www.a.shifen.com ping statistics ---
1 packets transmitted, 1 received, 0% packet loss, time 0ms
rtt min/avg/max/mdev = 6.132/6.132/6.132/0.000 ms
sh-4.2# exit
exit
[root@master vxlan]#
可以看到主机内部的容器(network namespace)已经可以相互之间访问.