Scenario:
Using the script in Appendix [1], create two ports on the same subnet, one on each of two nodes, and analyze how traffic flows between the two ports.
ovn-trace logical flow matching:
()[root@ovn-ovsdb-sb-0 /]# ovn-trace --no-friendly-names 'inport == "8bb043eb-4ff5-4469-acc3-f3f84ba354a6" && eth.src == fa:16:3e:fd:85:6d && eth.dst == ff:ff:ff:ff:ff:ff && arp.op == 1 && arp.sha == fa:16:3e:fd:85:6d && arp.spa == 192.168.222.215 && arp.tha == ff:ff:ff:ff:ff:ff && arp.tpa == 192.168.222.204'
# arp,reg14=0x2,vlan_tci=0x0000,dl_src=fa:16:3e:fd:85:6d,dl_dst=ff:ff:ff:ff:ff:ff,arp_spa=192.168.222.215,arp_tpa=192.168.222.204,arp_op=1,arp_sha=fa:16:3e:fd:85:6d,arp_tha=ff:ff:ff:ff:ff:ff
ingress(dp="c8d7770d-a81c-4b8c-bb19-630c5e003cde", inport="8bb043eb-4ff5-4469-acc3-f3f84ba354a6")
-------------------------------------------------------------------------------------------------
0. ls_in_port_sec_l2 (ovn-northd.c:4829): inport == "8bb043eb-4ff5-4469-acc3-f3f84ba354a6", priority 50, uuid dc9eefc2
next;
3. ls_in_lookup_fdb (ovn-northd.c:4865): inport == "8bb043eb-4ff5-4469-acc3-f3f84ba354a6", priority 100, uuid 07af3a0d
reg0[11] = lookup_fdb(inport, eth.src);
/* MAC lookup for ff:ff:ff:ff:ff:ff found in FDB. */
next;
22. ls_in_l2_lkup (ovn-northd.c:7352): eth.mcast, priority 70, uuid cfb89e88
outport = "_MC_flood";
output;
multicast(dp="c8d7770d-a81c-4b8c-bb19-630c5e003cde", mcgroup="_MC_flood")
-------------------------------------------------------------------------
egress(dp="c8d7770d-a81c-4b8c-bb19-630c5e003cde", inport="8bb043eb-4ff5-4469-acc3-f3f84ba354a6", outport="0510e1de-8812-4810-8220-f05c78ed5505")
------------------------------------------------------------------------------------------------------------------------------------------------
9. ls_out_port_sec_l2 (ovn-northd.c:4947): eth.mcast, priority 100, uuid fd94ca6d
output;
/* output to "0510e1de-8812-4810-8220-f05c78ed5505", type "localport" */
egress(dp="c8d7770d-a81c-4b8c-bb19-630c5e003cde", inport="8bb043eb-4ff5-4469-acc3-f3f84ba354a6", outport="8bb043eb-4ff5-4469-acc3-f3f84ba354a6")
------------------------------------------------------------------------------------------------------------------------------------------------
/* omitting output because inport == outport && !flags.loopback */
egress(dp="c8d7770d-a81c-4b8c-bb19-630c5e003cde", inport="8bb043eb-4ff5-4469-acc3-f3f84ba354a6", outport="e32c218d-64c2-4073-a157-db17a236da09")
------------------------------------------------------------------------------------------------------------------------------------------------
9. ls_out_port_sec_l2 (ovn-northd.c:4947): eth.mcast, priority 100, uuid fd94ca6d
output;
/* output to "e32c218d-64c2-4073-a157-db17a236da09", type "" */
From the trace above, the traffic traverses the lflow stages port_sec, lookup_fdb and l2_lkup, and is then replicated through the _MC_flood multicast group.
Port details from the Neutron and OVN databases are given in Appendix [2]. In the NB database the LSP's port_security column is empty and its addresses column contains unknown.
The sections below analyze, stage by stage, how a port with port security disabled is implemented.
How Neutron and OVN populate the data for a port with port security disabled
Neutron:
When update_port is called, Neutron's OVN plugin invokes the OVN client's update_port method, which sets the LSP's addresses and port_security columns from the port_info returned by _get_port_options.
Because port security is disabled on this port, port_security in port_info is empty and addresses contains unknown:
def _get_port_options(self, port):
...
if not port_security and not port_type:
# Port security is disabled for this port.
# So this port can send traffic with any mac address.
# OVN allows any mac address from a port if "unknown"
# is added to the Logical_Switch_Port.addresses column.
# So add it.
addresses.append(ovn_const.UNKNOWN_ADDR)
OVN:
build_ports in ovn-northd:
Updates the SB Port_Binding table from the NB LSP data and initializes an ovn_port struct for each logical port.
build_ports
|- join_logical_ports /* initialize the ovn_port structs */
|- /* handle each port's tunnel_key and tag, maintain the SB Port_Binding table, and purge stale MAC_Binding rows */
join_logical_ports iterates over the LSP's addresses column; if any entry is unknown, it sets the ovn_port's has_unknown field to true:
static void
join_logical_ports(struct northd_context *ctx,
struct hmap *datapaths, struct hmap *ports,
struct hmap *chassis_qdisc_queues,
struct hmap *tag_alloc_table, struct ovs_list *sb_only,
struct ovs_list *nb_only, struct ovs_list *both)
{
...
for (size_t j = 0; j < nbsp->n_addresses; j++) {
if (!strcmp(nbsp->addresses[j], "unknown")) {
op->has_unknown = true;
continue;
}
Multicast_Group handling
build_mcast_groups iterates over the ovn_port entries produced by join_logical_ports and the ovn_datapath entries produced by join_datapaths, and adds each port to the appropriate multicast groups (_MC_flood, _MC_mrouter_static, _MC_flood_l2) based on the port type and the datapath/port configuration.
In build_lswitch_ip_unicast_lookup, if the port's addresses contain unknown, the port is additionally added to _MC_unknown (a simplified sketch of the resulting group membership follows the excerpt below):
/* Ingress table 19: Destination lookup, unicast handling (priority 50), */
static void
build_lswitch_ip_unicast_lookup(struct ovn_port *op,
struct hmap *lflows,
struct hmap *mcgroups,
struct ds *actions,
struct ds *match)
{
...
} else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
if (lsp_is_enabled(op->nbsp)) {
ovs_mutex_lock(&mcgroup_mutex);
ovn_multicast_add(mcgroups, &mc_unknown, op);
ovs_mutex_unlock(&mcgroup_mutex);
op->od->has_unknown = true;
}
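To summarize the membership rules produced by the two functions above, here is a simplified, self-contained sketch. The struct and helper names are invented for illustration (they are not the ovn-northd data structures), and the exact rules vary with OVN version and the datapath's multicast options; the point is only that every enabled switch port gets flooded to, while _MC_unknown contains just the ports whose addresses include unknown:

/* Toy illustration of logical-switch-port multicast group membership.
 * Not OVN source: struct toy_lsp and toy_mcast_membership() are invented. */
#include <stdbool.h>
#include <stdio.h>

struct toy_lsp {
    const char *name;
    bool enabled;          /* Logical_Switch_Port.enabled     */
    bool is_router_port;   /* type == "router"                */
    bool has_unknown;      /* "unknown" present in addresses  */
};

static void
toy_mcast_membership(const struct toy_lsp *lsp)
{
    if (!lsp->enabled) {
        printf("%s: joins no multicast groups (port disabled)\n", lsp->name);
        return;
    }
    printf("%s joins: _MC_flood", lsp->name);      /* every enabled LSP    */
    if (!lsp->is_router_port) {
        printf(", _MC_flood_l2");                  /* L2-only flooding     */
    }
    if (lsp->has_unknown) {
        printf(", _MC_unknown");                   /* unknown-MAC flooding */
    }
    printf("\n");
}

int
main(void)
{
    /* Mirrors the port in Appendix [2]: enabled, plain VIF (type ""),
     * addresses contain "unknown". */
    struct toy_lsp p = { "lucheng-tap-no-sec", true, false, true };

    toy_mcast_membership(&p);
    return 0;
}

Appendix [2] additionally shows this port in _MC_mrouter_static; that membership appears to come from its mcast_flood_reports="true" option and is left out of the sketch.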
Port security handling
build_lflows maintains and updates the SB Logical_Flow and Multicast_Group tables.
build_lswitch_and_lrouter_flows
|- /* iterate over each ovn_datapath */
|- build_lswitch_and_lrouter_iterate_by_od
|- /* iterate over each ovn_port */
|- build_lswitch_and_lrouter_iterate_by_op
|- /* iterate over each ovn_northd_lb */
|- build_lswitch_arp_nd_service_monitor
|- build_lrouter_flows_for_lb
|- build_lswitch_flows_for_lb
|- /* iterate over each igmp_group */
|- build_lswitch_ip_mcast_igmp_mld
|- build_lswitch_flows
When build_lswitch_and_lrouter_iterate_by_op runs build_lswitch_input_port_sec_op, it inspects the LSP's port_security column; because it is empty here, no eth.src/IP/ND port-security restrictions are generated and only the priority-50 flow matching "inport == ..." with action next; is installed (stage 0 of the trace above).
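As a concrete illustration of the difference, below is a minimal, self-contained sketch of the match string this stage ends up with in the two cases. build_toy_port_sec_match() is an invented helper (the real work happens inside build_lswitch_input_port_sec_op); the generated match mirrors the priority-50 flow at stage 0 of the trace:

/* Toy sketch of the ls_in_port_sec_l2 match; not OVN source. */
#include <stdio.h>

static void
build_toy_port_sec_match(const char *inport, const char **ps_macs, int n_ps,
                         char *out, size_t len)
{
    int off = snprintf(out, len, "inport == \"%s\"", inport);

    if (n_ps > 0) {
        /* Port security enabled: eth.src is restricted to the configured
         * MAC addresses. */
        off += snprintf(out + off, len - off, " && eth.src == {");
        for (int i = 0; i < n_ps; i++) {
            off += snprintf(out + off, len - off, "%s%s",
                            i ? ", " : "", ps_macs[i]);
        }
        snprintf(out + off, len - off, "}");
    }
    /* Port security disabled (empty port_security column, n_ps == 0): the
     * match is just the inport, so any source MAC is accepted -- this is
     * the priority-50 "next;" flow at stage 0 of the trace above. */
}

int
main(void)
{
    char match[256];
    const char *macs[] = { "fa:16:3e:84:61:02" };

    build_toy_port_sec_match("lsp-with-port-security", macs, 1,
                             match, sizeof match);
    printf("secured:   %s\n", match);

    build_toy_port_sec_match("lsp-without-port-security", NULL, 0,
                             match, sizeof match);
    printf("unsecured: %s\n", match);
    return 0;
}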
ARP responder handling
When build_lswitch_and_lrouter_iterate_by_op runs build_lswitch_arp_nd_responder_known_ips, the port's has_unknown field is true, so no ARP responder flows are installed for it:
static void
build_lswitch_arp_nd_responder_known_ips(struct ovn_port *op,
struct hmap *lflows,
struct hmap *ports,
struct ds *actions,
struct ds *match)
{
...
if (lsp_is_external(op->nbsp) || op->has_unknown) {
return;
}
Because there is no ARP responder flow, when another endpoint accesses the port with port security disabled and the FDB has no entry for that port's MAC, an ARP broadcast has to be sent to learn the MAC address of that port.
FDB handling
build_lflows iterates over each ovn_port and calls build_lswitch_learn_fdb_op, which installs the following flows for a port with port security disabled:
when traffic enters from that lport and the FDB lookup misses, a put_fdb action is executed to learn the source MAC.
static void
build_lswitch_learn_fdb_op(
struct ovn_port *op, struct hmap *lflows,
struct ds *actions, struct ds *match)
{
if (op->nbsp && !op->n_ps_addrs && !strcmp(op->nbsp->type, "") &&
op->has_unknown) {
ds_clear(match);
ds_clear(actions);
ds_put_format(match, "inport == %s", op->json_key);
ds_put_format(actions, REGBIT_LKUP_FDB
" = lookup_fdb(inport, eth.src); next;");
ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_LOOKUP_FDB, 100,
ds_cstr(match), ds_cstr(actions),
&op->nbsp->header_);
ds_put_cstr(match, " && "REGBIT_LKUP_FDB" == 0");
ds_clear(actions);
ds_put_cstr(actions, "put_fdb(inport, eth.src); next;");
ovn_lflow_add_with_hint(lflows, op->od, S_SWITCH_IN_PUT_FDB, 100,
ds_cstr(match), ds_cstr(actions),
&op->nbsp->header_);
}
}
_uuid : c390d94b-879b-4fb2-8ae4-3dd473b16e9a
actions : "put_fdb(inport, eth.src); next;"
external_ids : {source="ovn-northd.c:4884", stage-hint=aa779f10, stage-name=ls_in_put_fdb}
logical_datapath : a899c3df-117a-4a65-8e86-d3cb4663f726
logical_dp_group : []
match : "inport == \"a7b8fccd-eb2c-422c-83a7-3bb98e24ff4a\" && reg0[11] == 0"
pipeline : ingress
priority : 100
table_id : 4
hash : 0
_uuid : f079a50a-a52a-407c-aee5-447f747cc514
actions : "reg0[11] = lookup_fdb(inport, eth.src); next;"
external_ids : {source="ovn-northd.c:4877", stage-hint=aa779f10, stage-name=ls_in_lookup_fdb}
logical_datapath : a899c3df-117a-4a65-8e86-d3cb4663f726
logical_dp_group : []
match : "inport == \"a7b8fccd-eb2c-422c-83a7-3bb98e24ff4a\""
pipeline : ingress
priority : 100
table_id : 3
hash : 0
consider_logical_flow__ calls ovnacts_parse_string to parse the actions field of each logical flow.
In add_matches_to_flow_table, ovnacts_encode encodes the parsed actions, invoking encode_PUT_FDB and encode_LOOKUP_FDB for the two flows above.
encode_PUT_FDB emits an OpenFlow action that punts the packet (carrying the logical inport and source MAC) to ovn-controller; the resulting OpenFlow flows are then pushed to ovs-vswitchd through the ofctrl module.
encode_LOOKUP_FDB translates the lookup_fdb action into OpenFlow actions that are resolved against the FDB flows already installed in the datapath.
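Schematically, the two encoders split the work as follows. This is not verbatim OVN source; it is a simplified sketch, and the helper names (encode_controller_op, emit_resubmit, ACTION_OPCODE_PUT_FDB) and their exact signatures should be read as assumptions about the actions library:

/* Schematic only -- simplified, not verbatim OVN source. */

/* "put_fdb(inport, eth.src);" does no forwarding work in the datapath: it
 * is encoded as a controller() action carrying a PUT_FDB opcode, and
 * pinctrl (next section) turns the resulting packet-in into an SB FDB row. */
static void
encode_PUT_FDB(struct ofpbuf *ofpacts)
{
    encode_controller_op(ACTION_OPCODE_PUT_FDB, ofpacts);
}

/* "reg0[11] = lookup_fdb(inport, eth.src);" never leaves the datapath: it
 * resubmits into the OpenFlow table that consider_fdb_flows (below) fills
 * from the SB FDB table; a hit there sets a flag bit, which is then copied
 * back into reg0[11]. */
static void
encode_LOOKUP_FDB(uint8_t fdb_lookup_table, struct ofpbuf *ofpacts)
{
    emit_resubmit(ofpacts, fdb_lookup_table);
    /* ...then move MFF_LOG_FLAGS[MLF_LOOKUP_FDB_BIT] into reg0[11]... */
}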
In lflow_run, add_fdb_flows iterates over the SB FDB table and, via consider_fdb_flows, installs the corresponding OpenFlow flows for each row:
static void
consider_fdb_flows(const struct sbrec_fdb *fdb,
const struct hmap *local_datapaths,
struct ovn_desired_flow_table *flow_table)
{
...
struct match match = MATCH_CATCHALL_INITIALIZER;
match_set_metadata(&match, htonll(fdb->dp_key));
match_set_dl_dst(&match, mac);
uint64_t stub[1024 / 8];
struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(stub);
put_load64(fdb->port_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
ofctrl_add_flow(flow_table, OFTABLE_GET_FDB, 100,
fdb->header_.uuid.parts[0], &match, &ofpacts,
&fdb->header_.uuid);
...
put_load(&value, sizeof value, MFF_LOG_FLAGS,
MLF_LOOKUP_FDB_BIT, 1, &ofpacts);
struct match lookup_match = MATCH_CATCHALL_INITIALIZER;
match_set_metadata(&lookup_match, htonll(fdb->dp_key));
match_set_dl_src(&lookup_match, mac);
match_set_reg(&lookup_match, MFF_LOG_INPORT - MFF_REG0, fdb->port_key);
ofctrl_add_flow(flow_table, OFTABLE_LOOKUP_FDB, 100,
fdb->header_.uuid.parts[0], &lookup_match, &ofpacts,
&fdb->header_.uuid);
Maintaining the FDB table
When pinctrl handles a packet-in punted to ovn-controller in process_packet_in, it dispatches on the action opcode and calls pinctrl_handle_put_fdb.
ovn_fdb_add then inserts the dp_key, port_key and MAC into the global put_fdbs map:
- dp_key: the OpenFlow metadata field of the packet-in
- port_key: reg14 (the logical inport) of the packet-in
- mac: taken from the Ethernet header of the punted packet
process_packet_in
|- /* dispatch to a handler based on the action opcode */
|- pinctrl_handle_put_fdb
|- ovn_fdb_add
When pinctrl_run executes, it calls run_put_fdbs, which walks the global put_fdbs map, looks each entry up in the SB FDB table, and inserts a new FDB row if none exists. Each FDB row carries dp_key (the datapath's tunnel key), port_key (the port's tunnel key) and mac (the port's MAC address).
pinctrl_run
|- run_put_fdbs
|- /* iterate over the collected put_fdbs entries */
|- run_put_fdb
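A minimal, self-contained model of the state being maintained (the types and helpers below are invented for illustration; the real code keeps put_fdbs as an hmap inside pinctrl and writes the result into the SB FDB table):

/* Toy FDB store; not the pinctrl/SB data structures. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* One learned entry, mirroring the columns of the SB FDB table. */
struct toy_fdb_entry {
    uint32_t dp_key;     /* from the OpenFlow metadata of the packet-in */
    uint32_t port_key;   /* from reg14 (the logical inport)             */
    char mac[18];        /* from the packet's Ethernet header           */
};

#define TOY_FDB_MAX 64
static struct toy_fdb_entry toy_fdb[TOY_FDB_MAX];
static size_t toy_fdb_n;

/* Roughly what run_put_fdbs does with each collected entry: insert a new
 * row keyed by (dp_key, mac) if none exists, otherwise update port_key
 * (the MAC moved to another logical port). */
static void
toy_fdb_put(uint32_t dp_key, uint32_t port_key, const char *mac)
{
    for (size_t i = 0; i < toy_fdb_n; i++) {
        if (toy_fdb[i].dp_key == dp_key && !strcmp(toy_fdb[i].mac, mac)) {
            toy_fdb[i].port_key = port_key;
            return;
        }
    }
    if (toy_fdb_n < TOY_FDB_MAX) {
        toy_fdb[toy_fdb_n].dp_key = dp_key;
        toy_fdb[toy_fdb_n].port_key = port_key;
        snprintf(toy_fdb[toy_fdb_n].mac, sizeof toy_fdb[toy_fdb_n].mac,
                 "%s", mac);
        toy_fdb_n++;
    }
}

int
main(void)
{
    /* 451 is the port's tunnel_key from Appendix [2]; the datapath tunnel
     * key is not shown there, so 1 is only a placeholder. */
    toy_fdb_put(1, 451, "fa:16:3e:84:61:02");
    printf("fdb rows: %zu, port_key: %u\n", toy_fdb_n, toy_fdb[0].port_key);
    return 0;
}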
Broadcast flooding handling
physical_run iterates over the SB Port_Binding table and calls consider_port_binding for each record.
consider_port_binding looks up the ofport of the Port_Binding's logical_port in localvif_to_ofport.
If no ofport is found, the port behind that Port_Binding entry resides on a remote chassis.
For such a remote port, the Port_Binding's redirect-type option is checked; if it is not "bridged",
put_remote_port_redirect_overlay installs the OpenFlow flows that tunnel the flooded traffic to the remote chassis.
static void
consider_port_binding(struct ovsdb_idl_index *sbrec_port_binding_by_name,
enum mf_field_id mff_ovn_geneve,
const struct simap *ct_zones,
const struct sset *active_tunnels,
const struct hmap *local_datapaths,
const struct sbrec_port_binding *binding,
const struct sbrec_chassis *chassis,
struct ovn_desired_flow_table *flow_table,
struct ofpbuf *ofpacts_p)
{
...
const char *redirect_type = smap_get(&binding->options,
"redirect-type");
/* Remote port connected by tunnel */
/* Table 32, priority 100.
* =======================
*
* Handles traffic that needs to be sent to a remote hypervisor. Each
* flow matches an output port that includes a logical port on a remote
* hypervisor, and tunnels the packet to that hypervisor.
*/
match_init_catchall(&match);
ofpbuf_clear(ofpacts_p);
/* Match MFF_LOG_DATAPATH, MFF_LOG_OUTPORT. */
match_set_metadata(&match, htonll(dp_key));
match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, port_key);
if (redirect_type && !strcasecmp(redirect_type, "bridged")) {
put_remote_port_redirect_bridged(binding, local_datapaths,
ld, &match, ofpacts_p,
flow_table);
} else {
put_remote_port_redirect_overlay(binding, is_ha_remote,
ha_ch_ordered, mff_ovn_geneve,
tun, port_key, &match, ofpacts_p,
flow_table);
}
Hence the ARP broadcast is flooded only to the chassis that have Port_Binding records for this subnet (logical switch).
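Conceptually, flooding to _MC_flood therefore produces at most one tunneled copy per remote chassis that hosts a member port. The sketch below uses invented types and hypothetical chassis assignments (it is not the consider_mc_group implementation, which the TODO below leaves for later) purely to illustrate that dedup:

/* Toy dedup of flood-group members into tunnel targets; not OVN source. */
#include <stdio.h>
#include <string.h>

struct toy_member {
    const char *lport;
    const char *chassis;   /* "" == local chassis, delivered without a tunnel */
};

int
main(void)
{
    /* Hypothetical membership for the datapath in the trace: the sender and
     * the localport are local, the no-port-security port is remote. */
    const struct toy_member mc_flood[] = {
        { "8bb043eb-...", "" },
        { "0510e1de-...", "" },
        { "e32c218d-...", "remote-chassis-1" },
    };
    const char *seen[8];
    size_t n_seen = 0;

    for (size_t i = 0; i < sizeof mc_flood / sizeof mc_flood[0]; i++) {
        const char *ch = mc_flood[i].chassis;
        if (!ch[0]) {
            continue;              /* local member: plain output, no tunnel */
        }
        size_t j;
        for (j = 0; j < n_seen; j++) {
            if (!strcmp(seen[j], ch)) {
                break;             /* this chassis already gets a copy */
            }
        }
        if (j == n_seen && n_seen < sizeof seen / sizeof seen[0]) {
            seen[n_seen++] = ch;
            printf("tunnel one broadcast copy to %s\n", ch);
        }
    }
    return 0;
}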
TODO:
- Flesh out the FDB maintenance handling in pinctrl
- Flesh out the consider_mc_group handling in physical_run
Appendix:
[1] Port creation script
#!/usr/bin/bash
# Create port
# neutron port-create --name lucheng-tap --binding:host_id=node-3.domain.tld share_net
# Disable security groups / port security on the port
# neutron port-update --no-security-groups lucheng-tap
# neutron port-update --port_security_enabled=false lucheng-tap
HOST=""
MAC=""
get_port_info() {
source openrc
port_id="$1"
HOST=$(neutron port-show -F binding:host_id -f value "$port_id")
MAC=$(neutron port-show -F mac_address -f value "$port_id")
ip_info=$(neutron port-show -F fixed_ips -f value "$port_id")
echo Port "$port_id" Mac: "$MAC" HOST: "$HOST"
echo IP Info: "$ip_info"
}
create_ns() {
port_id="$1"
iface_name="lc-tap-${port_id:0:8}"
netns_name="lc-vm-${port_id:0:8}"
ssh "$HOST" ovs-vsctl add-port br-int "$iface_name" \
-- set Interface "$iface_name" type=internal \
-- set Interface "$iface_name" external_ids:iface-id="$port_id" \
-- set Interface "$iface_name" external_ids:attached-mac="$MAC" \
-- set Interface "$iface_name" external_ids:iface-status=active
ssh "$HOST" ip netns add "$netns_name"
ssh "$HOST" ip l set dev "$iface_name" address "$MAC"
ssh "$HOST" ip l set "$iface_name" netns "$netns_name"
ssh "$HOST" ip netns exec "$netns_name" ip l set lo up
ssh "$HOST" ip netns exec "$netns_name" ip l set "$iface_name" up
}
main() {
get_port_info "$1"
create_ns "$1"
}
main "$@"
[2] Port details from the Neutron and OVN databases
()[root@busybox-openstack-77f8c69f47-b5sn8 /]# neutron port-show a7b8fccd-eb2c-422c-83a7-3bb98e24ff4a
+-----------------------+----------------------------------------------------------------------------------------+
| Field | Value |
+-----------------------+----------------------------------------------------------------------------------------+
| admin_state_up | True |
| allowed_address_pairs | |
| binding:host_id | node-9.domain.tld |
| binding:profile | {} |
| binding:vif_details | {"port_filter": true} |
| binding:vif_type | ovs |
| binding:vnic_type | normal |
| created_at | 2021-12-14T02:22:42Z |
| description | |
| device_id | |
| device_owner | |
| extra_dhcp_opts | |
| fixed_ips | {"subnet_id": "9399a7fd-440f-4981-9fc1-4e5c22f9e6ee", "ip_address": "192.168.111.191"} |
| id | a7b8fccd-eb2c-422c-83a7-3bb98e24ff4a |
| mac_address | fa:16:3e:84:61:02 |
| name | lucheng-tap-no-sec |
| network_id | 2c4dfab0-7362-4ad8-9a92-27cec0fe6c05 |
| port_security_enabled | False |
| project_id | e5df2ef9a9b4497aa0db7df639f161c9 |
| qos_network_policy_id | |
| qos_policy_id | |
| resource_request | |
| revision_number | 6 |
| security_groups | |
| status | ACTIVE |
| tags | |
| tenant_id | e5df2ef9a9b4497aa0db7df639f161c9 |
| updated_at | 2021-12-14T02:24:54Z |
+-----------------------+----------------------------------------------------------------------------------------+
()[root@ovn-tool-0 /]# ovn-nbctl find Logical_Switch_Port name=a7b8fccd-eb2c-422c-83a7-3bb98e24ff4a
_uuid : aa779f10-250c-4919-8563-2d9462d5aedd
addresses : ["fa:16:3e:84:61:02 192.168.111.191", unknown]
dhcpv4_options : 45760679-a0db-453a-936b-fbb1ed6e0e9a
dhcpv6_options : []
dynamic_addresses : []
enabled : true
external_ids : {"neutron:cidrs"="192.168.111.191/24", "neutron:device_id"="", "neutron:device_owner"="", "neutron:network_name"=neutron-2c4dfab0-7362-4ad8-9a92-27cec0fe6c05, "neutron:port_name"=lucheng-tap-no-sec, "neutron:project_id"=e5df2ef9a9b4497aa0db7df639f161c9, "neutron:revision_number"="6", "neutron:security_group_ids"=""}
ha_chassis_group : []
name : "a7b8fccd-eb2c-422c-83a7-3bb98e24ff4a"
options : {mcast_flood_reports="true", requested-chassis=node-9.domain.tld}
parent_name : []
port_security : []
tag : []
tag_request : []
type : ""
up : true
()[root@ovn-tool-0 /]# ovn-sbctl find Port_Binding logical_port=a7b8fccd-eb2c-422c-83a7-3bb98e24ff4a
_uuid : 384550d8-91ed-495e-920e-06054c07723b
chassis : 79934578-e048-4e4e-9807-705ccfd3d760
datapath : a899c3df-117a-4a65-8e86-d3cb4663f726
encap : []
external_ids : {name=lucheng-tap-no-sec, "neutron:cidrs"="192.168.111.191/24", "neutron:device_id"="", "neutron:device_owner"="", "neutron:network_name"=neutron-2c4dfab0-7362-4ad8-9a92-27cec0fe6c05, "neutron:port_name"=lucheng-tap-no-sec, "neutron:project_id"=e5df2ef9a9b4497aa0db7df639f161c9, "neutron:revision_number"="6", "neutron:security_group_ids"=""}
gateway_chassis : []
ha_chassis_group : []
logical_port : "a7b8fccd-eb2c-422c-83a7-3bb98e24ff4a"
mac : ["fa:16:3e:84:61:02 192.168.111.191", unknown]
nat_addresses : []
options : {mcast_flood_reports="true", requested-chassis=node-9.domain.tld}
other_config : {}
parent_port : []
tag : []
tunnel_key : 451
type : ""
up : true
virtual_parent : []
()[root@ovn-tool-0 /]# ovn-sbctl list Multicast_Group | grep -C 3 384550d8-91ed-495e-920e-06054c07723b --color | grep -E 'name|tunnel_key|datapath'
datapath : a899c3df-117a-4a65-8e86-d3cb4663f726
name : _MC_unknown
tunnel_key : 32769
datapath : a899c3df-117a-4a65-8e86-d3cb4663f726
name : _MC_flood
tunnel_key : 32768
datapath : a899c3df-117a-4a65-8e86-d3cb4663f726
name : _MC_mrouter_static
tunnel_key : 32771
datapath : a899c3df-117a-4a65-8e86-d3cb4663f726
name : _MC_flood_l2
tunnel_key : 32773