The OpenFlow specification itself is not the topic here; this article is about how OVS implements its OpenFlow support, and essentially all of the relevant code lives under the ofproto/ directory.
struct ofproto models an OpenFlow switch and acts as an "interface class":
struct ofproto {
struct hmap_node hmap_node; /* In global 'all_ofprotos' hmap. */
const struct ofproto_class *ofproto_class; /* The provider implementing this bridge; the actual operations are carried out through ofproto_class. */
char *type; /* Datapath type. */
char *name; /* Datapath name. */
/* Settings. */
uint64_t fallback_dpid; /* Datapath ID if no better choice found. */
uint64_t datapath_id; /* Datapath ID. */
unsigned flow_eviction_threshold; /* Threshold at which to begin flow
* table eviction. Only affects the
* ofproto-dpif implementation */
bool forward_bpdu; /* Option to allow forwarding of BPDU frames
* when NORMAL action is invoked. */
char *mfr_desc; /* Manufacturer. */
char *hw_desc; /* Hardware. */
char *sw_desc; /* Software version. */
char *serial_desc; /* Serial number. */
char *dp_desc; /* Datapath description. */
enum ofp_config_flags frag_handling; /* One of OFPC_*. */
/* Datapath. */
struct hmap ports; /* Contains "struct ofport"s. */
struct shash port_by_name;
/* Flow tables. */
struct oftable *tables; /* openflow switch flow table */
int n_tables;
/* OpenFlow connections. */
struct connmgr *connmgr;
/* Flow table operation tracking. */
int state; /* Internal state. */
struct list pending; /* List of "struct ofopgroup"s. */
unsigned int n_pending; /* list_size(&pending). */
struct hmap deletions; /* All OFOPERATION_DELETE "ofoperation"s. */
/* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
*
* This is deprecated. It is only for compatibility with broken device
* drivers in old versions of Linux that do not properly support VLANs when
* VLAN devices are not used. When broken device drivers are no longer in
* widespread use, we will delete these interfaces. */
unsigned long int *vlan_bitmap; /* 4096-bit bitmap of in-use VLANs. */
bool vlans_changed; /* True if new VLANs are in use. */
int min_mtu; /* Current MTU of non-internal ports. */
};
struct ofport represents a port of an OpenFlow switch; every ofport belongs to exactly one ofproto.
struct ofport {
struct hmap_node hmap_node; /* In struct ofproto's "ports" hmap. */
struct ofproto *ofproto; /* The ofproto that contains this port. */
struct netdev *netdev; /* The netdev device backing this ofport. */
struct ofputil_phy_port pp;
uint16_t ofp_port; /* OpenFlow port number. */
unsigned int change_seq;
int mtu;
};
/* A flow table within a "struct ofproto". */
struct oftable {
enum oftable_flags flags;
struct classifier cls; /* Contains "struct rule"s. */
char *name; /* Table name exposed via OpenFlow, or NULL. */
/* Maximum number of flows or UINT_MAX if there is no limit besides any
* limit imposed by resource limitations. */
unsigned int max_flows;
struct mf_subfield *eviction_fields; /* If NULL, a new flow is simply refused once this table overflows; otherwise an existing flow is evicted (aged out) to make room for it (see the sketch after this struct). */
size_t n_eviction_fields;
uint32_t eviction_group_id_basis;
struct hmap eviction_groups_by_id;
struct heap eviction_groups_by_size;
};
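A minimal sketch of that eviction decision, using a cut-down oftable with only the fields that matter here; the *_sketch types and pick_victim() are hypothetical placeholders for the eviction-group logic that real OVS runs over eviction_groups_by_size:
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical, cut-down stand-ins for the real OVS types. */
struct mf_subfield;                       /* opaque here */
struct rule;                              /* opaque here */

struct oftable_sketch {
    unsigned int n_flows;                 /* current number of rules */
    unsigned int max_flows;               /* table capacity */
    struct mf_subfield *eviction_fields;  /* NULL => eviction disabled */
};

/* Hypothetical placeholder for the eviction-group logic: pick a rule to age
 * out, or NULL if none can be chosen. */
static struct rule *pick_victim(struct oftable_sketch *table)
{
    (void) table;
    return NULL;                          /* stub for the sketch */
}

/* Returns true if a new flow may be inserted, evicting an old one first if
 * the table is full and eviction is configured. */
static bool may_insert_flow(struct oftable_sketch *table)
{
    if (table->n_flows < table->max_flows) {
        return true;                      /* room left, just insert */
    }
    if (!table->eviction_fields) {
        return false;                     /* full and eviction disabled: the
                                           * new flow is refused */
    }
    /* Full but eviction enabled: age out an existing rule to make room. */
    struct rule *victim = pick_victim(table);
    if (victim) {
        table->n_flows--;                 /* real code deletes the victim rule */
    }
    return victim != NULL;
}

int main(void)
{
    struct oftable_sketch t = { 10, 10, NULL };
    printf("insert allowed: %d\n", may_insert_flow(&t));
    return 0;
}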
/* An OpenFlow flow within a "struct ofproto".
*
* With few exceptions, ofproto implementations may look at these fields but
* should not modify them. */
struct rule {
struct list ofproto_node; /* Owned by ofproto base code. */
struct ofproto *ofproto; /* The ofproto that contains this rule. */
struct cls_rule cr; /* In owning ofproto's classifier. */ ofproto->tables is an array of oftables; each table->cls is a struct classifier, a classifier contains cls_tables, and those in turn contain cls_rules
struct ofoperation *pending; /* Operation now in progress, if nonnull. */
ovs_be64 flow_cookie; /* Controller-issued identifier. */
long long int created; /* Creation time. */
long long int modified; /* Time of last modification. */
long long int used; /* Last use; time created if never used. */
uint16_t hard_timeout; /* In seconds from ->modified. */
uint16_t idle_timeout; /* In seconds from ->used. */
uint8_t table_id; /* Index in ofproto's 'tables' array. */
bool send_flow_removed; /* Send a flow removed message? */
/* Eviction groups. */
bool evictable; /* If false, prevents eviction. */
struct heap_node evg_node; /* In eviction_group's "rules" heap. */
struct eviction_group *eviction_group; /* NULL if not in any group. */
union ofp_action *actions; /* OpenFlow actions. */ 'actions' is an array of n_actions union ofp_action elements; it eventually gets converted to nlattr format and sent out over netlink
int n_actions; /* Number of elements in actions[]. */
};
My understanding: a rule captures what matching flows have in common; once a flow is matched by a rule, rule->actions is executed.
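To make that concrete, here is a self-contained sketch of "a flow matches a rule, so the rule's actions are applied". The *_sketch types, flow_matches_rule() and apply_action() are all hypothetical simplifications; in real OVS the match is a classifier lookup and the actions are translated into datapath actions.
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical, drastically simplified versions of struct flow,
 * union ofp_action and struct rule, just to show the relationship. */
struct flow_sketch {
    uint16_t in_port;
    uint16_t tp_dst;                 /* e.g. TCP destination port */
};

struct action_sketch {
    enum { ACT_OUTPUT, ACT_DROP } type;
    uint16_t out_port;
};

struct rule_sketch {
    uint16_t match_tp_dst;           /* the "common property" this rule matches */
    struct action_sketch *actions;   /* like rule->actions */
    int n_actions;                   /* like rule->n_actions */
};

static bool flow_matches_rule(const struct flow_sketch *flow,
                              const struct rule_sketch *rule)
{
    return flow->tp_dst == rule->match_tp_dst;
}

static void apply_action(const struct action_sketch *a)
{
    if (a->type == ACT_OUTPUT) {
        printf("output to port %u\n", (unsigned) a->out_port);
    } else {
        printf("drop\n");
    }
}

int main(void)
{
    struct action_sketch acts[] = { { ACT_OUTPUT, 2 } };
    struct rule_sketch rule = { 80, acts, 1 };
    struct flow_sketch flow = { 1, 80 };

    if (flow_matches_rule(&flow, &rule)) {
        for (int i = 0; i < rule.n_actions; i++) {
            apply_action(&rule.actions[i]);
        }
    }
    return 0;
}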
struct ofproto_class is the implementation interface for the bridge model; the one implementation of struct ofproto today is struct ofproto_dpif, whose ofproto_class instance is ofproto_dpif_class. struct ofproto represents an OpenFlow switch, struct ofport a port of that switch, and struct rule a flow in that switch. All of these are interface classes; the concrete implementation classes are struct ofproto_dpif, struct ofport_dpif and struct rule_dpif.
In the same way, struct dpif_class is the interface (provider) class on the datapath side; its instances are dpif_netdev_class and dpif_linux_class.
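This "interface class plus implementation class" relationship is plain C: the implementation embeds the interface struct and recovers itself with a container_of-style macro, which is what helpers like ofproto_dpif_cast() do with OVS's CONTAINER_OF. A minimal sketch of the pattern with generic, made-up names (iface/impl):
#include <stddef.h>
#include <stdio.h>

/* Interface struct, analogous to struct ofproto. */
struct iface {
    const char *name;
};

/* Implementation struct, analogous to struct ofproto_dpif: it embeds the
 * interface as a member called 'up'. */
struct impl {
    struct iface up;
    int private_state;
};

/* Local equivalent of OVS's CONTAINER_OF: recover the enclosing struct from
 * a pointer to one of its members. */
#define container_of(ptr, type, member) \
    ((type *) ((char *) (ptr) - offsetof(type, member)))

/* Analogous to ofproto_dpif_cast(): interface pointer -> implementation. */
static struct impl *impl_cast(struct iface *i)
{
    return container_of(i, struct impl, up);
}

int main(void)
{
    struct impl im = { { "br0" }, 42 };
    struct iface *generic = &im.up;        /* what the base code passes around */

    printf("%s: %d\n", generic->name, impl_cast(generic)->private_state);
    return 0;
}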
struct ofproto_class {
/* Enumerates the types of all supported ofproto types into 'types'. The
* caller has already initialized 'types' and other ofproto classes might
* already have added names to it. */
void (*enumerate_types)(struct sset *types);
This function enumerates the available implementations. Globally OVS has static const struct dpif_class *base_dpif_classes[] = {&dpif_linux_class, &dpif_netdev_class} plus a global struct shash, static struct shash dpif_classes. At startup dp_initialize() registers every dpif_class in the base_dpif_classes array into the global dpif_classes shash by calling dp_register_provider(). The type of dpif_linux_class is "system", and the type of dpif_netdev_class is "netdev".
The key of that global shash is the type string and the value is a struct registered_dpif_class *, which bundles a const struct dpif_class * with a refcount for that class.
Back to enumerate_types: it simply collects the types of all registered dpif_classes into an sset.
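A minimal sketch of that registration scheme, with a plain array standing in for the dpif_classes shash; register_provider() and enumerate_types() mirror the roles of dp_register_provider() and the enumerate_types callback, but everything here is simplified and hypothetical:
#include <stdio.h>
#include <string.h>

/* Simplified provider descriptor, standing in for struct dpif_class. */
struct provider {
    const char *type;                 /* "system", "netdev", ... */
};

/* Simplified registry entry, standing in for struct registered_dpif_class. */
struct registered_provider {
    const struct provider *class;
    int refcount;
};

static const struct provider linux_like  = { "system" };
static const struct provider netdev_like = { "netdev" };

/* Stand-in for base_dpif_classes[] plus the dpif_classes shash. */
static struct registered_provider registry[8];
static int n_registered;

static int register_provider(const struct provider *p)
{
    if (n_registered >= (int) (sizeof registry / sizeof registry[0])) {
        return -1;                    /* registry full */
    }
    for (int i = 0; i < n_registered; i++) {
        if (!strcmp(registry[i].class->type, p->type)) {
            return -1;                /* duplicate type */
        }
    }
    registry[n_registered].class = p;
    registry[n_registered].refcount = 0;
    n_registered++;
    return 0;
}

/* Analogous to enumerate_types: report every registered type name. */
static void enumerate_types(void)
{
    for (int i = 0; i < n_registered; i++) {
        printf("type: %s\n", registry[i].class->type);
    }
}

int main(void)
{
    register_provider(&linux_like);   /* dpif_linux_class -> "system" */
    register_provider(&netdev_like);  /* dpif_netdev_class -> "netdev" */
    enumerate_types();
    return 0;
}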
int (*del)(const char *type, const char *name);
This function deletes the datapath named 'name' of type 'type'. The userspace representation of a datapath is defined in lib/dpif-provider.h; a dpif likewise carries a struct dpif_class interface pointer.
del first calls dpif_open() to obtain the struct dpif for the datapath (which in turn calls dpif_class->open), and then calls dpif_delete() to destroy the datapath behind that dpif, which calls dpif_class->destroy (a sketch of this call chain follows the struct definitions below).
struct dpif {
const struct dpif_class *dpif_class;
char *base_name;
char *full_name;
uint8_t netflow_engine_type;
uint8_t netflow_engine_id;
};
struct dpif is the abstract interface class.
struct dpif_linux {
struct dpif dpif;
int dp_ifindex; /* Presumably the value of /sys/class/net/xxxx/ifindex; every device has one. */
/* Upcall messages. */
struct nl_sock *upcall_socks[N_UPCALL_SOCKS];
uint32_t ready_mask; /* 1-bit for each sock with unread messages. */
int epoll_fd; /* epoll fd that includes the upcall socks. */
/* Change notification. */
struct sset changed_ports; /* Ports that have changed. */
struct nln_notifier *port_notifier;
bool change_error;
/* Port number allocation. */
uint16_t alloc_port_no;
};
struct dpif_linux is the concrete implementation structure; it embeds a struct dpif.
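A sketch of the del() call chain described above: open the dpif for (type, name), delete the underlying datapath, then close the handle. The dpif_open/dpif_delete/dpif_close names mirror the generic wrappers in lib/dpif.c, but their bodies here are stubs so the example stays self-contained; treat it as the shape of the call chain, not the real implementation.
#include <stdio.h>

struct dpif { int unused; };              /* stand-in; the real struct dpif is in lib/dpif-provider.h */
static struct dpif dummy_dpif;

/* Stubs standing in for the generic wrappers, which forward to
 * dpif_class->open / ->destroy / ->close. */
static int dpif_open(const char *name, const char *type, struct dpif **dpifp)
{
    (void) name; (void) type;
    *dpifp = &dummy_dpif;
    return 0;
}
static int dpif_delete(struct dpif *dpif) { (void) dpif; return 0; }
static void dpif_close(struct dpif *dpif) { (void) dpif; }

/* Shape of ofproto_class->del as described above: look the datapath up by
 * (type, name), then destroy it. */
static int del(const char *type, const char *name)
{
    struct dpif *dpif;
    int error = dpif_open(name, type, &dpif);   /* ends up in dpif_class->open */
    if (!error) {
        error = dpif_delete(dpif);              /* ends up in dpif_class->destroy */
        dpif_close(dpif);                       /* release the handle either way */
    }
    return error;
}

int main(void)
{
    printf("del returned %d\n", del("system", "dp0"));
    return 0;
}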
int (*construct)(struct ofproto *ofproto);
void (*destruct)(struct ofproto *ofproto);
First, struct ofproto_dpif needs to be introduced: since struct ofproto is only an interface class, its implementation class is struct ofproto_dpif, defined as follows.
struct ofproto_dpif {
struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */ every ofproto_dpif is linked into the global hmap all_ofproto_dpifs
struct ofproto up; /* The embedded ofproto. */
struct dpif *dpif; /* The underlying dpif; in practice a dpif_linux or dpif_netdev. */
int max_ports; /* Maximum number of ports this ofproto_dpif may have. */
/* Special OpenFlow rules. */
struct rule_dpif *miss_rule; /* Sends flow table misses to controller. */
struct rule_dpif *no_packet_in_rule; /* Drops flow table misses. */
/* Statistics. */
uint64_t n_matches;
/* Bridging. */
struct netflow *netflow;
struct dpif_sflow *sflow;
struct hmap bundles; /* Contains "struct ofbundle"s. */
struct mac_learning *ml; /* The MAC-learning (CAM) table mapping MAC addresses to ports. */
struct ofmirror *mirrors[MAX_MIRRORS];
bool has_mirrors;
bool has_bonded_bundles;
/* Expiration. */
struct timer next_expiration;
/* Facets. */
struct hmap facets; /* A facet represents a flow that has been matched; all facets are kept in this hmap. */
struct hmap subfacets;
struct governor *governor;
/* Revalidation. */
struct table_dpif tables[N_TABLES];
bool need_revalidate;
struct tag_set revalidate_set;
/* Support for debugging async flow mods. */
struct list completions;
bool has_bundle_action; /* True when the first bundle action appears. */
struct netdev_stats stats; /* To account packets generated and consumed in
* userspace. */
/* Spanning tree. */
struct stp *stp;
long long int stp_last_tick;
/* VLAN splinters. */
struct hmap realdev_vid_map; /* (realdev,vid) -> vlandev. */
struct hmap vlandev_map; /* vlandev -> (realdev,vid). */
};
construct() initializes the various members of the ofproto/ofproto_dpif and then hmap_insert()s it into the global all_ofproto_dpifs hmap; destruct() does the reverse.
int (*run)(struct ofproto *ofproto);
int (*run_fast)(struct ofproto *ofproto);
run() in turn calls a number of xxx_run sub-functions, e.g.:
dpif_run, which for dpif_linux_class means dpif_linux_run; from the code it appears to drive some netlink notifier callbacks
run_fast, which repeatedly calls handle_upcalls, bounded by FLOW_MISS_MAX_BATCH, e.g. to handle flow-miss upcalls (see the sketch after this list)
port_run for every ofport of the ofproto
mac_learning_run for the CAM table, which walks the LRU list for mac_entry items whose time has expired and ages those entries out
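A sketch of the batching idea in run_fast: keep handling upcalls until nothing is left or a work budget of FLOW_MISS_MAX_BATCH is used up. handle_upcalls() is stubbed here with an imaginary backlog; in ofproto/ofproto-dpif.c it actually reads upcalls from the dpif and installs flows.
#include <stdio.h>

#define FLOW_MISS_MAX_BATCH 50            /* same name as in ofproto-dpif.c */

/* Stub: pretend each call drains up to 'max_batch' pending upcalls and
 * returns how many it handled. */
static int handle_upcalls(int max_batch)
{
    static int pending = 120;             /* imaginary backlog */
    int n = pending < max_batch ? pending : max_batch;
    pending -= n;
    return n;
}

/* Shape of run_fast: bound the work per call so the main loop stays
 * responsive, and stop early when there are no upcalls left. */
static void run_fast_sketch(void)
{
    int work = 0;
    while (work < FLOW_MISS_MAX_BATCH) {
        int n = handle_upcalls(FLOW_MISS_MAX_BATCH - work);
        if (n == 0) {
            break;
        }
        work += n;
    }
    printf("handled %d upcalls this round\n", work);
}

int main(void)
{
    run_fast_sketch();
    return 0;
}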
int (*port_construct)(struct ofport *ofport);
void (*port_destruct)(struct ofport *ofport);
The implementation class behind the struct ofport interface is struct ofport_dpif:
struct ofport_dpif {
struct ofport up;
uint32_t odp_port; /* My guess: ofp_port is the port number in the OpenFlow view, while odp_port is the port number in the datapath view. */
struct ofbundle *bundle; /* Bundle that contains this port, if any. */ a bundle provides LACP-style bonding; one bond groups several ofports
struct list bundle_node; /* In struct ofbundle's "ports" list. */
struct cfm *cfm; /* Connectivity Fault Management, if any. */
tag_type tag; /* Tag associated with this port. */
uint32_t bond_stable_id; /* stable_id to use as bond slave, or 0. */
bool may_enable; /* May be enabled in bonds. */
long long int carrier_seq; /* Carrier status changes. */
/* Spanning tree. */
struct stp_port *stp_port; /* Spanning Tree Protocol, if any. */
enum stp_state stp_state; /* Always STP_DISABLED if STP not in use. */
long long int stp_state_entered;
struct hmap priorities; /* Map of attached 'priority_to_dscp's. */
/* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
*
* This is deprecated. It is only for compatibility with broken device
* drivers in old versions of Linux that do not properly support VLANs when
* VLAN devices are not used. When broken device drivers are no longer in
* widespread use, we will delete these interfaces. */
uint16_t realdev_ofp_port;
int vlandev_vid;
};
void (*port_modified)(struct ofport *ofport);
void (*port_reconfigured)(struct ofport *ofport, enum ofputil_port_config old_config);
port_reconfigured: every time a port's ofputil_port_config changes, ofproto->need_revalidate must be set to true, because a change on an ofport can invalidate entries already installed in the ofproto's flow tables (see the sketch after the two enums below).
The port configuration and port state bits defined by OVS are:
enum ofputil_port_config {
/* OpenFlow 1.0 and 1.1 share these values for these port config bits. */
OFPUTIL_PC_PORT_DOWN = 1 << 0, /* Port is administratively down. */
OFPUTIL_PC_NO_RECV = 1 << 2, /* Drop all packets received by port. */
OFPUTIL_PC_NO_FWD = 1 << 5, /* Drop packets forwarded to port. */
OFPUTIL_PC_NO_PACKET_IN = 1 << 6, /* No send packet-in msgs for port. */
/* OpenFlow 1.0 only. */
OFPUTIL_PC_NO_STP = 1 << 1, /* No 802.1D spanning tree for port. */
OFPUTIL_PC_NO_RECV_STP = 1 << 3, /* Drop received 802.1D STP packets. */
OFPUTIL_PC_NO_FLOOD = 1 << 4, /* Do not include port when flooding. */
/* There are no OpenFlow 1.1-only bits. */
};
enum ofputil_port_state {
/* OpenFlow 1.0 and 1.1 share these values for these port state bits. */
OFPUTIL_PS_LINK_DOWN = 1 << 0, /* No physical link present. */
/* OpenFlow 1.1 only. */
OFPUTIL_PS_BLOCKED = 1 << 1, /* Port is blocked */
OFPUTIL_PS_LIVE = 1 << 2, /* Live for Fast Failover Group. */
/* OpenFlow 1.0 only. */
OFPUTIL_PS_STP_LISTEN = 0 << 8, /* Not learning or relaying frames. */
OFPUTIL_PS_STP_LEARN = 1 << 8, /* Learning but not relaying frames. */
OFPUTIL_PS_STP_FORWARD = 2 << 8, /* Learning and relaying frames. */
OFPUTIL_PS_STP_BLOCK = 3 << 8, /* Not part of spanning tree. */
OFPUTIL_PS_STP_MASK = 3 << 8 /* Bit mask for OFPPS10_STP_* values. */
};
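A sketch of the idea behind port_reconfigured: compare the old and new config bits and mark the ofproto for revalidation when a bit that affects forwarding flipped. The 'interesting' mask below is an illustrative guess, not the exact set the real function in ofproto-dpif.c uses; the PC_* values are copied from enum ofputil_port_config above.
#include <stdbool.h>
#include <stdio.h>

/* Values copied from enum ofputil_port_config above. */
enum {
    PC_PORT_DOWN    = 1 << 0,
    PC_NO_RECV      = 1 << 2,
    PC_NO_FWD       = 1 << 5,
    PC_NO_PACKET_IN = 1 << 6,
    PC_NO_FLOOD     = 1 << 4,
};

struct ofproto_sketch {
    bool need_revalidate;
};

/* Sketch of port_reconfigured(): if any bit that influences how packets are
 * forwarded flipped, previously installed flows may now be wrong, so ask for
 * revalidation of the flow tables. */
static void port_reconfigured_sketch(struct ofproto_sketch *ofproto,
                                     unsigned int old_config,
                                     unsigned int new_config)
{
    unsigned int changed = old_config ^ new_config;
    unsigned int interesting = PC_PORT_DOWN | PC_NO_RECV | PC_NO_FWD
                               | PC_NO_PACKET_IN | PC_NO_FLOOD;
    if (changed & interesting) {
        ofproto->need_revalidate = true;
    }
}

int main(void)
{
    struct ofproto_sketch o = { false };
    port_reconfigured_sketch(&o, 0, PC_NO_FLOOD);   /* NO_FLOOD was turned on */
    printf("need_revalidate = %d\n", o.need_revalidate);
    return 0;
}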
int (*port_add)(struct ofproto *ofproto, struct netdev *netdev, uint16_t *ofp_portp);
int (*port_del)(struct ofproto *ofproto, uint16_t ofp_port);
port_add builds a struct dpif_linux_vport request and sends it as a netlink message to the kernel datapath via dpif_linux_vport_transact(), adding a port. port_del works the same way.
int (*port_dump_start)(const struct ofproto *ofproto, void **statep);
int (*port_dump_next)(const struct ofproto *ofproto, void *state, struct ofproto_port *port);
int (*port_dump_done)(const struct ofproto *ofproto, void *state);
These three functions are used to iterate over all the ports of an ofproto.
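The three callbacks form the usual start/next/done iterator pattern. Below is a self-contained sketch of how a caller drives them, with a trivial array-backed 'state' instead of the real dpif port dump; all the *_sketch types are hypothetical.
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins for struct ofproto / struct ofproto_port. */
struct ofproto_sketch      { const char **port_names; int n_ports; };
struct ofproto_port_sketch { const char *name; };

/* Iterator state allocated by dump_start and freed by dump_done. */
struct dump_state { int next; };

static int port_dump_start(const struct ofproto_sketch *p, void **statep)
{
    (void) p;
    *statep = calloc(1, sizeof(struct dump_state));
    return *statep ? 0 : ENOMEM;
}

static int port_dump_next(const struct ofproto_sketch *p, void *state_,
                          struct ofproto_port_sketch *port)
{
    struct dump_state *state = state_;
    if (state->next >= p->n_ports) {
        return EOF;                       /* no more ports */
    }
    port->name = p->port_names[state->next++];
    return 0;
}

static int port_dump_done(const struct ofproto_sketch *p, void *state)
{
    (void) p;
    free(state);
    return 0;
}

int main(void)
{
    const char *names[] = { "br0", "eth0", "vif1.0" };
    struct ofproto_sketch p = { names, 3 };
    struct ofproto_port_sketch port;
    void *state;

    if (!port_dump_start(&p, &state)) {
        while (!port_dump_next(&p, state, &port)) {
            printf("port: %s\n", port.name);
        }
        port_dump_done(&p, state);
    }
    return 0;
}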
int (*port_poll)(const struct ofproto *ofproto, char **devnamep);
void (*port_poll_wait)(const struct ofproto *ofproto);
port_poll reports which ports of the ofproto have changed; in practice it reads the changed_ports set kept in the dpif (struct dpif_linux).
enum ofperr (*rule_construct)(struct rule *rule);
void (*rule_destruct)(struct rule *rule);
/* Applies the actions in 'rule' to 'packet'. (This implements sending
* buffered packets for OpenFlow OFPT_FLOW_MOD commands.)
*
* Takes ownership of 'packet' (so it should eventually free it, with
* ofpbuf_delete()).
*
* 'flow' reflects the flow information for 'packet'. All of the
* information in 'flow' is extracted from 'packet', except for
* flow->tun_id and flow->in_port, which are assigned the correct values
* for the incoming packet. The register values are zeroed. 'packet''s
* header pointers (e.g. packet->l3) are appropriately initialized.
*
* The implementation should add the statistics for 'packet' into 'rule'.
*
* Returns 0 if successful, otherwise an OpenFlow error code. */
enum ofperr (*rule_execute)(struct rule *rule, const struct flow *flow,
struct ofpbuf *packet);
rule_execute applies the rule's actions to 'packet'; 'packet' holds the frame data (an ofpbuf in userspace, playing roughly the role an skb plays in the kernel), and 'flow' is the flow extracted from that packet.
Now for struct dpif. It is again an interface class; the actual behavior lives in struct dpif_class:
struct dpif {
const struct dpif_class *dpif_class;
char *base_name;
char *full_name;
uint8_t netflow_engine_type;
uint8_t netflow_engine_id;
};
struct dpif_class {
/* Type of dpif in this class, e.g. "system", "netdev", etc.
*
* One of the providers should supply a "system" type, since this is
* the type assumed if no type is specified when opening a dpif. */
const char *type;
int (*enumerate)(struct sset *all_dps);
/* Attempts to open an existing dpif called 'name', if 'create' is false,
* or to open an existing dpif or create a new one, if 'create' is true.
*
* 'dpif_class' is the class of dpif to open.
*
* If successful, stores a pointer to the new dpif in '*dpifp', which must
* have class 'dpif_class'. On failure there are no requirements on what
* is stored in '*dpifp'. */
int (*open)(const struct dpif_class *dpif_class,
const char *name, bool create, struct dpif **dpifp);
/* Closes 'dpif' and frees associated memory. */
void (*close)(struct dpif *dpif);
/* Attempts to destroy the dpif underlying 'dpif'.
*
* If successful, 'dpif' will not be used again except as an argument for
* the 'close' member function. */
int (*destroy)(struct dpif *dpif);
/* Performs periodic work needed by 'dpif', if any is necessary. */
void (*run)(struct dpif *dpif);
/* Arranges for poll_block() to wake up if the "run" member function needs
* to be called for 'dpif'. */
void (*wait)(struct dpif *dpif);
/* Adds 'netdev' as a new port in 'dpif'. If successful, sets '*port_no'
* to the new port's port number. */
int (*port_add)(struct dpif *dpif, struct netdev *netdev,
uint16_t *port_no);
/* Removes port numbered 'port_no' from 'dpif'. */
int (*port_del)(struct dpif *dpif, uint16_t port_no);
int (*flow_get)(const struct dpif *dpif,
const struct nlattr *key, size_t key_len,
struct ofpbuf **actionsp, struct dpif_flow_stats *stats);
int (*flow_put)(struct dpif *dpif, const struct dpif_flow_put *put);
int (*flow_del)(struct dpif *dpif, const struct dpif_flow_del *del);
/* Performs the 'execute->actions_len' bytes of actions in
* 'execute->actions' on the Ethernet frame specified in 'execute->packet'
* taken from the flow specified in the 'execute->key_len' bytes of
* 'execute->key'. ('execute->key' is mostly redundant with
* 'execute->packet', but it contains some metadata that cannot be
* recovered from 'execute->packet', such as tun_id and in_port.) */
int (*execute)(struct dpif *dpif, const struct dpif_execute *execute);
};
The global array of dpif_class pointers holds the two instances, dpif_linux_class and dpif_netdev_class:
static const struct dpif_class *base_dpif_classes[] = {
#ifdef HAVE_NETLINK
&dpif_linux_class,
#endif
&dpif_netdev_class,
};
dp_initialize() registers base_dpif_classes at startup by calling dp_register_provider(); the registrations are kept in the global shash dpif_classes.
dpif_open() and dpif_create() both call dp_open() to open or create a struct dpif, which normally ends up in dpif_linux_class->open; dpif_close() closes the datapath handle, and registered_dpif_class->refcount counts how many dpifs of each type are in use.
dpif_port_add() and dpif_port_del() add and remove ports through dpif_linux_class->port_add and dpif_linux_class->port_del.
dpif_flow_flush() deletes every flow in the datapath's flow table.
dpif_flow_get() looks up a flow in the datapath by its flow key; if the flow also has actions, they are returned through struct ofpbuf **actionsp.
dpif_flow_put() / dpif_flow_put__() create or modify a flow's actions.
dpif_flow_del() / dpif_flow_del__() delete the flow whose key is 'key'.
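All of those dpif_* wrappers have the same shape: they simply dispatch through dpif->dpif_class to whichever provider is behind the handle. A sketch of that dispatch with a two-entry vtable instead of the full struct dpif_class (the *_sketch names and the 'toy' provider are made up):
#include <stdio.h>

struct dpif;

/* Two-entry stand-in for struct dpif_class. */
struct dpif_class_sketch {
    const char *type;
    int (*port_del)(struct dpif *dpif, unsigned port_no);
    int (*flow_flush)(struct dpif *dpif);
};

/* Matches the layout idea of struct dpif: a pointer to its class. */
struct dpif {
    const struct dpif_class_sketch *dpif_class;
};

/* Generic wrappers shaped like dpif_port_del()/dpif_flow_flush() in
 * lib/dpif.c: they only forward to the provider's function pointer. */
static int dpif_port_del_sketch(struct dpif *dpif, unsigned port_no)
{
    return dpif->dpif_class->port_del(dpif, port_no);
}
static int dpif_flow_flush_sketch(struct dpif *dpif)
{
    return dpif->dpif_class->flow_flush(dpif);
}

/* A toy provider standing in for dpif_linux_class/dpif_netdev_class. */
static int toy_port_del(struct dpif *dpif, unsigned port_no)
{ (void) dpif; printf("deleting port %u\n", port_no); return 0; }
static int toy_flow_flush(struct dpif *dpif)
{ (void) dpif; printf("flushing flows\n"); return 0; }

static const struct dpif_class_sketch toy_class = {
    "toy", toy_port_del, toy_flow_flush,
};

int main(void)
{
    struct dpif dp = { &toy_class };
    dpif_port_del_sketch(&dp, 3);
    dpif_flow_flush_sketch(&dp);
    return 0;
}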
One instance of struct dpif_class is dpif_linux_class:
const struct dpif_class dpif_linux_class = {
"system",
dpif_linux_enumerate,
dpif_linux_open,
dpif_linux_close,
dpif_linux_destroy,
dpif_linux_run,
dpif_linux_wait,
dpif_linux_get_stats,
dpif_linux_port_add,
dpif_linux_port_del,
dpif_linux_port_query_by_number,
dpif_linux_port_query_by_name,
dpif_linux_get_max_ports,
dpif_linux_port_get_pid,
dpif_linux_port_dump_start,
dpif_linux_port_dump_next,
dpif_linux_port_dump_done,
dpif_linux_port_poll,
dpif_linux_port_poll_wait,
dpif_linux_flow_get,
dpif_linux_flow_put,
dpif_linux_flow_del,
dpif_linux_flow_flush,
dpif_linux_flow_dump_start,
dpif_linux_flow_dump_next,
dpif_linux_flow_dump_done,
dpif_linux_execute,
dpif_linux_operate,
dpif_linux_recv_set,
dpif_linux_queue_to_priority,
dpif_linux_recv,
dpif_linux_recv_wait,
dpif_linux_recv_purge,
};
Most of these operations talk to the kernel over netlink, so a set of message structures is defined for them:
struct dpif_linux_dp {
/* Generic Netlink header. */
uint8_t cmd;
/* struct ovs_header. */
int dp_ifindex;
/* Attributes. */
const char *name; /* OVS_DP_ATTR_NAME. */
const uint32_t *upcall_pid; /* OVS_DP_UPCALL_PID. */
struct ovs_dp_stats stats; /* OVS_DP_ATTR_STATS. */
};
struct dpif_linux_flow {
/* Generic Netlink header. */
uint8_t cmd;
/* struct ovs_header. */
unsigned int nlmsg_flags;
int dp_ifindex;
/* Attributes.
*
* The 'stats' member points to 64-bit data that might only be aligned on
* 32-bit boundaries, so get_unaligned_u64() should be used to access its
* values.
*
* If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in
* the Netlink version of the command, even if actions_len is zero. */
const struct nlattr *key; /* OVS_FLOW_ATTR_KEY. */
size_t key_len;
const struct nlattr *actions; /* OVS_FLOW_ATTR_ACTIONS. */
size_t actions_len;
const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */
const uint8_t *tcp_flags; /* OVS_FLOW_ATTR_TCP_FLAGS. */
const ovs_32aligned_u64 *used; /* OVS_FLOW_ATTR_USED. */
bool clear; /* OVS_FLOW_ATTR_CLEAR. */
};
The function that performs the netlink exchange with the kernel is dpif_linux_dp_transact(); it takes one struct dpif_linux_dp as the request and another for the reply, and the exchange is a synchronous transaction.
On Linux there is also struct dpif_linux, which wraps struct dpif with a layer of Linux-specific state:
/* Datapath interface for the openvswitch Linux kernel module. */
struct dpif_linux {
struct dpif dpif;
int dp_ifindex;
/* Upcall messages. */
struct nl_sock *upcall_socks[N_UPCALL_SOCKS];
uint32_t ready_mask; /* 1-bit for each sock with unread messages. */
int epoll_fd; /* epoll fd that includes the upcall socks. */
/* Change notification. */
struct sset changed_ports; /* Ports that have changed. */
struct nln_notifier *port_notifier;
bool change_error;
/* Port number allocation. */
uint16_t alloc_port_no;
};
dpif_linux_open first sends an OVS_DP_CMD_NEW or OVS_DP_CMD_GET command via dpif_linux_dp_transact() and collects the reply, then calls open_dpif() to build a struct dpif from that reply.
dpif_linux_close closes the netlink socket used to talk to the kernel and destroys the netlink notifier created earlier, i.e. it stops watching for port changes.
dpif_linux_destroy calls dpif_linux_dp_transact() with the OVS_DP_CMD_DEL command to destroy the datapath.
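Put together, the destroy path described above reduces to: fill a struct dpif_linux_dp request with OVS_DP_CMD_DEL and the datapath's ifindex, then run one synchronous transaction. A sketch with the command value and the transact call stubbed out (the real dpif_linux_dp_transact() builds a Generic Netlink message and waits for the kernel's reply):
#include <stdint.h>
#include <stdio.h>

enum { OVS_DP_CMD_DEL_SKETCH = 2 };       /* placeholder value; the real enum
                                           * ovs_datapath_cmd lives in the
                                           * openvswitch kernel headers */

/* Cut-down version of struct dpif_linux_dp: just the fields this path uses. */
struct dp_request {
    uint8_t cmd;
    int dp_ifindex;
};

/* Stub for dpif_linux_dp_transact(): the real function serializes the request
 * into a Generic Netlink message, sends it and parses the reply. */
static int dp_transact(const struct dp_request *request)
{
    printf("netlink: cmd=%u dp_ifindex=%d\n",
           (unsigned) request->cmd, request->dp_ifindex);
    return 0;
}

/* Shape of dpif_linux_destroy() as the text describes it. */
static int destroy_datapath(int dp_ifindex)
{
    struct dp_request dp = {
        .cmd = OVS_DP_CMD_DEL_SKETCH,
        .dp_ifindex = dp_ifindex,
    };
    return dp_transact(&dp);
}

int main(void)
{
    return destroy_datapath(7);
}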
/* Adds a struct netdev as a new port of the datapath. */
static int
dpif_linux_port_add(struct dpif *dpif_, struct netdev *netdev,
                    uint16_t *port_nop)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    const char *name = netdev_get_name(netdev);
    const char *type = netdev_get_type(netdev);   /* enum ovs_vport_type lists all vport types */
    struct dpif_linux_vport request, reply;
    const struct ofpbuf *options;
    struct ofpbuf *buf;
    int error, i = 0, max_ports = MAX_PORTS;

    dpif_linux_vport_init(&request);
    request.cmd = OVS_VPORT_CMD_NEW;
    request.dp_ifindex = dpif->dp_ifindex;        /* the ifindex of the datapath device under /sys/class/net */
    request.type = netdev_vport_get_vport_type(netdev);
    if (request.type == OVS_VPORT_TYPE_UNSPEC) {
        VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
                     "unsupported type `%s'",
                     dpif_name(dpif_), name, type);
        return EINVAL;
    }
    request.name = name;

    options = netdev_vport_get_options(netdev);
    if (options && options->size) {
        request.options = options->data;
        request.options_len = options->size;
    }

    if (request.type == OVS_VPORT_TYPE_NETDEV) {
        netdev_linux_ethtool_set_flag(netdev, ETH_FLAG_LRO, "LRO", false);  /* disable LRO */
    }

    /* Loop until we find a port that isn't used. */
    do {
        uint32_t upcall_pid;

        request.port_no = ++dpif->alloc_port_no;  /* alloc_port_no tracks the last port number handed out */
        upcall_pid = dpif_linux_port_get_pid(dpif_, request.port_no);
        request.upcall_pid = &upcall_pid;
        error = dpif_linux_vport_transact(&request, &reply, &buf);

        if (!error) {
            *port_nop = reply.port_no;
            VLOG_DBG("%s: assigning port %"PRIu32" to netlink pid %"PRIu32,
                     dpif_name(dpif_), request.port_no, upcall_pid);
        } else if (error == EFBIG) {
            /* Older datapath has lower limit. */
            max_ports = dpif->alloc_port_no;
            dpif->alloc_port_no = 0;
        }

        ofpbuf_delete(buf);
    } while ((i++ < max_ports)
             && (error == EBUSY || error == EFBIG));

    return error;
}
dpif_linux_port_del involves yet another message structure, struct dpif_linux_vport:
struct dpif_linux_vport {
/* Generic Netlink header. */
uint8_t cmd;
/* ovs_vport header. */
int dp_ifindex;
uint32_t port_no; /* UINT32_MAX if unknown. */
enum ovs_vport_type type;
/* Attributes.
*
* The 'stats' member points to 64-bit data that might only be aligned on
* 32-bit boundaries, so use get_unaligned_u64() to access its values.
*/
const char *name; /* OVS_VPORT_ATTR_NAME. */
const uint32_t *upcall_pid; /* OVS_VPORT_ATTR_UPCALL_PID. */
const struct ovs_vport_stats *stats; /* OVS_VPORT_ATTR_STATS. */
const uint8_t *address; /* OVS_VPORT_ATTR_ADDRESS. */
const struct nlattr *options; /* OVS_VPORT_ATTR_OPTIONS. */
size_t options_len;
};
dpif_linux_port_del sets vport.cmd = OVS_VPORT_CMD_DEL, vport.dp_ifindex = dpif->dp_ifindex and vport.port_no = port_no, then calls dpif_linux_vport_transact() to talk to the kernel.
dpif_linux_flow_get: every flow-related message uses struct dpif_linux_flow (shown above), the transaction function is dpif_linux_flow_transact(), and the command is OVS_FLOW_CMD_GET.
dpif_linux_flow_put uses the command OVS_FLOW_CMD_NEW or OVS_FLOW_CMD_SET; dpif_linux_flow_del uses OVS_FLOW_CMD_DEL.
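A sketch of how flow_put/flow_del map onto struct dpif_linux_flow: choose the Generic Netlink command, attach the flow key (plus actions for a put), and run one flow transaction. The command values, the may_create flag and flow_transact() are stubbed placeholders, not the real dpif-linux code.
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Placeholder command numbers; the real enum ovs_flow_cmd is in the
 * openvswitch kernel headers. */
enum flow_cmd { FLOW_CMD_NEW = 1, FLOW_CMD_DEL = 2, FLOW_CMD_SET = 4 };

/* Cut-down struct dpif_linux_flow: only what this sketch needs. */
struct flow_request {
    enum flow_cmd cmd;
    int dp_ifindex;
    const void *key;          /* would be struct nlattr * in real code */
    size_t key_len;
    const void *actions;      /* OVS_FLOW_ATTR_ACTIONS payload, puts only */
    size_t actions_len;
};

/* Stub for dpif_linux_flow_transact(). */
static int flow_transact(const struct flow_request *request)
{
    printf("netlink: cmd=%d key_len=%zu actions_len=%zu\n",
           request->cmd, request->key_len, request->actions_len);
    return 0;
}

static int flow_put_sketch(int dp_ifindex, const void *key, size_t key_len,
                           const void *actions, size_t actions_len,
                           bool may_create)
{
    struct flow_request fr = {
        .cmd = may_create ? FLOW_CMD_NEW : FLOW_CMD_SET,
        .dp_ifindex = dp_ifindex,
        .key = key, .key_len = key_len,
        .actions = actions, .actions_len = actions_len,
    };
    return flow_transact(&fr);
}

static int flow_del_sketch(int dp_ifindex, const void *key, size_t key_len)
{
    struct flow_request fr = {
        .cmd = FLOW_CMD_DEL,
        .dp_ifindex = dp_ifindex,
        .key = key, .key_len = key_len,
    };
    return flow_transact(&fr);
}

int main(void)
{
    uint8_t key[16] = { 0 };
    flow_put_sketch(7, key, sizeof key, NULL, 0, true);
    flow_del_sketch(7, key, sizeof key);
    return 0;
}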