版权声明:如有需要,可供转载,但请注明出处:https://blog.csdn.net/City_of_skey/article/details/85240141
目录
1、网桥简介
2、网桥初始化
2.1 struct net_bridge
2.2 struct net_bridge_port
2.3 struct net_bridge_fdb_entry
2.4 br_init
3、新建网桥
3.1、br_add_bridge
3.2、new_bridge_dev
3.3、br_dev_setup
4、新增端口
4.1、add_del_if
4.2、br_add_if
linux下的网桥是一个虚拟设备,本质上说是一个高层次的二层虚拟设备,它把其他的从设备虚拟为一个port。当把一个网卡设备加入到网桥后,网桥的mac地址就是其下所有网卡中最小的那个mac地址,所有的网卡共享网桥的ip。网卡接收、发送的数据包都交给网桥决策。网桥工作在链路层。
如下图所示网桥收包流程图:
struct net_bridge是描述网桥的结构体,其中比较重要的成员是hash:这个哈希表保存了mac地址与网络端口的对应关系(也就是mac地址学习的结果),网桥转发时要根据mac地址在这个哈希表中查找应该从哪个端口转发出去。
/*
 * struct net_bridge - state of one bridge device.
 *
 * new_bridge_dev() obtains this structure with netdev_priv() on the bridge's
 * net_device and fills in the initial values of the lock, list, STP and
 * ageing fields.
 */
struct net_bridge
{
spinlock_t lock; // bridge spinlock; br_add_if() holds it around the STP/feature recomputation
struct list_head port_list; // list of the ports attached to this bridge (linked through net_bridge_port.list)
struct net_device *dev; // the bridge's own net_device instance
struct br_cpu_netstats __percpu *stats; // per-CPU counters, allocated with alloc_percpu() in new_bridge_dev()
spinlock_t hash_lock; // NOTE(review): presumably protects hash[] below - confirm
struct hlist_head hash[BR_HASH_SIZE]; // learned MAC addresses, i.e. buckets of struct net_bridge_fdb_entry
unsigned long feature_mask; // set to dev->features in new_bridge_dev()
#ifdef CONFIG_BRIDGE_NETFILTER
struct rtable fake_rtable;
#endif
unsigned long flags;
#define BR_SET_MAC_ADDR 0x00000001
/* STP */
bridge_id designated_root;
bridge_id bridge_id;
u32 root_path_cost;
unsigned long max_age;
unsigned long hello_time;
unsigned long forward_delay;
unsigned long bridge_max_age;
unsigned long ageing_time;
unsigned long bridge_hello_time;
unsigned long bridge_forward_delay;
u8 group_addr[ETH_ALEN];
u16 root_port;
/* which STP implementation is in use; new_bridge_dev() starts with BR_NO_STP */
enum {
BR_NO_STP, /* no spanning tree */
BR_KERNEL_STP, /* old STP in kernel */
BR_USER_STP, /* new RSTP in userspace */
} stp_enabled;
unsigned char topology_change;
unsigned char topology_change_detected;
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
unsigned char multicast_router;
u8 multicast_disabled:1;
u32 hash_elasticity;
u32 hash_max;
u32 multicast_last_member_count;
u32 multicast_startup_queries_sent;
u32 multicast_startup_query_count;
unsigned long multicast_last_member_interval;
unsigned long multicast_membership_interval;
unsigned long multicast_querier_interval;
unsigned long multicast_query_interval;
unsigned long multicast_query_response_interval;
unsigned long multicast_startup_query_interval;
spinlock_t multicast_lock;
struct net_bridge_mdb_htable *mdb;
struct hlist_head router_list;
struct hlist_head mglist;
/* multicast timers */
struct timer_list multicast_router_timer;
struct timer_list multicast_querier_timer;
struct timer_list multicast_query_timer;
#endif
/* bridge-wide timers */
struct timer_list hello_timer;
struct timer_list tcn_timer;
struct timer_list topology_change_timer;
struct timer_list gc_timer;
struct kobject *ifobj;
};
struct net_bridge_port描述网桥下的网卡端口。
/*
 * struct net_bridge_port - one interface attached to a bridge.
 *
 * br_add_if() obtains one from new_nbp() and publishes it through
 * dev->br_port and the bridge's port_list.
 */
struct net_bridge_port
{
struct net_bridge *br; // bridge this port belongs to
struct net_device *dev; // net_device backing this port
struct list_head list; // node in the bridge's port_list
/* STP */
u8 priority;
u8 state;
u16 port_no;
unsigned char topology_change_ack;
unsigned char config_pending;
port_id port_id;
port_id designated_port;
bridge_id designated_root;
bridge_id designated_bridge;
u32 path_cost;
u32 designated_cost;
/* per-port timers */
struct timer_list forward_delay_timer;
struct timer_list hold_timer;
struct timer_list message_age_timer;
struct kobject kobj; // registered by br_add_if() under the port device's kobject
struct rcu_head rcu;
unsigned long flags;
#define BR_HAIRPIN_MODE 0x00000001
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
u32 multicast_startup_queries_sent;
unsigned char multicast_router;
struct timer_list multicast_router_timer;
struct timer_list multicast_query_timer;
struct hlist_head mglist;
struct hlist_node rlist;
#endif
#ifdef CONFIG_SYSFS
char sysfs_name[IFNAMSIZ];
#endif
};
/*
 * Receive/transmit packet and byte counters. new_bridge_dev() allocates one
 * instance per CPU for every bridge (net_bridge.stats).
 */
struct br_cpu_netstats {
unsigned long rx_packets;
unsigned long rx_bytes;
unsigned long tx_packets;
unsigned long tx_bytes;
};
struct net_bridge_fdb_entry是一个mac地址和网络端口的对应表项,最主要的两个成员是dst、addr,也就是这个mac地址从哪个网络端口出去。
/*
 * struct net_bridge_fdb_entry - one MAC address to port mapping of the
 * forwarding database.
 */
struct net_bridge_fdb_entry
{
struct hlist_node hlist; // hash-chain linkage
struct net_bridge_port *dst; // port associated with this MAC address
struct rcu_head rcu;
unsigned long ageing_timer;
mac_addr addr; // the MAC address
unsigned char is_local; // whether this is a MAC address of the local machine
unsigned char is_static; // whether this is a static MAC address
};
网桥在内核中是以模块的形式实现的,该模块的初始化函数是br_init,位于net/bridge/br.c文件中
br_init主要做以下几件事情:
(1)STP协议注册
(2)MAC学习表的初始化
(3)网桥在netfilter上钩子函数注册
(4)网桥的ioctl设置,提供给应用层的操作接口
/*
 * Module initialisation for the bridge.
 *
 * Registers, in order: the STP protocol, the forwarding database, the
 * per-network-namespace operations, the netfilter hooks, the netdevice
 * notifier and netlink support. After that it installs the ioctl stub and
 * the frame-handling hook.
 *
 * Returns 0 on success. If a step fails, the err_out* labels undo the steps
 * that already succeeded, in reverse order, and the error code of the failing
 * step is returned.
 */
static int __init br_init(void)
{
int err;
/* register the STP protocol */
err = stp_proto_register(&br_stp_proto);
if (err < 0) {
pr_err("bridge: can't register sap for STP\n");
return err;
}
/* initialise the port/MAC forwarding database */
err = br_fdb_init();
if (err)
goto err_out;
err = register_pernet_subsys(&br_net_ops);
if (err)
goto err_out1;
/* register the netfilter hook functions */
err = br_netfilter_init();
if (err)
goto err_out2;
/* subscribe to the netdevice notifier chain */
err = register_netdevice_notifier(&br_device_notifier);
if (err)
goto err_out3;
/* netlink initialisation */
err = br_netlink_init();
if (err)
goto err_out4;
/* install the bridge ioctl handler, the interface offered to user space */
brioctl_set(br_ioctl_deviceless_stub);
/* install the bridge frame-handling entry point */
br_handle_frame_hook = br_handle_frame;
#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
br_fdb_test_addr_hook = br_fdb_test_addr;
#endif
return 0;
/* error unwinding: each label undoes the step before the one that failed */
err_out4:
unregister_netdevice_notifier(&br_device_notifier);
err_out3:
br_netfilter_fini();
err_out2:
unregister_pernet_subsys(&br_net_ops);
err_out1:
br_fdb_fini();
err_out:
stp_proto_unregister(&br_stp_proto);
return err;
}
网桥在netfilter框架中主要注册了7个钩子函数,接下来会详细介绍。
/*
 * Netfilter hook table of the bridge: five PF_BRIDGE hooks (pre-routing,
 * local-in, two forward hooks for IP and ARP, post-routing) followed by
 * ip_sabotage_in at NF_INET_PRE_ROUTING for PF_INET and for PF_INET6.
 * The IP forward hook uses priority NF_BR_PRI_BRNF - 1, the ARP forward
 * hook uses NF_BR_PRI_BRNF.
 */
/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
 * br_dev_queue_push_xmit is called afterwards */
static struct nf_hook_ops br_nf_ops[] __read_mostly = {
{
.hook = br_nf_pre_routing,
.owner = THIS_MODULE,
.pf = PF_BRIDGE,
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_BR_PRI_BRNF,
},
{
.hook = br_nf_local_in,
.owner = THIS_MODULE,
.pf = PF_BRIDGE,
.hooknum = NF_BR_LOCAL_IN,
.priority = NF_BR_PRI_BRNF,
},
{
.hook = br_nf_forward_ip,
.owner = THIS_MODULE,
.pf = PF_BRIDGE,
.hooknum = NF_BR_FORWARD,
.priority = NF_BR_PRI_BRNF - 1,
},
{
.hook = br_nf_forward_arp,
.owner = THIS_MODULE,
.pf = PF_BRIDGE,
.hooknum = NF_BR_FORWARD,
.priority = NF_BR_PRI_BRNF,
},
{
.hook = br_nf_post_routing,
.owner = THIS_MODULE,
.pf = PF_BRIDGE,
.hooknum = NF_BR_POST_ROUTING,
.priority = NF_BR_PRI_LAST,
},
{
.hook = ip_sabotage_in,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_FIRST,
},
{
.hook = ip_sabotage_in,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_FIRST,
},
};
我们通过brctl addbr br0命令新建一个网桥br0,内核提供的ioctl接口由函数br_ioctl_deviceless_stub实现,新建网桥调用br_add_bridge,删除网桥调用br_del_bridge。
/*
 * ioctl entry point for bridge commands that are not tied to an existing
 * bridge device.
 *
 * SIOCGIFBR / SIOCSIFBR are passed on to old_deviceless().
 * SIOCBRADDBR / SIOCBRDELBR require CAP_NET_ADMIN, read an IFNAMSIZ-byte name
 * from @uarg and create or delete the bridge of that name.
 *
 * Returns the result of the delegated call, -EPERM without CAP_NET_ADMIN,
 * -EFAULT if the name cannot be copied from user space, and -EOPNOTSUPP for
 * any other @cmd.
 */
int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
{
	char ifname[IFNAMSIZ];

	if (cmd == SIOCGIFBR || cmd == SIOCSIFBR)
		return old_deviceless(net, uarg);

	if (cmd != SIOCBRADDBR && cmd != SIOCBRDELBR)
		return -EOPNOTSUPP;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* fetch the bridge name from the caller's address space */
	if (copy_from_user(ifname, uarg, IFNAMSIZ) != 0)
		return -EFAULT;

	/* user space is not trusted to have terminated the string */
	ifname[IFNAMSIZ - 1] = 0;

	if (cmd == SIOCBRDELBR)
		return br_del_bridge(net, ifname);	/* remove a bridge device */

	return br_add_bridge(net, ifname);		/* create a bridge device */
}
br_add_bridge主要做三件事情:
1、调用new_bridge_dev根据网桥名字新建一个网桥设备
2、网桥设备也是一个以太设备所以要调用register_netdevice注册这个网桥设备的struct net_device
3、初始化网桥设备在sysfs中的相关信息,便于管理查找
/*
 * Create and register a bridge device called @name in namespace @net.
 *
 * The device comes from new_bridge_dev(). Under the RTNL lock a '%' template
 * in the name is resolved with dev_alloc_name(), the device is registered and
 * its sysfs entries are created. If the sysfs step fails the device is
 * unregistered again; if an earlier step fails it is released with
 * free_netdev().
 *
 * Returns 0 on success, -ENOMEM if the device could not be allocated, or the
 * error code of the failing step.
 *
 * NOTE(review): the free_netdev() path does not visibly release the per-CPU
 * stats that new_bridge_dev() allocates - confirm whether they leak here.
 */
int br_add_bridge(struct net *net, const char *name)
{
	struct net_device *dev = new_bridge_dev(net, name);
	int ret;

	if (dev == NULL)
		return -ENOMEM;

	rtnl_lock();

	/* a name such as "br%d" is a template that still needs a number */
	if (strchr(dev->name, '%') != NULL) {
		ret = dev_alloc_name(dev, dev->name);
		if (ret < 0)
			goto free_dev;
	}

	SET_NETDEV_DEVTYPE(dev, &br_type);

	/* register the virtual bridge device */
	ret = register_netdevice(dev);
	if (ret != 0)
		goto free_dev;

	/* create the sysfs entries used to find and manage the bridge */
	ret = br_sysfs_addbr(dev);
	if (ret != 0)
		unregister_netdevice(dev);

	goto unlock;

free_dev:
	free_netdev(dev);
unlock:
	rtnl_unlock();
	return ret;
}
网桥设备也是一个虚拟的以太设备,所以调用alloc_netdev新建一个以太设备,并执行网桥初始化函数br_dev_setup,net_device的私有数据指向网桥。
/*
 * Allocate a net_device for a new bridge called @name in namespace @net and
 * give the embedded struct net_bridge its initial values.
 *
 * br_dev_setup is passed to alloc_netdev() as the setup callback. The bridge
 * state lives in the device's private area (netdev_priv()).
 *
 * Returns the new device, or NULL if the device or its per-CPU statistics
 * could not be allocated.
 */
static struct net_device *new_bridge_dev(struct net *net, const char *name)
{
	struct net_device *dev;
	struct net_bridge *bridge;

	dev = alloc_netdev(sizeof(struct net_bridge), name, br_dev_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	/* the bridge state is the private data of the net_device */
	bridge = netdev_priv(dev);
	bridge->dev = dev;

	bridge->stats = alloc_percpu(struct br_cpu_netstats);
	if (bridge->stats == NULL) {
		free_netdev(dev);
		return NULL;
	}

	spin_lock_init(&bridge->lock);
	INIT_LIST_HEAD(&bridge->port_list);
	spin_lock_init(&bridge->hash_lock);

	bridge->bridge_id.prio[0] = 0x80;
	bridge->bridge_id.prio[1] = 0x00;

	memcpy(bridge->group_addr, br_group_address, ETH_ALEN);

	bridge->feature_mask = dev->features;

	/* STP is off by default */
	bridge->stp_enabled = BR_NO_STP;
	bridge->designated_root = bridge->bridge_id;
	bridge->root_path_cost = 0;
	bridge->root_port = 0;

	bridge->max_age = 20 * HZ;
	bridge->bridge_max_age = bridge->max_age;

	bridge->hello_time = 2 * HZ;
	bridge->bridge_hello_time = bridge->hello_time;

	bridge->forward_delay = 15 * HZ;
	bridge->bridge_forward_delay = bridge->forward_delay;

	bridge->topology_change = 0;
	bridge->topology_change_detected = 0;
	bridge->ageing_time = 300 * HZ;

	/* routing-table related initialisation */
	br_netfilter_rtable_init(bridge);

	/* timer initialisation */
	br_stp_timer_init(bridge);
	br_multicast_init(bridge);

	return dev;
}
这个函数主要是初始化网桥
(1)初始化网桥的MAC
(2)对网桥进行以太网初始化
(3)设置以太设备的操作函数指针br_netdev_ops
(4)设置以太设备管理函数指向br_ethtool_ops。
/*
 * Setup callback for a bridge net_device (passed to alloc_netdev() by
 * new_bridge_dev()).
 *
 * Gives the device a random MAC address, applies the generic Ethernet setup,
 * installs the bridge's netdev/ethtool operations and destructor, and sets
 * the queue length, private flags and feature bits.
 */
void br_dev_setup(struct net_device *dev)
{
	/* start out with a random MAC address */
	random_ether_addr(dev->dev_addr);

	/* a bridge is an Ethernet device, so run the Ethernet defaults */
	ether_setup(dev);

	/* device operations, destructor and ethtool operations */
	dev->netdev_ops = &br_netdev_ops;
	dev->destructor = br_dev_free;
	SET_ETHTOOL_OPS(dev, &br_ethtool_ops);

	dev->tx_queue_len = 0;

	/* mark the device as a bridge in its private flags */
	dev->priv_flags = IFF_EBRIDGE;

	dev->features = NETIF_F_SG
		      | NETIF_F_FRAGLIST
		      | NETIF_F_HIGHDMA
		      | NETIF_F_GSO_MASK
		      | NETIF_F_NO_CSUM
		      | NETIF_F_LLTX
		      | NETIF_F_NETNS_LOCAL
		      | NETIF_F_GSO;
}
br_netdev_ops:
/* net_device operations of a bridge device; installed by br_dev_setup(). */
static const struct net_device_ops br_netdev_ops = {
.ndo_open = br_dev_open, // open the device
.ndo_stop = br_dev_stop, // stop the device
.ndo_start_xmit = br_dev_xmit, // transmit a packet
.ndo_get_stats = br_get_stats,
.ndo_set_mac_address = br_set_mac_address, // set the MAC address
.ndo_set_multicast_list = br_dev_set_multicast_list,
.ndo_change_mtu = br_change_mtu, // change the MTU
.ndo_do_ioctl = br_dev_ioctl, // ioctl handler; adds and removes ports
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_netpoll_cleanup = br_netpoll_cleanup,
#endif
};
br_ethtool_ops:
/*
 * ethtool operations of a bridge device; installed by br_dev_setup() through
 * SET_ETHTOOL_OPS(). The tx-checksum, scatter-gather and TSO setters are
 * bridge-specific (br_set_*); the other entries are ethtool_op_* helpers,
 * except get_drvinfo, which is br_getinfo.
 */
static const struct ethtool_ops br_ethtool_ops = {
.get_drvinfo = br_getinfo,
.get_link = ethtool_op_get_link,
.get_tx_csum = ethtool_op_get_tx_csum,
.set_tx_csum = br_set_tx_csum,
.get_sg = ethtool_op_get_sg,
.set_sg = br_set_sg,
.get_tso = ethtool_op_get_tso,
.set_tso = br_set_tso,
.get_ufo = ethtool_op_get_ufo,
.set_ufo = ethtool_op_set_ufo,
.get_flags = ethtool_op_get_flags,
};
应用层通过brctl addif br0 eth0将eth0添加到网桥br0下面,内核的实现函数是br_dev_ioctl,它依据cmd调用add_del_if来添加或者删除端口
/*
 * ioctl handler of a bridge device (the ndo_do_ioctl entry of br_netdev_ops).
 *
 * SIOCDEVPRIVATE is passed on to old_dev_ioctl(). SIOCBRADDIF / SIOCBRDELIF
 * add or remove the interface whose index is rq->ifr_ifindex. Every other
 * command is logged with br_debug() and rejected with -EOPNOTSUPP.
 */
int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
	struct net_bridge *br = netdev_priv(dev);

	if (cmd == SIOCDEVPRIVATE)
		return old_dev_ioctl(dev, rq, cmd);

	/* the last argument tells add_del_if() whether to add (1) or remove (0) */
	if (cmd == SIOCBRADDIF || cmd == SIOCBRDELIF)
		return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF);

	br_debug(br, "Bridge does not support ioctl 0x%x\n", cmd);
	return -EOPNOTSUPP;
}
/*
 * Add the interface with index @ifindex to bridge @br (@isadd non-zero) or
 * remove it from the bridge (@isadd zero).
 *
 * Returns -EPERM without CAP_NET_ADMIN, -EINVAL if no device with that index
 * exists in the bridge's network namespace, otherwise the result of
 * br_add_if() / br_del_if().
 *
 * called with RTNL
 */
static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
{
	struct net_device *port_dev;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* look up the interface to be attached/detached; give up if it is missing */
	port_dev = __dev_get_by_index(dev_net(br->dev), ifindex);
	if (!port_dev)
		return -EINVAL;

	return isadd ? br_add_if(br, port_dev) : br_del_if(br, port_dev);
}
这个函数是网桥添加端口的关键,主要做以下事情
(1)、首先对要添加的设备进行检查:回环设备、非以太网设备直接返回;设备本身是网桥、已经被加入某个网桥或者带有IFF_DONT_BRIDGE标志时也直接返回
(2)、调用new_nbp新建一个网桥端口结构体struct net_bridge_port
(3)、将设备设置为混杂模式
(4)、将新增端口的MAC地址添加到端口-MAC表中
(5)、将新增的端口p添加到sysfs文件系统中
(6)、将新增的端口p添加到端口链表port_list中
/*
 * Attach network device @dev to bridge @br as a new port.
 *
 * Rejects unsuitable devices, creates the port with new_nbp(), switches the
 * device to promiscuous mode, registers the port's kobject, inserts the
 * device's MAC address into the forwarding database, creates the sysfs
 * entries, publishes the port (dev->br_port and br->port_list) and finally
 * recomputes the bridge id, features and MTU.
 *
 * Returns 0 on success. Errors: -EINVAL (loopback or non-Ethernet device),
 * -ELOOP (device is itself a bridge), -EBUSY (device already has a bridge
 * port), -EOPNOTSUPP (IFF_DONT_BRIDGE set), the error encoded in the pointer
 * returned by new_nbp(), or the error of the failing setup step.
 */
/* called with RTNL */
int br_add_if(struct net_bridge *br, struct net_device *dev)
{
struct net_bridge_port *p;
int err = 0;
/* loopback and non-Ethernet devices are refused */
/* Don't allow bridging non-ethernet like devices */
if ((dev->flags & IFF_LOOPBACK) ||
dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN)
return -EINVAL;
/* the device is itself a bridge (it transmits through br_dev_xmit) */
/* No bridging of bridges */
if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
return -ELOOP;
/* the device already has a bridge port attached */
/* Device is already being bridged */
if (dev->br_port != NULL)
return -EBUSY;
/* the device carries the "do not bridge" flag */
/* No bridging devices that dislike that (e.g. wireless) */
if (dev->priv_flags & IFF_DONT_BRIDGE)
return -EOPNOTSUPP;
/* create the port structure */
p = new_nbp(br, dev);
if (IS_ERR(p))
return PTR_ERR(p);
/* put the device into promiscuous mode */
err = dev_set_promiscuity(dev, 1);
if (err)
goto put_back;
err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
SYSFS_BRIDGE_PORT_ATTR);
if (err)
goto err0;
/* insert the device's MAC address into the port/MAC forwarding database */
err = br_fdb_insert(br, p, dev->dev_addr);
if (err)
goto err1;
/* create the sysfs entries of the new port */
err = br_sysfs_addif(p);
if (err)
goto err2;
rcu_assign_pointer(dev->br_port, p);
dev_disable_lro(dev);
/* link the port into the bridge's port_list */
list_add_rcu(&p->list, &br->port_list);
spin_lock_bh(&br->lock);
br_stp_recalculate_bridge_id(br);
br_features_recompute(br);
/* enable the port only when the device has carrier and both it and the bridge are up */
if ((dev->flags & IFF_UP) && netif_carrier_ok(dev) &&
(br->dev->flags & IFF_UP))
br_stp_enable_port(p);
spin_unlock_bh(&br->lock);
br_ifinfo_notify(RTM_NEWLINK, p);
dev_set_mtu(br->dev, br_min_mtu(br));
kobject_uevent(&p->kobj, KOBJ_ADD);
br_netpoll_enable(br, dev);
return 0;
/* error unwinding: the labels fall through, undoing the completed steps in reverse order */
err2:
br_fdb_delete_by_port(br, p, 1);
err1:
kobject_put(&p->kobj);
/* p is cleared so that the kfree(p) at the end receives NULL on this path */
p = NULL; /* kobject_put frees */
err0:
dev_set_promiscuity(dev, -1);
put_back:
/* NOTE(review): presumably drops a device reference taken inside new_nbp() - confirm */
dev_put(dev);
kfree(p);
return err;
}