转载:
https://blog.csdn.net/Sophisticated_/article/details/87923362
http://blog.chinaunix.net/uid-28541347-id-5750406.html
https://blog.csdn.net/linyt/article/details/5191512
https://blog.csdn.net/NW_NW_NW/article/details/76674232
https://www.cnblogs.com/super-king/p/bridge_implement.html
https://www.cnblogs.com/ck1020/p/5894235.html
https://blog.csdn.net/one_clouder/article/details/52877737
linux内核源代码变动怎么这么大,handle_bridge函数居然没有了,本来接着准备以3.9.1分析的,但发现和后面的又变了,还是以4.15.1现在电脑上用的版本分析吧。
linux kernel:4.15.1
先看两张图片
转载:https://blog.csdn.net/NW_NW_NW/article/details/76153027
linux内核版本不一样,函数发生了些许小改变,对图片进行细微更改。
转载:https://blog.csdn.net/u012247418/article/details/90137663
作用:
1. 对于需要转发的报文,调用NF_BR_PRE_ROUTING处钩子函数,结束后,进入br_handle_frame_finish()函数;
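2. 对于目的MAC为链路本地保留地址(01:80:c2:00:00:0X,例如STP报文)且不需要转发的报文:STP(0x00)和LLDP(0x0E)报文在调用__br_handle_local_finish()后直接返回RX_HANDLER_PASS;Pause(0x01)报文直接丢弃;其余报文调用NF_BR_LOCAL_IN处钩子函数,结束后,进入br_handle_local_finish()函数,在br_handle_local_finish()函数中会调用br_pass_frame_up()函数。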
//linux/net/bridge/br_input.c
/*
* br_handle_frame - receive handler for frames arriving on a bridge port
* @pskb: in/out pointer to the received skb; it is rewritten with the
*        (possibly replaced) skb before the RX_HANDLER_PASS returns that
*        follow skb_share_check()
*
* Returns RX_HANDLER_PASS when the frame is left for the caller to keep
* processing, and RX_HANDLER_CONSUMED when the skb was freed here, could not
* be unshared, or was handed to a netfilter hook.
*
* note: already called with rcu_read_lock
*/
rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
{
struct net_bridge_port *p;
struct sk_buff *skb = *pskb;
const unsigned char *dest = eth_hdr(skb)->h_dest;
br_should_route_hook_t *rhook;
/* Packets marked PACKET_LOOPBACK are not handled by the bridge; pass them back untouched. */
if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
return RX_HANDLER_PASS;
/* Drop frames whose source MAC fails is_valid_ether_addr() (multicast or all-zero source). */
if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
goto drop;
/* skb_share_check() may hand back a different skb, so skb is reassigned; a NULL result is reported as consumed. */
skb = skb_share_check(skb, GFP_ATOMIC);
if (!skb)
return RX_HANDLER_CONSUMED;
/* Look up the bridge port attached to the receiving device. */
p = br_port_get_rcu(skb->dev);
if (p->flags & BR_VLAN_TUNNEL) {
if (br_handle_ingress_vlan_tunnel(skb, p,
nbp_vlan_group_rcu(p)))
goto drop;
}
/* Special destination MAC handling. */
// Destination is a link-local reserved address (01:80:c2:00:00:0X), e.g. STP frames
if (unlikely(is_link_local_ether_addr(dest))) {
u16 fwd_mask = p->br->group_fwd_mask_required;
/*
* See IEEE 802.1D Table 7-10 Reserved addresses
*
* Assignment Value
* Bridge Group Address 01-80-C2-00-00-00
* (MAC Control) 802.3 01-80-C2-00-00-01
* (Link Aggregation) 802.3 01-80-C2-00-00-02
* 802.1X PAE address 01-80-C2-00-00-03
*
* 802.1AB LLDP 01-80-C2-00-00-0E
*
* Others reserved for future standardization
*/
fwd_mask |= p->group_fwd_mask;
switch (dest[5]) {
case 0x00: /* Bridge Group Address */
/* If STP is turned off,
then must forward to keep loop detection */
if (p->br->stp_enabled == BR_NO_STP ||
fwd_mask & (1u << dest[5]))
goto forward;
/* Not forwarded: publish the current skb to the caller and pass it on. */
*pskb = skb;
__br_handle_local_finish(skb);
return RX_HANDLER_PASS;
case 0x01: /* IEEE MAC (Pause) */
goto drop;
case 0x0E: /* 802.1AB LLDP */
fwd_mask |= p->br->group_fwd_mask;
if (fwd_mask & (1u << dest[5]))
goto forward;
*pskb = skb;
__br_handle_local_finish(skb);
return RX_HANDLER_PASS;
default:
/* Allow selective forwarding for most other protocols */
fwd_mask |= p->br->group_fwd_mask;
if (fwd_mask & (1u << dest[5]))
goto forward;
}
/* Deliver packet to local host only */
/* Run the NF_BR_LOCAL_IN hook with br_handle_local_finish as its finish function. */
NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev),
NULL, skb, skb->dev, NULL, br_handle_local_finish);
return RX_HANDLER_CONSUMED;
}
forward:
switch (p->state) {
// Port is in the forwarding state
case BR_STATE_FORWARDING:
rhook = rcu_dereference(br_should_route_hook);
if (rhook) {
/* A non-zero result from the hook means the frame is passed on instead of bridged. */
if ((*rhook)(skb)) {
*pskb = skb;
return RX_HANDLER_PASS;
}
/* Re-read the destination after the hook has run. */
dest = eth_hdr(skb)->h_dest;
}
/* fall through */
/* Learning state; the forwarding state also reaches this code because the case above has no break. */
case BR_STATE_LEARNING:
/* Destination MAC equals the bridge device's own address: mark the packet as addressed to this host. */
if (ether_addr_equal(p->br->dev->dev_addr, dest))
skb->pkt_type = PACKET_HOST;
/* Run the NF_BR_PRE_ROUTING hook with br_handle_frame_finish as its finish function. */
NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
br_handle_frame_finish);
break;
default:
/* Any other port state, and every "goto drop" above, frees the skb here. */
drop:
kfree_skb(skb);
}
return RX_HANDLER_CONSUMED;
}
相关函数
1. rx_handler_result_t枚举类型
/*
 * Verdict returned by a receive handler such as br_handle_frame().
 *
 * RX_HANDLER_CONSUMED: br_handle_frame() returns this once the skb has been
 *                      freed or handed to a netfilter hook.
 * RX_HANDLER_ANOTHER:  not returned by the bridge code shown in this file.
 * RX_HANDLER_EXACT:    not returned by the bridge code shown in this file.
 * RX_HANDLER_PASS:     br_handle_frame() returns this when it leaves the
 *                      frame for the caller to keep processing.
 */
typedef enum rx_handler_result {
	RX_HANDLER_CONSUMED = 0,
	RX_HANDLER_ANOTHER,
	RX_HANDLER_EXACT,
	RX_HANDLER_PASS,
} rx_handler_result_t;
2. is_valid_ether_addr()
/**
 * is_valid_ether_addr - Check whether an Ethernet address is usable as a
 *                       station address
 * @addr: Pointer to a six-byte array containing the Ethernet address
 *
 * An address is rejected when it is a multicast address or when it is
 * 00:00:00:00:00:00. FF:FF:FF:FF:FF:FF is itself a multicast address, so the
 * multicast test already covers it and no separate broadcast test is needed.
 *
 * Return true if the address is valid.
 *
 * Please note: addr must be aligned to u16.
 */
static inline bool is_valid_ether_addr(const u8 *addr)
{
	/* Multicast is tested first, zero second (same short-circuit order). */
	return !(is_multicast_ether_addr(addr) || is_zero_ether_addr(addr));
}
3.br_handle_local_finish()
br_handle_local_finish()函数中调用br_pass_frame_up()函数。
/*
 * Finish function passed to the NF_BR_LOCAL_IN hook by br_handle_frame().
 * Runs __br_handle_local_finish() on the frame, records the bridge device in
 * the skb control block and hands the frame to br_pass_frame_up().
 * Always returns 0; the result of br_pass_frame_up() is not propagated.
 *
 * note: already called with rcu_read_lock
 */
static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_bridge_port *port;

	port = br_port_get_rcu(skb->dev);

	__br_handle_local_finish(skb);

	/* br_pass_frame_up() reads the bridge device from the control block. */
	BR_INPUT_SKB_CB(skb)->brdev = port->br->dev;
	br_pass_frame_up(skb);

	return 0;
}
作用:
网桥设备是否处于混杂模式,如果是,则会发一份到本地进行处理
如果是广播包,则会进行广播洪泛,并会发一份到本地处理
如果是组播包,则根据组播表进行组播转发,并发一份数据包到本地处理
如果是单播包,发往本地的单播包则送到本地处理,在fdb表中可以找到转发表项的单播包则进行转发,未知单播包在广播域内进行洪泛
//linux/net/bridge/br_input.c
/*
* br_handle_frame_finish - forwarding decision for a frame received on a bridge port
* @net: network namespace (not referenced in this function body)
* @sk: socket (not referenced in this function body)
* @skb: the received frame
*
* Learns the source address, classifies the frame as unicast, multicast or
* broadcast, then forwards it to one port, floods it, and/or passes it up to
* the bridge device itself.
*
* Returns the result of br_pass_frame_up() when the frame is delivered
* locally, otherwise 0.
*
* note: already called with rcu_read_lock
*/
int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
enum br_pkt_type pkt_type = BR_PKT_UNICAST;
struct net_bridge_fdb_entry *dst = NULL;
struct net_bridge_mdb_entry *mdst;
bool local_rcv, mcast_hit = false;
const unsigned char *dest;
struct net_bridge *br;
u16 vid = 0;
/* Drop when the device has no bridge port or the port is in BR_STATE_DISABLED. */
if (!p || p->state == BR_STATE_DISABLED)
goto drop;
/* VLAN ingress check; vid is filled in for the lookups below.
NOTE(review): a rejected frame leaves via "out" without kfree_skb() here, so
br_allowed_ingress() presumably frees the skb itself - confirm. */
if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid))
goto out;
nbp_switchdev_frame_mark(p, skb);
/* insert into forwarding database after filtering to avoid spoofing */
br = p->br;
/* Update the FDB with the source MAC only when the port has BR_LEARNING set.
(Original author's note, not verifiable from this file: a newly created port
usually has BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD.) */
if (p->flags & BR_LEARNING)
br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false);
// Local-delivery flag, initially set when the bridge device is promiscuous; !! normalizes the bit test to a bool
local_rcv = !!(br->dev->flags & IFF_PROMISC);
dest = eth_hdr(skb)->h_dest;
/* Destination is a multicast address */
if (is_multicast_ether_addr(dest)) {
/* by definition the broadcast is also a multicast address */
/* Broadcast frames are always delivered locally as well. */
if (is_broadcast_ether_addr(dest)) {
pkt_type = BR_PKT_BROADCAST;
local_rcv = true;
} else {
pkt_type = BR_PKT_MULTICAST;
// Multicast snooping entry point (per the original note: IGMP processing);
// a non-zero return drops the frame
if (br_multicast_rcv(br, p, skb, vid))
goto drop;
}
}
// A port still in BR_STATE_LEARNING has learned the source above but drops the frame itself.
if (p->state == BR_STATE_LEARNING)
goto drop;
// Store the bridge's net_device in the skb control block (BR_INPUT_SKB_CB)
BR_INPUT_SKB_CB(skb)->brdev = br->dev;
/* ARP/RARP frames go to br_do_proxy_suppress_arp(); ICMPv6 frames recognised by
br_is_nd_neigh_msg() go to br_do_suppress_nd() when neigh_suppress_enabled is set. */
if (IS_ENABLED(CONFIG_INET) &&
(skb->protocol == htons(ETH_P_ARP) ||
skb->protocol == htons(ETH_P_RARP))) {
br_do_proxy_suppress_arp(skb, br, vid, p);
} else if (IS_ENABLED(CONFIG_IPV6) &&
skb->protocol == htons(ETH_P_IPV6) &&
br->neigh_suppress_enabled &&
pskb_may_pull(skb, sizeof(struct ipv6hdr) +
sizeof(struct nd_msg)) &&
ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
struct nd_msg *msg, _msg;
msg = br_is_nd_neigh_msg(skb, &_msg);
if (msg)
br_do_suppress_nd(skb, br, vid, p, msg);
}
switch (pkt_type) {
// Multicast frame
case BR_PKT_MULTICAST:
// Look up the multicast group entry and decide whether the frame is also delivered locally.
mdst = br_mdb_get(br, skb, vid);
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
br_multicast_querier_exists(br, eth_hdr(skb))) {
/* Deliver locally only if the host joined the group or the bridge is a multicast router. */
if ((mdst && mdst->host_joined) ||
br_multicast_is_router(br)) {
local_rcv = true;
br->dev->stats.multicast++;
}
mcast_hit = true;
} else {
/* No usable group entry or no querier: flood and deliver locally. */
local_rcv = true;
br->dev->stats.multicast++;
}
break;
// Unicast frame
case BR_PKT_UNICAST:
// Look up the destination MAC in the FDB; falls through to "default: break"
dst = br_fdb_find_rcu(br, dest, vid);
default:
break;
}
// An FDB entry exists for the destination MAC (dst is only set in the unicast case)
if (dst) {
unsigned long now = jiffies;
// Entry is local: pass the frame up instead of forwarding
if (dst->is_local)
return br_pass_frame_up(skb);
/* Write the timestamp only when it changed. */
if (now != dst->used)
dst->used = now;
// Forward to the port recorded in the FDB entry (dst->dst). On this path local_rcv can only
// be set by IFF_PROMISC above; it is handed to br_forward() and the frame is also passed up below.
br_forward(dst->dst, skb, local_rcv, false);
} else {
// No FDB hit: flood, or forward via the multicast group entry when mcast_hit is set
if (!mcast_hit)
br_flood(br, skb, pkt_type, local_rcv, false);
else
br_multicast_flood(mdst, skb, local_rcv, false);
}
// local_rcv set: pass the frame up for local delivery.
if (local_rcv)
return br_pass_frame_up(skb);
out:
return 0;
drop:
kfree_skb(skb);
goto out;
}
数据包的目的MAC是本地的单播报文,广播,组播和网桥处于混杂模式时,报文都会通过br_pass_frame_up函数交由上层处理。
作用:
调用NF_BR_LOCAL_IN处钩子函数,最后调用br_netif_receive_skb函数,绕一圈后,交由上层处理。
//linux/net/bridge/br_input.c
/*
 * br_pass_frame_up - deliver a frame to the bridge device itself
 * @skb: frame whose control block (BR_INPUT_SKB_CB) already names the
 *       bridge device in ->brdev
 *
 * Accounts the frame in the bridge's per-CPU rx counters, applies the VLAN
 * egress check (skipped when the bridge device is promiscuous), re-targets
 * skb->dev to the bridge device and runs the NF_BR_LOCAL_IN hook with
 * br_netif_receive_skb as its finish function.
 *
 * Returns NET_RX_DROP when the frame is rejected by the egress check or by
 * br_handle_vlan(), otherwise the NF_HOOK() result.
 */
static int br_pass_frame_up(struct sk_buff *skb)
{
	struct net_device *bridge_dev = BR_INPUT_SKB_CB(skb)->brdev;
	struct net_bridge *bridge = netdev_priv(bridge_dev);
	struct pcpu_sw_netstats *rx_stats = this_cpu_ptr(bridge->stats);
	struct net_bridge_vlan_group *vlan_grp;
	struct net_device *ingress_dev;

	/* Receive accounting on the bridge device. */
	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->rx_packets++;
	rx_stats->rx_bytes += skb->len;
	u64_stats_update_end(&rx_stats->syncp);

	/* VLAN group of the bridge device itself. */
	vlan_grp = br_vlan_group_rcu(bridge);

	/*
	 * The bridge behaves like any other port: the frame must pass the
	 * egress check, except in promiscuous mode where someone may be
	 * running a packet capture.
	 */
	if (!(bridge_dev->flags & IFF_PROMISC)) {
		if (!br_allowed_egress(vlan_grp, skb)) {
			kfree_skb(skb);
			return NET_RX_DROP;
		}
	}

	/* Remember the real ingress device, then make the bridge the receiver. */
	ingress_dev = skb->dev;
	skb->dev = bridge_dev;

	/* VLAN processing may replace the skb or return NULL. */
	skb = br_handle_vlan(bridge, NULL, vlan_grp, skb);
	if (skb == NULL)
		return NET_RX_DROP;

	/* update the multicast stats if the packet is IGMP/MLD */
	br_multicast_count(bridge, NULL, skb, br_multicast_igmp_type(skb),
			   BR_MCAST_DIR_TX);

	return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
		       dev_net(ingress_dev), NULL, skb, ingress_dev, NULL,
		       br_netif_receive_skb);
}
再次进入netif_receive_skb,由于skb->dev被设置成了bridge,而bridge设备的rx_handler函数是没有被设置的,所以就不会再次进入bridge逻辑,而直接进入了主机上层协议栈。
相关函数
1. br_netif_receive_skb()
可以看到在br_netif_receive_skb()函数中调用了netif_receive_skb()函数。
/*
 * Finish function for the NF_BR_LOCAL_IN hook in br_pass_frame_up():
 * calls br_drop_fake_rtable() on the skb and then feeds it to
 * netif_receive_skb(), whose result is returned.
 */
static int br_netif_receive_skb(struct net *net, struct sock *sk,
				struct sk_buff *skb)
{
	int rc;

	br_drop_fake_rtable(skb);
	rc = netif_receive_skb(skb);

	return rc;
}
不是发往本地的数据包,但在fdb表中能找到对应的表项,则进行转发br_forward(),若在fdb表中找不到对应表项就进行洪泛br_flood().
作用:
主要是调用__br_forward()转发报文
//linux/net/bridge/br_forward.c
/**
 * br_forward - send a frame out of one specific bridge port
 * @to: destination port (may be NULL)
 * @skb: packet being forwarded
 * @local_rcv: the caller will also deliver @skb locally afterwards, so the
 *             original skb must survive this call
 * @local_orig: packet is locally originated
 *
 * When the port is missing or should_deliver() refuses it, nothing is sent
 * and @skb is freed unless @local_rcv is set.
 *
 * Should be called with rcu_read_lock.
 */
void br_forward(const struct net_bridge_port *to,
		struct sk_buff *skb, bool local_rcv, bool local_orig)
{
	/* Nothing to send to: release the skb unless the caller still needs it. */
	if (!to || !should_deliver(to, skb)) {
		if (!local_rcv)
			kfree_skb(skb);
		return;
	}

	/* The caller keeps the original for local delivery, so send a clone. */
	if (local_rcv) {
		deliver_clone(to, skb, local_orig);
		return;
	}

	__br_forward(to, skb, local_orig);
}
感觉这个函数很重要
作用
__br_forward()函数根据数据包的来源(local_orig)分别进入不同的钩子点,如果数据包是从本地发出的,则进入NF_BR_LOCAL_OUT,如果不是本地发出的,则进入NF_BR_FORWARD钩子,最后都进入br_forward_finish()函数。
//linux/net/bridge/br_forward.c
/*
 * __br_forward - push a frame towards egress port @to through netfilter
 * @to: egress bridge port
 * @skb: frame to send; ownership is taken on every path
 * @local_orig: true when the frame originated on this host
 *
 * Applies the port's VLAN processing, re-targets skb->dev to the egress
 * device and runs a bridge netfilter hook with br_forward_finish as its
 * finish function: NF_BR_LOCAL_OUT for locally originated frames,
 * NF_BR_FORWARD for all others. Locally originated frames take a netpoll
 * shortcut when netpoll_tx_running() reports true for the bridge device.
 */
static void __br_forward(const struct net_bridge_port *to,
			 struct sk_buff *skb, bool local_orig)
{
	struct net_bridge_vlan_group *vlan_grp = nbp_vlan_group_rcu(to);
	struct net_device *in_dev;
	struct net *netns;
	int hooknum;

	/* VLAN processing for the egress port; a NULL result ends the send. */
	skb = br_handle_vlan(to->br, to, vlan_grp, skb);
	if (skb == NULL)
		return;

	/* Keep the original receiving device, then switch to the egress device. */
	in_dev = skb->dev;
	skb->dev = to->dev;

	if (local_orig) {
		if (unlikely(netpoll_tx_running(to->br->dev))) {
			/* Netpoll path: bypass the netfilter hooks entirely. */
			if (is_skb_forwardable(skb->dev, skb)) {
				skb_push(skb, ETH_HLEN);
				br_netpoll_send_skb(to, skb);
			} else {
				kfree_skb(skb);
			}
			return;
		}

		/* Locally generated: LOCAL_OUT hook, no input device. */
		hooknum = NF_BR_LOCAL_OUT;
		netns = dev_net(skb->dev);
		in_dev = NULL;
	} else {
		if (skb_warn_if_lro(skb)) {
			kfree_skb(skb);
			return;
		}

		/* Bridged from another port: FORWARD hook. */
		hooknum = NF_BR_FORWARD;
		skb_forward_csum(skb);
		netns = dev_net(in_dev);
	}

	NF_HOOK(NFPROTO_BRIDGE, hooknum,
		netns, NULL, skb, in_dev, skb->dev,
		br_forward_finish);
}
br_forward_finish()函数比较简单,调用NF_BR_POST_ROUTING处的钩子函数,最后进入br_dev_queue_push_xmit函数。
//linux/net/bridge/br_forward.c
/*
 * br_forward_finish - finish function for the FORWARD / LOCAL_OUT hooks
 *
 * Runs the NF_BR_POST_ROUTING hook with skb->dev as the output device and
 * br_dev_queue_push_xmit as its finish function; returns the NF_HOOK() result.
 */
int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *out_dev = skb->dev;

	return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING,
		       net, sk, skb, NULL, out_dev,
		       br_dev_queue_push_xmit);
}
在br_dev_queue_push_xmit()中,会先skb_push(skb, ETH_HLEN);将data指向二层头部,然后调用dev_queue_xmit()发送报文。
//linux/net/bridge/br_forward.c
/*
 * br_dev_queue_push_xmit - final transmit step of the bridge forward path
 *
 * Frees the skb if is_skb_forwardable() rejects it for skb->dev. Otherwise
 * pushes ETH_HLEN bytes back onto the skb, and for CHECKSUM_PARTIAL frames
 * carrying an 802.1Q / 802.1AD tag sets the network header offset from the
 * depth reported by __vlan_get_protocol(), then queues the frame with
 * dev_queue_xmit().
 *
 * Always returns 0, whether the frame was queued or dropped.
 */
int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	if (!is_skb_forwardable(skb->dev, skb)) {
		kfree_skb(skb);
		return 0;
	}

	/* Expose the Ethernet header again before handing the skb to the device. */
	skb_push(skb, ETH_HLEN);
	br_drop_fake_rtable(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		if (skb->protocol == htons(ETH_P_8021Q) ||
		    skb->protocol == htons(ETH_P_8021AD)) {
			int depth;

			/* No protocol found behind the VLAN tag(s): drop. */
			if (!__vlan_get_protocol(skb, skb->protocol, &depth)) {
				kfree_skb(skb);
				return 0;
			}
			skb_set_network_header(skb, depth);
		}
	}

	dev_queue_xmit(skb);
	return 0;
}
br_flood()也是调用__br_forward()函数转发报文。
//linux/net/bridge/br_forward.c
/*
* br_flood - send a frame to every eligible port of a bridge
* @br: bridge whose port_list is walked
* @skb: frame to flood
* @pkt_type: BR_PKT_UNICAST, BR_PKT_MULTICAST or BR_PKT_BROADCAST; selects
*            which per-port flood flag is checked
* @local_rcv: when set, the original skb is kept for the caller (the last
*             port receives a clone and the skb is not freed here)
* @local_orig: passed through to the delivery helpers
*
* called under rcu_read_lock
*/
void br_flood(struct net_bridge *br, struct sk_buff *skb,
enum br_pkt_type pkt_type, bool local_rcv, bool local_orig)
{
u8 igmp_type = br_multicast_igmp_type(skb);
struct net_bridge_port *prev = NULL;
struct net_bridge_port *p;
/* Walk every port on the bridge's port_list. */
list_for_each_entry_rcu(p, &br->port_list, list)
{
/* Do not flood unicast traffic to ports that turn it off, nor
* other traffic if flood off, except for traffic we originate
*/
/* "continue" inside this switch skips to the next port of the enclosing loop. */
switch (pkt_type)
{
case BR_PKT_UNICAST:
if (!(p->flags & BR_FLOOD))
continue;
break;
case BR_PKT_MULTICAST:
if (!(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev)
continue;
break;
case BR_PKT_BROADCAST:
if (!(p->flags & BR_BCAST_FLOOD) && skb->dev != br->dev)
continue;
break;
}
/* Do not flood to ports that enable proxy ARP */
if (p->flags & BR_PROXYARP)
continue;
/* Skip ports with BR_PROXYARP_WIFI or BR_NEIGH_SUPPRESS when proxyarp_replied is set on the skb. */
if ((p->flags & (BR_PROXYARP_WIFI | BR_NEIGH_SUPPRESS)) &&
BR_INPUT_SKB_CB(skb)->proxyarp_replied)
continue;
/* NOTE(review): maybe_deliver() presumably sends a clone to the previous "prev" and
returns the port to remember next, so the last port can take the original skb - confirm. */
prev = maybe_deliver(prev, p, skb, local_orig);
if (IS_ERR(prev))
goto out;
/* Count the frame only when p became the new "prev". */
if (prev == p)
br_multicast_count(p->br, p, skb, igmp_type,
BR_MCAST_DIR_TX);
}
/* No port was selected at all. */
if (!prev)
goto out;
/* Last selected port: clone if the caller still needs skb, otherwise hand over the original. */
if (local_rcv)
deliver_clone(prev, skb, local_orig);
else
__br_forward(prev, skb, local_orig);
return;
out:
/* The skb is freed here only when the caller does not keep it for local delivery. */
if (!local_rcv)
kfree_skb(skb);
}
以后再分析,肚子饿了。
//linux/net/bridge/br_forward.c
/*
* br_multicast_flood - send a multicast frame to group member ports and router ports
* @mdst: multicast group entry whose ->ports list is walked (may be NULL)
* @skb: frame to send; its control block supplies the bridge device
* @local_rcv: when set, the original skb is kept for the caller (the last
*             port receives a clone and the skb is not freed here)
* @local_orig: passed through to the delivery helpers
*
* Walks two lists in parallel: the group's port list (mdst->ports) and the
* bridge's router_list. At each step the entry with the numerically larger
* port pointer is chosen.
* NOTE(review): this presumably relies on both lists being kept sorted by
* descending port pointer, so a port present in both is visited once - confirm.
*
* called with rcu_read_lock
*/
void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
struct sk_buff *skb,
bool local_rcv, bool local_orig)
{
struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
u8 igmp_type = br_multicast_igmp_type(skb);
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *prev = NULL;
struct net_bridge_port_group *p;
struct hlist_node *rp;
/* rp walks the router port list, p walks the group's port list. */
rp = rcu_dereference(hlist_first_rcu(&br->router_list));
p = mdst ? rcu_dereference(mdst->ports) : NULL;
while (p || rp) {
struct net_bridge_port *port, *lport, *rport;
/* Current candidate from each list, NULL once that list is exhausted. */
lport = p ? p->port : NULL;
rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) :
NULL;
/* Pick the candidate with the larger pointer value; on a tie the router-list entry is used. */
if ((unsigned long)lport > (unsigned long)rport) {
port = lport;
/* Ports with BR_MULTICAST_TO_UNICAST get the frame via maybe_deliver_addr() with the
group entry's eth_addr; "prev" is left unchanged on this path. */
if (port->flags & BR_MULTICAST_TO_UNICAST) {
maybe_deliver_addr(lport, skb, p->eth_addr,
local_orig);
goto delivered;
}
} else {
port = rport;
}
prev = maybe_deliver(prev, port, skb, local_orig);
delivered:
if (IS_ERR(prev))
goto out;
/* Count the frame only when this port became the new "prev". */
if (prev == port)
br_multicast_count(port->br, port, skb, igmp_type,
BR_MCAST_DIR_TX);
/* Advance every list whose current entry was the chosen port (both, when they are equal). */
if ((unsigned long)lport >= (unsigned long)port)
p = rcu_dereference(p->next);
if ((unsigned long)rport >= (unsigned long)port)
rp = rcu_dereference(hlist_next_rcu(rp));
}
/* No port was selected through maybe_deliver(). */
if (!prev)
goto out;
/* Last selected port: clone if the caller still needs skb, otherwise hand over the original. */
if (local_rcv)
deliver_clone(prev, skb, local_orig);
else
__br_forward(prev, skb, local_orig);
return;
out:
/* The skb is freed here only when the caller does not keep it for local delivery. */
if (!local_rcv)
kfree_skb(skb);
}