Linux内核bridge中的数据包处理流程

本文档的Copyleft归yfydz所有,使用GPL发布,可以自由拷贝,转载,转载时请保持文档的完整性,严禁用于任何商业用途。
msn: [email protected]
来源:http://yfydz.cublog.cn


1.
前言


本文简要介绍数据包在进入桥网卡后在Linux网络协议栈的处理流程,并描述netfilter hook点的挂接处理情况,具体各部分的详细处理待后续文章中说明。


以下内核代码版本为2.6.19.2.


2.
函数处理流程

bridge入口点handle_bridge()

/* net/core/dev.c */
 
/*
 * Excerpt of the generic receive path: the frame is offered to
 * handle_bridge(); a non-zero result jumps to the out label and skips the
 * code that follows.
 */
int netif_receive_skb(struct sk_buff *skb)
{
......
 if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
  goto out;
......
}


bridge基本挂接点处理函数:br_handle_frame_hook()
 
/*
 * Offer a received frame to the bridge.
 *
 * Returns 0 without touching the frame when it is a loopback packet or when
 * its device has no bridge port attached. Otherwise any pending *pt_prev
 * handler is run first (its result is stored in *ret and *pt_prev is
 * cleared), and the result of br_handle_frame_hook() is returned.
 */
static __inline__ int handle_bridge(struct sk_buff **pskb,
				    struct packet_type **pt_prev, int *ret,
				    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	/* Loopback frames are not bridged. */
	if ((*pskb)->pkt_type == PACKET_LOOPBACK)
		return 0;

	/* Nor are frames whose device is not a bridge port. */
	port = rcu_dereference((*pskb)->dev->br_port);
	if (port == NULL)
		return 0;

	/* Flush the deferred packet handler before handing over. */
	if (*pt_prev != NULL) {
		*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, pskb);
}


br_handle_frame_hook()的实际实现:

/* net/bridge/br.c */

/*
 * Excerpt of the bridge module init: installs br_handle_frame as the
 * function behind br_handle_frame_hook.
 */
static int __init br_init(void)
{
......
 br_handle_frame_hook = br_handle_frame;
......
}

 

br_handle_frame: PF_BRIDGE的PREROUTING点处理

/* net/bridge/br_input.c */

int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
{
 struct sk_buff *skb = *pskb;
 const unsigned char *dest = eth_hdr(skb)->h_dest;

 if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
  goto err;

 if (unlikely(is_link_local(dest))) {
//
自身包进入PF_BEIDGEINPUT, 一般处理的包数不多
  skb->pkt_type = PACKET_HOST;
//
正常是返回1, 然后就返回1, 表示桥模块全权处理该包了
  return NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
          NULL, br_handle_local_finish) != 0;
 }

 if (p->state == BR_STATE_FORWARDING || p->state == BR_STATE_LEARNING) {
// br_should_route_hook
函数一般没定义
  if (br_should_route_hook) {
   if (br_should_route_hook(pskb))
    return 0;
   skb = *pskb;
   dest = eth_hdr(skb)->h_dest;
  }

  if (!compare_ether_addr(p->br->dev->dev_addr, dest))
   skb->pkt_type = PACKET_HOST;

// PF_BRIDGEprerouting处理结束后进入br_handle_frame_finish
  NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
   br_handle_frame_finish);
//
处理后始终返回1, 表示不再进行其他协议族处理,该数据包已经完全由bridge处理完毕
  return 1;
 }

err:
 kfree_skb(skb);
//
处理后始终返回1, 表示不再进行其他协议族处理,该数据包已经完全由bridge处理完毕
 return 1;
}

通过br_handle_frame_finish进入bridge的转发:

/* note: already called with rcu_read_lock (preempt_disabled) */
int br_handle_frame_finish(struct sk_buff *skb)
{
 const unsigned char *dest = eth_hdr(skb)->h_dest;
 struct net_bridge_port *p = rcu_dereference(skb->dev->br_port);
 struct net_bridge *br;
 struct net_bridge_fdb_entry *dst;
 int passedup = 0;

 if (!p || p->state == BR_STATE_DISABLED)
  goto drop;

 /* insert into forwarding database after filtering to avoid spoofing */
 br = p->br;
 br_fdb_update(br, p, eth_hdr(skb)->h_source);

 if (p->state == BR_STATE_LEARNING)
  goto drop;

 if (br->dev->flags & IFF_PROMISC) {
  struct sk_buff *skb2;

  skb2 = skb_clone(skb, GFP_ATOMIC);
  if (skb2 != NULL) {
   passedup = 1;
   br_pass_frame_up(br, skb2);
  }
 }

 if (is_multicast_ether_addr(dest)) {
//
多播转发,也是调用广播处理
  br->statistics.multicast++;
  br_flood_forward(br, skb, !passedup);
  if (!passedup)
   br_pass_frame_up(br, skb);
  goto out;
 }
//
根据目的MAC找目的出口
 dst = __br_fdb_get(br, dest);

 if (dst != NULL && dst->is_local) {
  if (!passedup)
   br_pass_frame_up(br, skb);
  else
   kfree_skb(skb);
  goto out;
 }

 if (dst != NULL) {
//
单播转发
  br_forward(dst->dst, skb);
  goto out;
 }
//
广播转发
 br_flood_forward(br, skb, 0);

out:
 return 0;
drop:
 kfree_skb(skb);
 goto out;
}

广播/多播转发: br_flood_forward/br_flood

/*
 * Flood skb via br_flood(), using __br_forward as the per-port hook;
 * clone is passed through unchanged.
 */
/* called under bridge lock */
void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, int clone)
{
 br_flood(br, skb, clone, __br_forward);
}

/*
 * Hand skb to every port of @br for which should_deliver() is true, by
 * calling @__packet_hook once per port.
 *
 * When @clone is non-zero the work is done on a clone and the caller's skb
 * is left alone. The last eligible port receives the working skb itself and
 * every earlier one receives a clone. If no port is eligible the working
 * skb is freed.
 */
/* called under bridge lock */
static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone,
	void (*__packet_hook)(const struct net_bridge_port *p,
			      struct sk_buff *skb))
{
	struct net_bridge_port *p;
	struct net_bridge_port *prev;

	if (clone) {
		struct sk_buff *skb2;

		if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
			br->statistics.tx_dropped++;
			return;
		}

		skb = skb2;
	}

	/* prev trails one eligible port behind so the last one needs no clone. */
	prev = NULL;

	list_for_each_entry_rcu(p, &br->port_list, list) {
		if (should_deliver(p, skb)) {
			if (prev != NULL) {
				struct sk_buff *skb2;

				if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
					/* Out of memory: give up on the remaining ports. */
					br->statistics.tx_dropped++;
					kfree_skb(skb);
					return;
				}

				/* This is __br_forward when called via br_flood_forward(). */
				__packet_hook(prev, skb2);
			}

			prev = p;
		}
	}

	if (prev != NULL) {
		/* This is __br_forward when called via br_flood_forward(). */
		__packet_hook(prev, skb);
		return;
	}

	kfree_skb(skb);
}

 

单播转发: br_forward

/* net/bridge/br_forward.c */
/*
 * Forward skb to the single port @to (unicast path). The skb is freed
 * instead when should_deliver() rejects the port.
 */
/* called with rcu_read_lock */
void br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
{
	if (should_deliver(to, skb)) {
		/* Same worker as the flood-forward path: __br_forward(). */
		__br_forward(to, skb);
		return;
	}

	kfree_skb(skb);
}

FORWARD:
static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
{
 struct net_device *indev;

 indev = skb->dev;
 skb->dev = to->dev;
 skb->ip_summed = CHECKSUM_NONE;
//
进入PF_BRIDGEforward hook, 结束后进入br_forward_finish()
 NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
   br_forward_finish);
}


POSTROUTING:
//
FORWARD点处理后直接进入POSTROUTING点处理
int br_forward_finish(struct sk_buff *skb)
{
//
进入PF_BRIDGEpostrouting hook, 结束后进入br_dev_queue_push_xmit()
 return NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev,
         br_dev_queue_push_xmit);

}

数据包发出:

/*
 * Final transmit step after POST_ROUTING: frees the skb if it is oversized
 * (and not GSO) or if nf_bridge_maybe_copy_header() reports failure,
 * otherwise restores the Ethernet header and queues the skb on skb->dev.
 * Always returns 0.
 */
int br_dev_queue_push_xmit(struct sk_buff *skb)
{
	/* drop mtu oversized packets except gso */
	if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
		kfree_skb(skb);
	else {
		/* ip_refrag calls ip_fragment, doesn't copy the MAC header. */
		if (nf_bridge_maybe_copy_header(skb))
			kfree_skb(skb);
		else {
			/* Re-expose the Ethernet header in front of the payload. */
			skb_push(skb, ETH_HLEN);
			/*
			 * Per the article, this ends up in the
			 * hard_start_xmit() of skb->dev.
			 */
			dev_queue_xmit(skb);
		}
	}

	return 0;
}

桥网卡设备(即桥设备本身,而不是上面转发路径中skb->dev所指的出口端口设备)的hard_start_xmit()函数定义为:
/* net/bridge/br_device.c */

/*
 * Excerpt of the bridge device setup: installs br_dev_xmit as the device's
 * hard_start_xmit handler.
 */
void br_dev_setup(struct net_device *dev)
{
......
 dev->hard_start_xmit = br_dev_xmit;
......
}

/* net device transmit always called with no BH (preempt_disabled) */
int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
 struct net_bridge *br = netdev_priv(dev);
 const unsigned char *dest = skb->data;
 struct net_bridge_fdb_entry *dst;

 br->statistics.tx_packets++;
 br->statistics.tx_bytes += skb->len;

 skb->mac.raw = skb->data;
 skb_pull(skb, ETH_HLEN);

 if (dest[0] & 1)
//
多播发送
  br_flood_deliver(br, skb, 0);
 else if ((dst = __br_fdb_get(br, dest)) != NULL)
//
单播发送
  br_deliver(dst->dst, skb);
 else
//
广播发送
  br_flood_deliver(br, skb, 0);
//
这些发送函数最终都会调用__br_deliver()函数
 return 0;
}


/* net/bridge/br_forward.c */
static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
{
 skb->dev = to->dev;
//
此处是PF_BRIDGEOUTPUT
 NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
   br_forward_finish);
}


总结: PF_BRIDGE中的各个hook点和PF_INET不同, 可用下面的图表示:


  PREROUTING --+-- FORWARD --+-- POSTROUTING
               |             |
               |             |
             INPUT         OUTPUT

 

3. PF_BRIDGE hook


net/bridge/br_netfilter.c中定义了以下hook,注意这些hook点主要是PF_BRIDGE协议族的。

/* net/bridge/br_netfilter.c */

/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent
 * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
 * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
 * ip_refrag() can return NF_STOLEN. */
static struct nf_hook_ops br_nf_ops[] = {
// PF_BRIDGE
的挂接点
// PREROUTING

 { .hook = br_nf_pre_routing,
   .owner = THIS_MODULE,
   .pf = PF_BRIDGE,
   .hooknum = NF_BR_PRE_ROUTING,
   .priority = NF_BR_PRI_BRNF, },
// INPUT

 { .hook = br_nf_local_in,
   .owner = THIS_MODULE,
   .pf = PF_BRIDGE,
   .hooknum = NF_BR_LOCAL_IN,
   .priority = NF_BR_PRI_BRNF, },
// FORWARD

 { .hook = br_nf_forward_ip,
   .owner = THIS_MODULE,
   .pf = PF_BRIDGE,
   .hooknum = NF_BR_FORWARD,
   .priority = NF_BR_PRI_BRNF - 1, },
// FORWARD

 { .hook = br_nf_forward_arp,
   .owner = THIS_MODULE,
   .pf = PF_BRIDGE,
   .hooknum = NF_BR_FORWARD,
   .priority = NF_BR_PRI_BRNF, },
// OUTPUT

 { .hook = br_nf_local_out,
   .owner = THIS_MODULE,
   .pf = PF_BRIDGE,
   .hooknum = NF_BR_LOCAL_OUT,
   .priority = NF_BR_PRI_FIRST, },
// POSTROUTING

 { .hook = br_nf_post_routing,
   .owner = THIS_MODULE,
   .pf = PF_BRIDGE,
   .hooknum = NF_BR_POST_ROUTING,
   .priority = NF_BR_PRI_LAST, },

// 后面是PF_INET/PF_INET6的挂接点, 其实也没进行什么数据包操作,
//
就是自身的输入输出包不通过桥处理,要短路掉
 { .hook = ip_sabotage_in,
   .owner = THIS_MODULE,
   .pf = PF_INET,
   .hooknum = NF_IP_PRE_ROUTING,
   .priority = NF_IP_PRI_FIRST, },
 { .hook = ip_sabotage_in,
   .owner = THIS_MODULE,
   .pf = PF_INET6,
   .hooknum = NF_IP6_PRE_ROUTING,
   .priority = NF_IP6_PRI_FIRST, },
 { .hook = ip_sabotage_out,
   .owner = THIS_MODULE,
   .pf = PF_INET,
   .hooknum = NF_IP_FORWARD,
   .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, },
 { .hook = ip_sabotage_out,
   .owner = THIS_MODULE,
   .pf = PF_INET6,
   .hooknum = NF_IP6_FORWARD,
   .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD, },
 { .hook = ip_sabotage_out,
   .owner = THIS_MODULE,
   .pf = PF_INET,
   .hooknum = NF_IP_LOCAL_OUT,
   .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
 { .hook = ip_sabotage_out,
   .owner = THIS_MODULE,
   .pf = PF_INET6,
   .hooknum = NF_IP6_LOCAL_OUT,
   .priority = NF_IP6_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, },
 { .hook = ip_sabotage_out,
   .owner = THIS_MODULE,
   .pf = PF_INET,
   .hooknum = NF_IP_POST_ROUTING,
   .priority = NF_IP_PRI_FIRST, },
 { .hook = ip_sabotage_out,
   .owner = THIS_MODULE,
   .pf = PF_INET6,
   .hooknum = NF_IP6_POST_ROUTING,
   .priority = NF_IP6_PRI_FIRST, },
};


/*
 * PF_BRIDGE PRE_ROUTING hook function (excerpt; the body is elided in the
 * article). On the path shown it returns NF_STOLEN after invoking the
 * PF_INET hooks; the error labels return NF_DROP.
 */
static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
				      const struct net_device *in,
				      const struct net_device *out,
				      int (*okfn)(struct sk_buff *))
{
......

	/* Carry on into the PF_INET PRE_ROUTING hooks from here. */
	NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
		br_nf_pre_routing_finish);

	return NF_STOLEN;

inhdr_error:
//      IP_INC_STATS_BH(IpInHdrErrors);
out:
	return NF_DROP;
}


/*
 * PF_BRIDGE FORWARD hook function (excerpt; the body is elided in the
 * article). Returns NF_STOLEN on the path shown.
 */
static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
				     const struct net_device *in,
				     const struct net_device *out,
				     int (*okfn)(struct sk_buff *))
{
......
	/* Carry on into the PF_INET/PF_INET6 FORWARD hooks from here. */
	NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in), parent,
		br_nf_forward_finish);

	return NF_STOLEN;
}

/*
 * PF_BRIDGE LOCAL_OUT (OUTPUT) hook function (excerpt; the body is elided
 * in the article). Returns NF_STOLEN on the paths shown.
 */
static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
				    const struct net_device *in,
				    const struct net_device *out,
				    int (*okfn)(struct sk_buff *))
{
......
	/* IP forwarded traffic has a physindev, locally
	 * generated traffic hasn't. */
	if (realindev != NULL) {
		if (!(nf_bridge->mask & BRNF_DONT_TAKE_PARENT)) {
			struct net_device *parent = bridge_parent(realindev);
			if (parent)
				realindev = parent;
		}
		/*
		 * Carry on into the PF_INET/PF_INET6 FORWARD hooks, with
		 * NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1 as the priority
		 * threshold.
		 */
		NF_HOOK_THRESH(pf, NF_IP_FORWARD, skb, realindev,
			       realoutdev, br_nf_local_out_finish,
			       NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1);
	} else {
		/*
		 * Carry on into the PF_INET/PF_INET6 LOCAL_OUT hooks, with
		 * NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1 as the priority
		 * threshold.
		 */
		NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev,
			       realoutdev, br_nf_local_out_finish,
			       NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1);
	}

out:
	return NF_STOLEN;
}


/*
 * PF_BRIDGE POST_ROUTING hook function (excerpt; the body is elided in the
 * article). Returns NF_STOLEN on the path shown; the debug-only print_error
 * path returns NF_ACCEPT.
 */
static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
				       const struct net_device *in,
				       const struct net_device *out,
				       int (*okfn)(struct sk_buff *))
{
......
	/* Carry on into the PF_INET/PF_INET6 POST_ROUTING hooks from here. */
	NF_HOOK(pf, NF_IP_POST_ROUTING, skb, NULL, realoutdev,
		br_nf_dev_queue_xmit);

	return NF_STOLEN;

#ifdef CONFIG_NETFILTER_DEBUG
print_error:
	if (skb->dev != NULL) {
		printk("[%s]", skb->dev->name);
		if (realoutdev)
			printk("[%s]", realoutdev->name);
	}
	printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw,
	       skb->data);
	dump_stack();
	return NF_ACCEPT;
#endif
}


由此可见, PF_INET的各个hook点也被PF_BRIDGE的各个hook点调用,因此可以在桥网卡中进行过滤或NAT等操作。


4.
结论


BRIDGE的数据处理流程是一个独立的处理过程,如果处理正常的话就不再返回到其他协议处理。
在桥的处理层次也和 IP 协议一样,可以挂接多个 PF_BRIDGE 的挂接点,这些挂接点中又调用了 PF_INET 族的挂接点,从而实现了桥下的过滤、 NAT 等功能。

来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/22214587/viewspace-709501/,如需转载,请注明出处,否则将追究法律责任。

转载于:http://blog.itpub.net/22214587/viewspace-709501/

你可能感兴趣的:(数据库,网络)