syn-proxy logic1

依赖NF_INET_PRE_ROUTING链上的ip_vs_pre_routing()hook函数来向client发送SYN ACK报文。ip_vs_pre_routing()源码如下:


ip_vs_pre_routing(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn) (struct sk_buff *))
{ 
    struct ip_vs_iphdr iph;

    /* address family */
    int af; 
    struct ip_vs_service *svc; 

   af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; 

   ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);

   /* drop all ip fragment except ospf */
  if ((af == AF_INET)

     /* 通过判断ip header中frag_off字段及MF(More Fragment字段确定该包是否为ip分片包) */
     && (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) 

     /* 不drop ospf协议包,并交给协议栈继续处理 */
     && (iph.protocol != IPPROTO_OSPF)) { 
  if(sysctl_ip_vs_frag_drop_entry == 1) { 
      IP_VS_INC_ESTATS(ip_vs_esmib, DEFENCE_IP_FRAG_DROP);
     return NF_DROP;
  } else {
     if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_IN))
       return NF_STOLEN;

     IP_VS_INC_ESTATS(ip_vs_esmib, DEFENCE_IP_FRAG_GATHER);
     ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
  } 
} 

  /* drop udp packet which send to tcp-vip */
  if ((sysctl_ip_vs_udp_drop_entry == 1) && (iph.protocol == IPPROTO_UDP)) {

    /* 在svc链表中根据协议族、传输层协议及vip三者的hash key查找对应hash bucket中满足条件的svc结构体 (当bucket数量足够多时,每个hash key对应的bucket中只有一个svc结构体,查找复杂度为O(1))*/
    if ((svc =
         ip_vs_lookup_vip(af, IPPROTO_TCP, &iph.daddr)) != NULL) {
           IP_VS_INC_ESTATS(ip_vs_esmib, DEFENCE_UDP_DROP);
           return NF_DROP;
  } 
}

  /* synproxy: defence synflood */
  if (iph.protocol == IPPROTO_TCP) {
    int v = NF_ACCEPT;

    /* 构建回给client的syn-ack包,利用变量v作为该hook函数的返回值 */

    if (0 == ip_vs_synproxy_syn_rcv(af, skb, &iph, &v)) {
      return v;
  } 
} 

  return NF_ACCEPT;
}

ip_vs_synproxy_syn_rcv()实现如下:

int 
ip_vs_synproxy_syn_rcv(int af, struct sk_buff *skb,
struct ip_vs_iphdr *iph, int *verdict)
{ 
  struct ip_vs_service *svc = NULL;
  struct tcphdr _tcph, *th; 
  struct ip_vs_synproxy_opt tcp_opt;

  /* 获取client syn包的tcp header */
  th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
  if (unlikely(th == NULL)) {
    goto syn_rcv_out;
} 
  /* 判断是否为syn包  */
  if (th->syn && !th->ack && !th->rst && !th->fin &&

      /* 根据传输层协议和目标地址及目标端口获取对应的svc结构体 */
      (svc =
       ip_vs_service_get(af, skb->mark, iph->protocol, &iph->daddr,
                              th→dest))

       /* 若syn-proxy未打开,则该包将由协议栈继续处理 */
       && (svc->flags & IP_VS_CONN_F_SYNPROXY)) {
         // release service here, because don't use it any all.
         ip_vs_service_put(svc);

         if (ip_vs_todrop()) {
           /* 
            * It seems that we are very loaded.
            * We have to drop this packet :(
            */
             goto syn_rcv_out;
         } 
  } else {
        /* 
         * release service.
         */ 
         if (svc != NULL) {
           ip_vs_service_put(svc);
         } 
         return 1;
  } 

  /* update statistics */
  IP_VS_INC_ESTATS(ip_vs_esmib, SYNPROXY_SYN_CNT);

  /* Try to reuse skb if possible */

  /* 确保该skb当前只被一个user使用,且不是clone得到的(clone的skb是共享数据,在正常情况下不能写入) */

  if (unlikely(skb_shared(skb) || skb_cloned(skb))) {

      /* 复制一个私有的且能够修改其header和data的skb,若只需要修改header则需要调用pskb_copy() */
      struct sk_buff *new_skb = skb_copy(skb, GFP_ATOMIC);
      if (unlikely(new_skb == NULL)) {
        goto syn_rcv_out;
      } 
      /* Drop old skb */
     kfree_skb(skb);
     skb = new_skb;
  } 

  /* reuse skb here: deal with tcp options, exchage ip, port. */

  syn_proxy_reuse_skb(af, skb, &tcp_opt);

  /* syn_proxy_reuse_skb()主要完成以下任务:

   * 1. 设置syn-ack包tcp option:解析client syn包中的tcp option,并根据这些option及sysctl相关参数,确定syn-ack包的tcp option

   * 2. 生成syn-ack包seq number:根据client syn包中的源地址、目标地址、源端口、目标端口、seq number、系统启动时长(jiffies / (HZ * 60))以及tcp option拼成的data使用sha1算法得到cookie_hash(__u32)即init_seq_number

   * !!!:在启用synproxy的情况下,服务器收到syn包时,不会分配专门的数据区,而是根据这个syn包计算出一个cookie值,这个cookie作为将要返回的SYN ACK包的初始序列号。当客户端返回一个ACK包时,根据包头信息计算cookie,与返回的确认序列

   * !!!   号(初始序列号 + 1)进行对比,如果相同,则是一个正常连接,然后,分配资源,建立连接。

   * 3. 设置syn-ack包syn ack flag(0x12)

   * 4. 交换tcp header中的源目的端口

   * 5. 确定ip header中的源目的ip、ttl、tos(缺省为0)

   * 6. 计算ip header和tcp checksum

   */

  if (unlikely(skb->dev == NULL)) {
    IP_VS_ERR_RL("%s: skb->dev is null !!!\n", __func__);
    goto syn_rcv_out;
  } 

  /* Send the packet out */
  if (likely(skb->dev->type == ARPHRD_ETHER)) {
      unsigned char t_hwaddr[ETH_ALEN];
  /* 设置2层信息 */
  /* Move the data pointer to point to the link layer header */
  struct ethhdr *eth = (struct ethhdr *)skb_mac_header(skb);
  skb->data = (unsigned char *)skb_mac_header(skb);
  skb->len += ETH_HLEN; //sizeof(skb->mac.ethernet);

  memcpy(t_hwaddr, (eth->h_dest), ETH_ALEN);
  memcpy((eth->h_dest), (eth->h_source), ETH_ALEN);
  memcpy((eth->h_source), t_hwaddr, ETH_ALEN);
  skb->pkt_type = PACKET_OUTGOING; 
  } else if (skb->dev->type == ARPHRD_LOOPBACK) {
       /* set link layer */
      if (likely(skb_mac_header_was_set(skb))) {
          skb->data = skb_mac_header(skb);
          skb->len += sizeof(struct ethhdr);
      } else {
           skb_push(skb, sizeof(struct ethhdr));
           skb_reset_mac_header(skb);
     } 
  }

  /* 转发syn-ack包 */

  dev_queue_xmit(skb);

  /* 告诉内核该skb不再经过后续的内核协议栈处理,但保留为该skb分配的资源 */
  *verdict = NF_STOLEN;
  return 0;

syn_rcv_out:
    /* Drop the packet when all things are right also,
     * then we needn't to kfree_skb() */
     *verdict = NF_DROP;
     return 0;
}