鉴于以上,逐步击破,解决方案就有了。
3.建立一个合理的cache自适应替换原则,保证在位者谋其职,不思进取者自退位。
因此,我的最终设计是以下的样子:
效果:数据流到达速率越快就越容易以极低的代价命中cache,数据流到达速率越慢越不容易命中cache,然而也不用付出高昂的代价。
我的中间步骤测试代码如下:
//修改net/netfilter/nf_conntrack_core.c //Email:[email protected] //1.定义 #define A #ifdef A /* * MAX_CACHE动态计算原则: * cache链表长度 = 平均冲突链表长度/3, 其中: * 平均冲突链表长度 = net.nf_conntrack_max/net.netfilter.nf_conntrack_buckets * 3 = 经验值 * */ #define MAX_CACHE 4 struct conntrack_cache { struct nf_conntrack_tuple_hash *caches[MAX_CACHE]; }; DEFINE_PER_CPU(struct conntrack_cache, conntrack_cache); #endif //2.修改resolve_normal_ct static inline struct nf_conn * resolve_normal_ct(struct net *net, struct sk_buff *skb, unsigned int dataoff, u_int16_t l3num, u_int8_t protonum, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, int *set_reply, enum ip_conntrack_info *ctinfo) { struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; #ifdef A int i; struct conntrack_cache *cache; #endif if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, protonum, &tuple, l3proto, l4proto)) { pr_debug("resolve_normal_ct: Can't get tuple\n"); return NULL; } #ifdef A cache = &__get_cpu_var(conntrack_cache); rcu_read_lock(); if (0 /* 优化3 */) { goto slowpath; } for (i = 0; i < MAX_CACHE; i++) { struct nf_conntrack_tuple_hash *ch = cache->caches[i]; struct nf_conntrack_tuple_hash *ch0 = cache->caches[0]; if (ch && nf_ct_tuple_equal(&tuple, &ch->tuple)) { ct = nf_ct_tuplehash_to_ctrack(ch); if (unlikely(nf_ct_is_dying(ct) || !atomic_inc_not_zero(&ct->ct_general.use))) { h = NULL; goto slowpath; } else { if (unlikely(!nf_ct_tuple_equal(&tuple, &ch->tuple))) { nf_ct_put(ct); h = NULL; goto slowpath; } } /*************************************** 优化1简介 *****************************************/ /* 并非直接提升到第一个,而是根据两次cache命中的间隔酌情提升,提升的步数与时间间隔成反比 */ /* 这就避免了cache队列本身的剧烈抖动。事实上,命中的时间间隔如果能加权历史间隔值,效果更好 */ /*******************************************************************************************/ /* * 基于时间局部性提升命中项的优先级 */ if (i > 0 /* && 优化1 */) { cache->caches[0] = ch; cache->caches[i] = ch0; } h = ch; } } ct = NULL; slowpath: rcu_read_unlock(); if (!h) 
#endif /* look for tuple match */ h = nf_conntrack_find_get(net, &tuple); if (!h) { h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff); if (!h) return NULL; if (IS_ERR(h)) return (void *)h; } #ifdef A else { int j; struct nf_conn *ctp; struct nf_conntrack_tuple_hash *chp; /*********************** 优化2简介 **************************/ /* 只有连续两个数据包到达的时间间隔小于n时才会执行cache替换 */ /* 这是为了避免诸如ICMP之类的慢速流导致的cache抖动 */ /************************************************************/ if (0 /* 优化2 */) { goto skip; } /************************** 优化3简介 *****************************/ /* 只有在总的conntrack数量大于hash bucket数量的4倍时才启用cache */ /* 因为conntrack数量小的话,经过一次hash运算就可以一次定位, */ /* 或者经过遍历很短的冲突链表即可定位,使用cache反而降低了性能 */ /******************************************************************/ if (0 /* 优化3 */) { goto skip; } ct = nf_ct_tuplehash_to_ctrack(h); nf_conntrack_get(&ct->ct_general); chp = cache->caches[MAX_CACHE-1]; for (j = MAX_CACHE-1; j > 0; j--) { cache->caches[j] = cache->caches[j-1]; } cache->caches[0] = h; if (chp) { ctp = nf_ct_tuplehash_to_ctrack(chp); nf_conntrack_put(&ctp->ct_general); } } skip: if (!ct) { ct = nf_ct_tuplehash_to_ctrack(h); } #else ct = nf_ct_tuplehash_to_ctrack(h); #endif /* It exists; we have (non-exclusive) reference. */ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; /* Please set reply bit if this packet OK */ *set_reply = 1; } else { /* Once we've had two way comms, always ESTABLISHED. 
*/ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { pr_debug("nf_conntrack_in: normal packet for %p\n", ct); *ctinfo = IP_CT_ESTABLISHED; } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { pr_debug("nf_conntrack_in: related packet for %p\n", ct); *ctinfo = IP_CT_RELATED; } else { pr_debug("nf_conntrack_in: new packet for %p\n", ct); *ctinfo = IP_CT_NEW; } *set_reply = 0; } skb->nfct = &ct->ct_general; skb->nfctinfo = *ctinfo; return ct; } //2.修改nf_conntrack_init int nf_conntrack_init(struct net *net) { int ret; #ifdef A int i; #endif if (net_eq(net, &init_net)) { ret = nf_conntrack_init_init_net(); if (ret < 0) goto out_init_net; } ret = nf_conntrack_init_net(net); if (ret < 0) goto out_net; if (net_eq(net, &init_net)) { /* For use by REJECT target */ rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); /* Howto get NAT offsets */ rcu_assign_pointer(nf_ct_nat_offset, NULL); } #ifdef A /* 初始化每CPU的conntrack cache队列 */ for_each_possible_cpu(i) { int j; struct conntrack_cache *cache; cache = &per_cpu(conntrack_cache, i); for (j = 0; j < MAX_CACHE; j++) { cache->caches[j] = NULL; } } #endif return 0; out_net: if (net_eq(net, &init_net)) nf_conntrack_cleanup_init_net(); out_init_net: return ret; }