我的负载均衡模块:simpLB_3_2

2010年03月03日 星期三 10时02分56秒
取得一个阶段性成果,该版本的simpLB可以完全实现“轮询调度”。
连接管理中加入了超时机制,如果timeout时间内某个连接上没有数据包则删除该连接。
HTTP完美测试!
SSH测试存在一些问题:首先,调度的目的服务器可能和本地存储的known_hosts冲突,可在Client端删除~/.ssh/known_hosts中的相应条目或删除该文件来解决;
 其次,因为LoadBalancer上的连接管理有超时机制,所以如果ssh登录上以后timeout时间内没有任何动作,该连接会被删除,导致“Connection reset”;
 最后,不能很好地实现“轮询调度”。据了解,lvs目前只支持FTP,但不知道它对SSH应用支持如何。
SSH测试的问题目前可以放在一边。
目前要重点解决的是:
 完善目的服务器的表述,即构建合适的struct;
 完善对目的服务器的管理,构建链表list,以及各种操作函数(增、删、查、改等)。
 将LoadInfoCollector采集的数据送入内核层以便下一步动态负载调度之用。
  采用/proc文件系统?
  在内核中捕获UDP报文的数据,并解析?

1、simpLB.c
/*简单的轮询调度算法,添加了连接跟踪,可测试HTTP*/
/*用于连接跟踪的定时器,能释放超时连接*/
/*将处理回应包的模块从POST_ROUTING移到FORWARD点*/
#include <linux/kernel.h>
#include <linux/tcp.h>                  /* for tcphdr */
#include <net/ip.h>
#include <net/tcp.h>                    /* for csum_tcpudp_magic */
#include <net/udp.h>
#include <net/icmp.h>                   /* for icmp_send */
#include <net/route.h>                  /* for ip_route_output */
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>

#include <linux/timer.h>
#include "sahu_lb_tools.h"

#define VPORT_TCP 80
#define DPORT_TCP 80
#define VPORT_UDP 4950
#define DPORT_UDP 4950

MODULE_LICENSE("GPL");
/*
 * Addresses used by the NAT hooks. Each one is stored as four raw bytes in
 * network byte order so it can be compared directly with the address fields
 * of an IP header. The "\x.." escapes are required: written with forward
 * slashes the literals would be plain text and the comparisons would never
 * match the intended addresses.
 */
static unsigned char *vmLB_ip = "\xc0\xa8\x7a\x01";   /* 192.168.122.1: virtual (load balancer) address */
static unsigned char *vm01_ip = "\xc0\xa8\x63\x65";   /* 192.168.99.101: real server 1 */
static unsigned char *vm02_ip = "\xc0\xa8\x63\x66";   /* 192.168.99.102: real server 2 */
static unsigned char *srv_list[2];                    /* real-server table, filled in by init_module() */
static int sahu_id = 0;                               /* rotation counter used by the pre-routing hook */

/* This is the hook function itself */
unsigned int sahu_lb_pre_routing(unsigned int hooknum,
                           struct sk_buff *skb,
                           const struct net_device *in,
                           const struct net_device *out,
                           int (*okfn)(struct sk_buff *))
{
 unsigned char *srv_addr=NULL;
  char addr_str[16];
  struct sk_buff *sb = skb;
  struct iphdr *iph;
  struct tcphdr *tcph;
 struct sahu_lb_conn *cp; 

  if(!sb) return NF_ACCEPT;
  iph = ip_hdr(sb);
  if(!iph) return NF_ACCEPT;

  if (iph->daddr == *(__be32 *)vmLB_ip){/*local in*/
  srv_addr = srv_list[(sahu_id++)%2];
   if(iph->protocol == IPPROTO_TCP){
   tcph = (void *)skb_network_header(skb) + ip_hdrlen(skb) ;
   cp = sahu_lb_conn_in_get(iph->protocol,iph->saddr,tcph->source,iph->daddr,tcph->dest);
   if(!cp){
    printk("A new connection/n");
    cp = sahu_lb_conn_new(iph->protocol,iph->saddr,tcph->source,*(__be32 *)vmLB_ip,tcph->dest,*(__be32 *)srv_addr,tcph->dest);
   }else{
    srv_addr = (unsigned char *)&cp->daddr;
    inet_i2str(cp->daddr,addr_str);
    printk("Alreay has a connection:cp->daddr:%s/n",addr_str);
   }
   tcp_dnat_base(skb,*(__be32 *)vmLB_ip,*(__be32 *)srv_addr,htons(VPORT_TCP),htons(DPORT_TCP));
  }else if(iph->protocol == IPPROTO_UDP){
   udp_dnat_base(skb,*(__be32 *)vmLB_ip,*(__be32 *)srv_addr,htons(VPORT_UDP),htons(DPORT_UDP));
  }else{
  }

    iph->daddr= *(unsigned int *)srv_addr;
    ip_send_check(iph);
  //  skb->local_df = 1;
    printk("DNat: %d.%d.%d.%d To:%d.%d.%d.%d/n",
                *vmLB_ip, *(vmLB_ip + 1), *(vmLB_ip + 2),*(vmLB_ip + 3),
               *srv_addr,*(srv_addr + 1), *(srv_addr + 2),*(srv_addr +3));
    return NF_ACCEPT;
  }else{
    inet_i2str(iph->daddr,addr_str);
    printk("No DNat for %s/n",addr_str);
    return NF_ACCEPT;
  }
}
unsigned int sahu_lb_forward(unsigned int hooknum,
                           struct sk_buff *skb,
                           const struct net_device *in,
                           const struct net_device *out,
                           int (*okfn)(struct sk_buff *))
{
 unsigned char *srv_addr=NULL;
  char addr_str[16];
  struct sk_buff *sb = skb;
  struct iphdr *iph;
  struct tcphdr *tcph;
 struct sahu_lb_conn *cp; 

  if(!sb) return NF_ACCEPT;
  iph = ip_hdr(sb);
  if(!iph) return NF_ACCEPT;

 //srv_addr = srv_list[(sahu_id+1)%2];
  //if (iph->saddr == iph->saddr){//*(__be32 *)srv_addr){/*forward*/
   if(iph->protocol == IPPROTO_TCP){
   tcph = (void *)skb_network_header(skb) + ip_hdrlen(skb) ;
   cp = sahu_lb_conn_in_get(iph->protocol,iph->daddr,tcph->dest,*(__be32 *)vmLB_ip,htons(VPORT_TCP));
   if(!cp){
    printk("Reply not handled!/n");
    return NF_ACCEPT;
   }else{
    srv_addr = (unsigned char *)&cp->daddr;
    inet_i2str(cp->daddr,addr_str);
    printk("cp->daddr:%s/n",addr_str);
   }
   tcp_snat_base(skb,*(__be32 *)vmLB_ip,*(__be32 *)srv_addr,htons(VPORT_TCP),htons(DPORT_TCP));
  }else if(iph->protocol == IPPROTO_UDP){
   udp_snat_base(skb,*(__be32 *)vmLB_ip,*(__be32 *)srv_addr,htons(VPORT_UDP),htons(DPORT_UDP));
  }else{
  }
    iph->saddr= *(unsigned int *)vmLB_ip;
    ip_send_check(iph);
//    skb->local_df = 1;

    printk("SNat: %d.%d.%d.%d To:%d.%d.%d.%d/n",
                *srv_addr, *(srv_addr + 1), *(srv_addr + 2),*(srv_addr + 3),
                *vmLB_ip, *(vmLB_ip + 1), *(vmLB_ip + 2),*(vmLB_ip + 3));
    return NF_ACCEPT;
  //}else{
  //  inet_i2str(iph->saddr,addr_str);
  //  printk("No SNat for %s/n",addr_str);
  //  return NF_ACCEPT;
  //}
}
/*
 * Netfilter hook table registered by init_module() and removed by
 * cleanup_module(): one IPv4 hook at PRE_ROUTING and one at FORWARD.
 */
static struct nf_hook_ops sahu_ops[] __read_mostly = {
  {
    /* rewrites the destination of packets sent to the virtual IP */
    .owner    = THIS_MODULE,
    .pf       = PF_INET,
    .hooknum  = NF_INET_PRE_ROUTING,
    .priority = 100,
    .hook     = sahu_lb_pre_routing,
  },
  {
    /* rewrites the source of forwarded replies */
    .owner    = THIS_MODULE,
    .pf       = PF_INET,
    .hooknum  = NF_INET_FORWARD,
    .priority = 100,
    .hook     = sahu_lb_forward,
  }
};
/* Initialisation routine */
int init_module()
{
  int ret;
  ret = nf_register_hooks(sahu_ops,ARRAY_SIZE(sahu_ops));
  if(ret<0){
    pr_info("can't install simpLB into kernel!/n");
  }else{
    pr_info("simpLB install into kernel!/n");
  }
 srv_list[0] = vm01_ip;
 srv_list[1] = vm02_ip;
 
 sahu_lb_conn_init();
 return 0;
}
/*
 * Module exit point: unregister the hooks first so no packet can reach the
 * connection table any more, then tear the table down.
 */
void cleanup_module(void)
{
  nf_unregister_hooks(sahu_ops, ARRAY_SIZE(sahu_ops));
  sahu_lb_conn_cleanup();
  pr_info("simpLB removed from kernel!\n");
}


2、sahu_lb_tools.h
#include <linux/random.h>
#include <linux/jhash.h>

/* Number of list heads allocated for the connection table. */
#define SAHU_LB_CONN_TAB_SIZE 100
/* Period of the per-connection timer, in jiffies (3 seconds). */
#define SAHU_LB_CONN_TIMEOUT (3*HZ)

/* Formats the four bytes of addr as zero-padded dotted decimal into addr_str. */
int inet_i2str(unsigned int addr,char *addr_str);

/* Connection table: array of list heads allocated by sahu_lb_conn_init(). */
static  struct list_head *sahu_lb_conn_tab;
/* Random seed mixed into the connection hash; set by sahu_lb_conn_init(). */
static unsigned int sahu_lb_conn_rnd;
/*
 * struct sahu_lb_conn - one tracked connection.
 *
 * Created by sahu_lb_conn_new() and released by the expiry timer. Addresses
 * and ports are stored exactly as they were passed to sahu_lb_conn_new().
 */
struct sahu_lb_conn{
 struct list_head c_list;         /* linkage in the connection table */
 __u32 caddr, vaddr, daddr;       /* client, virtual and real-server address */
 __u16 cport, vport, dport;       /* client, virtual and real-server port */
 __u16 protocol;                  /* IP protocol number */
 atomic_t refcnt;                 /* activity flag: set to 1 on lookup hit, cleared by the timer */
 struct timer_list timer;         /* expiry timer, runs sahu_lb_conn_expire() */
 volatile unsigned long timeout;  /* not referenced by the code in this file */
};


/*function: sahu_lb_conn_init*/
int sahu_lb_conn_init(void){
 int idx;
 sahu_lb_conn_tab = vmalloc(SAHU_LB_CONN_TAB_SIZE*sizeof(struct list_head *));
 if(!sahu_lb_conn_tab){
  printk("sahu_lb_conn_init: error/n");
  return -1;
 }
 for(idx=0;idx<SAHU_LB_CONN_TAB_SIZE;idx++){
  INIT_LIST_HEAD(&sahu_lb_conn_tab[idx]);
 }
 get_random_bytes(&sahu_lb_conn_rnd,sizeof(sahu_lb_conn_rnd));
 return 0;
}
/*function: sahu_lb_conn_cleanup*/
void sahu_lb_conn_cleanup(void){
 struct sahu_lb_conn *cp;
 list_for_each_entry(cp,sahu_lb_conn_tab,c_list){
  del_timer_sync(&cp->timer);
 }
 vfree(sahu_lb_conn_tab);
}
/*
 * sahu_lb_conn_hashkey - hash of (protocol, address, port), seeded with
 * sahu_lb_conn_rnd.
 *
 * The value is the raw jhash result; it is not reduced to the range of the
 * connection table.
 */
static unsigned int sahu_lb_conn_hashkey(unsigned int proto,__u32 addr,__u16 port){
 const u32 addr_word = (__force u32)addr;
 const u32 port_word = (__force u32)port;

 return jhash_3words(addr_word,port_word,proto,sahu_lb_conn_rnd);
}
/*
 * sahu_lb_conn_hash - link a connection into the connection table.
 *
 * The hash key is computed but not used for placement: every connection is
 * added to the first list head of the table. Always returns 0.
 */
static inline int sahu_lb_conn_hash(struct sahu_lb_conn *cp){
 unsigned int key;
 struct list_head *bucket = sahu_lb_conn_tab;

 key = sahu_lb_conn_hashkey(cp->protocol,cp->caddr,cp->cport);
 list_add(&cp->c_list,bucket);
 return 0;
}
/*
 * sahu_lb_conn_unhash - unlink a connection from the connection table.
 *
 * The hash key is computed but not needed to remove the entry.
 * Always returns 0.
 */
static inline int sahu_lb_conn_unhash(struct sahu_lb_conn *cp){
 unsigned int key;
 struct list_head *node = &cp->c_list;

 key = sahu_lb_conn_hashkey(cp->protocol,cp->caddr,cp->cport);
 list_del(node);
 return 0;
}
/*
 * sahu_lb_conn_expire - timer callback for one connection.
 *
 * data is the connection pointer stored in timer.data by sahu_lb_conn_new().
 * If the connection was not touched since the previous run (refcnt == 0) it
 * is unlinked and freed. Otherwise the activity flag is cleared, the timer is
 * re-armed for another SAHU_LB_CONN_TIMEOUT, and the current number of
 * tracked connections is logged.
 */
static void sahu_lb_conn_expire(unsigned long data){
 int idx=0;
 char addr_str[16];
 struct sahu_lb_conn *cp = (struct sahu_lb_conn *)data;
 struct sahu_lb_conn *pos;   /* separate cursor so cp is not clobbered by the count loop */

 if(atomic_read(&cp->refcnt)==0){
  /* The timer is the one currently firing, so it needs no del_timer(). */
  sahu_lb_conn_unhash(cp);
  kfree(cp);
  return;
 }
 atomic_set(&cp->refcnt,0);
 mod_timer(&cp->timer,jiffies + SAHU_LB_CONN_TIMEOUT);

 inet_i2str(cp->daddr,addr_str);
 printk("Time out! dest_addr: %s\t",addr_str);
 list_for_each_entry(pos,sahu_lb_conn_tab,c_list){
  idx++;
 }
 printk("ConnTabSize:%d\n",idx);
}
/*
 * sahu_lb_conn_new - create and register a tracked connection.
 *
 * @proto:        IP protocol number
 * @caddr/@cport: client address and port
 * @vaddr/@vport: virtual (load balancer) address and port
 * @daddr/@dport: chosen real-server address and port
 *
 * The entry is allocated zeroed with GFP_ATOMIC, filled in, linked into the
 * connection table, and only then is its expiry timer armed, so the timer
 * callback can never see a partially initialised or unlinked entry. The
 * activity flag starts at 0: a connection that is never looked up again is
 * dropped when its first timeout fires.
 *
 * Returns the new connection, or NULL if no memory is available.
 */
struct sahu_lb_conn *
sahu_lb_conn_new(int proto, __u32 caddr,__u16 cport,__u32 vaddr,__u16 vport,__u32 daddr,__u16 dport){
 struct sahu_lb_conn *cp;

 cp = kzalloc(sizeof(*cp),GFP_ATOMIC);
 if(cp == NULL){
  printk("sahu_lb_conn_new: no memory avaliable.\n");
  return NULL;
 }
 INIT_LIST_HEAD(&cp->c_list);

 cp->protocol = proto;
 cp->caddr = caddr;
 cp->cport = cport;
 cp->vaddr = vaddr;
 cp->vport = vport;
 cp->daddr = daddr;
 cp->dport = dport;
 atomic_set(&cp->refcnt,0);

 sahu_lb_conn_hash(cp);

 init_timer(&cp->timer);
 cp->timer.data = (unsigned long)cp;
 cp->timer.function = &sahu_lb_conn_expire;
 cp->timer.expires = jiffies + SAHU_LB_CONN_TIMEOUT;
 add_timer(&cp->timer);

 return cp;
}
/*
 * sahu_lb_conn_in_get - find the tracked connection for a client/virtual
 * address pair.
 *
 * Scans the list that sahu_lb_conn_hash() inserts into and returns the first
 * entry whose client address/port, virtual address/port and protocol all
 * match. A hit sets the entry's activity flag to 1 so the expiry timer keeps
 * it alive. The hash key is computed but not used to select a bucket.
 *
 * Returns the connection, or NULL if none matches.
 */
static inline struct sahu_lb_conn *
sahu_lb_conn_in_get(int protocol,__u32 caddr,__u16 cport,__u32 vaddr,__u16 vport){
 struct sahu_lb_conn *entry;
 unsigned int key;

 key = sahu_lb_conn_hashkey(protocol,caddr,cport);
 list_for_each_entry(entry,sahu_lb_conn_tab,c_list){
  if(!(caddr==entry->caddr && cport==entry->cport))
   continue;
  if(!(vaddr==entry->vaddr && vport==entry->vport))
   continue;
  if(protocol!=entry->protocol)
   continue;
  atomic_set(&entry->refcnt,1);   /* mark the connection as recently used */
  return entry;
 }
 return NULL;
}
/*
 * inet_i2str - format an IPv4 address as fixed-width dotted decimal.
 *
 * @addr:     address whose four bytes are read in memory order (so a
 *            network-byte-order value prints in the usual order)
 * @addr_str: output buffer of at least 16 bytes
 *
 * Every octet is written as three zero-padded digits, e.g.
 * "192.168.099.101". The string is terminated with a real NUL character;
 * the terminator must be written as '\0', since a forward-slash form is a
 * multi-character constant that stores the digit '0' instead.
 *
 * Always returns 0.
 */
int inet_i2str(unsigned int addr,char *addr_str){
  const unsigned char *octet = (const unsigned char *)&addr;
  int i;

  for(i=0;i<4;i++){
     addr_str[i*4+0]=octet[i]/100+'0';
     addr_str[i*4+1]=(octet[i]/10)%10+'0';
     addr_str[i*4+2]=octet[i]%10+'0';
     addr_str[i*4+3]='.';
  }
  /* Overwrites the trailing '.' written by the last iteration. */
  addr_str[15]='\0';
  return 0;
}
/*
 * sahu_lb_check_diff4 - fold the replacement of one 32-bit value by another
 * into a running checksum: the complement of the old value and the new value
 * are summed into oldsum.
 */
static inline __wsum sahu_lb_check_diff4(__be32 old, __be32 new, __wsum oldsum){
 __be32 delta[2];

 delta[0] = ~old;
 delta[1] = new;
 return csum_partial(delta, sizeof(delta), oldsum);
}
/*
 * sahu_lb_check_diff2 - fold the replacement of one 16-bit value by another
 * into a running checksum: the complement of the old value and the new value
 * are summed into oldsum.
 */
static inline __wsum sahu_lb_check_diff2(__be16 old, __be16 new, __wsum oldsum){
 __be16 delta[2];

 delta[0] = ~old;
 delta[1] = new;
 return csum_partial(delta, sizeof(delta), oldsum);
}
/*
 * tcp_snat_base - rewrite the TCP source port and fix the TCP checksum for a
 * source translation from (daddr, dport) to (vaddr, vport).
 *
 * @skb:   packet whose network header points at the IP header
 * @vaddr: new source address (virtual address)
 * @daddr: old source address (real server)
 * @vport: new source port
 * @dport: old source port
 *
 * The IP source address itself is rewritten by the caller.
 *
 * With CHECKSUM_PARTIAL the check field holds only the pseudo-header sum and
 * the rest is filled in later, so only the address change is applied, in the
 * non-complemented form. Otherwise the full checksum is adjusted
 * incrementally for both address and port.
 *
 * Always returns 0.
 */
static int tcp_snat_base(struct sk_buff *skb,__be32 vaddr,__be32 daddr,__be16 vport,__be16 dport){
 struct tcphdr *tcph;

 tcph = (void *)skb_network_header(skb) + ip_hdrlen(skb);
 tcph->source = vport;

 if(skb->ip_summed==CHECKSUM_PARTIAL){
  tcph->check=
   ~csum_fold(sahu_lb_check_diff4(daddr,vaddr,
    csum_unfold(tcph->check)));
  return 0;
 }

 tcph->check=
  csum_fold(sahu_lb_check_diff4(daddr,vaddr,
   sahu_lb_check_diff2(dport,vport,
    ~csum_unfold(tcph->check))));
 /* skb->csum no longer matches the modified packet. */
 if(skb->ip_summed==CHECKSUM_COMPLETE)
  skb->ip_summed=CHECKSUM_NONE;
 return 0;
}
/*
 * tcp_dnat_base - rewrite the TCP destination port and fix the TCP checksum
 * for a destination translation from (vaddr, vport) to (daddr, dport).
 *
 * @skb:   packet whose network header points at the IP header
 * @vaddr: old destination address (virtual address)
 * @daddr: new destination address (real server)
 * @vport: old destination port
 * @dport: new destination port
 *
 * The IP destination address itself is rewritten by the caller.
 *
 * With CHECKSUM_PARTIAL the check field holds only the pseudo-header sum and
 * the rest is filled in later, so only the address change is applied, in the
 * non-complemented form. Otherwise the full checksum is adjusted
 * incrementally for both address and port.
 *
 * Always returns 0.
 */
static int tcp_dnat_base(struct sk_buff *skb,__be32 vaddr,__be32 daddr,__be16 vport,__be16 dport){
 struct tcphdr *tcph;

 tcph = (void *)skb_network_header(skb) + ip_hdrlen(skb);
 tcph->dest = dport;

 if(skb->ip_summed==CHECKSUM_PARTIAL){
  tcph->check=
   ~csum_fold(sahu_lb_check_diff4(vaddr,daddr,
    csum_unfold(tcph->check)));
  return 0;
 }

 tcph->check=
  csum_fold(sahu_lb_check_diff4(vaddr,daddr,
   sahu_lb_check_diff2(vport,dport,
    ~csum_unfold(tcph->check))));
 /* skb->csum no longer matches the modified packet. */
 if(skb->ip_summed==CHECKSUM_COMPLETE)
  skb->ip_summed=CHECKSUM_NONE;
 return 0;
}
/*
 * udp_snat_base - rewrite the UDP source port and fix the UDP checksum for a
 * source translation from (daddr, dport) to (vaddr, vport).
 *
 * @skb:   packet whose network header points at the IP header
 * @vaddr: new source address (virtual address)
 * @daddr: old source address (real server)
 * @vport: new source port
 * @dport: old source port
 *
 * The IP source address itself is rewritten by the caller.
 *
 * A UDP checksum of 0 means "no checksum" and is left at 0; adjusting it
 * would produce a bogus non-zero checksum that the receiver then rejects.
 * With CHECKSUM_PARTIAL only the pseudo-header sum stored in the field is
 * adjusted for the address change. Otherwise the full checksum is adjusted
 * incrementally, and a result of 0 is replaced by CSUM_MANGLED_0 so it is
 * not mistaken for "no checksum".
 *
 * Always returns 0.
 */
static int udp_snat_base(struct sk_buff *skb,__be32 vaddr,__be32 daddr,__be16 vport,__be16 dport){
 struct udphdr *udph;

 udph = (void *)skb_network_header(skb) + ip_hdrlen(skb);
 udph->source = vport;

 if(skb->ip_summed==CHECKSUM_PARTIAL){
  udph->check=
   ~csum_fold(sahu_lb_check_diff4(daddr,vaddr,
    csum_unfold(udph->check)));
  return 0;
 }

 if(udph->check){
  udph->check=
   csum_fold(sahu_lb_check_diff4(daddr,vaddr,
    sahu_lb_check_diff2(dport,vport,
     ~csum_unfold(udph->check))));
  if(!udph->check){
   udph->check = CSUM_MANGLED_0;
  }
 }
 /* skb->csum no longer matches the modified packet. */
 if(skb->ip_summed==CHECKSUM_COMPLETE)
  skb->ip_summed=CHECKSUM_NONE;
 return 0;
}
/*
 * udp_dnat_base - rewrite the UDP destination port and fix the UDP checksum
 * for a destination translation from (vaddr, vport) to (daddr, dport).
 *
 * @skb:   packet whose network header points at the IP header
 * @vaddr: old destination address (virtual address)
 * @daddr: new destination address (real server)
 * @vport: old destination port
 * @dport: new destination port
 *
 * The IP destination address itself is rewritten by the caller.
 *
 * A UDP checksum of 0 means "no checksum" and is left at 0; adjusting it
 * would produce a bogus non-zero checksum that the receiver then rejects.
 * With CHECKSUM_PARTIAL only the pseudo-header sum stored in the field is
 * adjusted for the address change. Otherwise the full checksum is adjusted
 * incrementally, and a result of 0 is replaced by CSUM_MANGLED_0 so it is
 * not mistaken for "no checksum".
 *
 * Always returns 0.
 */
static int udp_dnat_base(struct sk_buff *skb,__be32 vaddr,__be32 daddr,__be16 vport,__be16 dport){
 struct udphdr *udph;

 udph = (void *)skb_network_header(skb) + ip_hdrlen(skb);
 udph->dest = dport;

 if(skb->ip_summed==CHECKSUM_PARTIAL){
  udph->check=
   ~csum_fold(sahu_lb_check_diff4(vaddr,daddr,
    csum_unfold(udph->check)));
  return 0;
 }

 if(udph->check){
  udph->check=
   csum_fold(sahu_lb_check_diff4(vaddr,daddr,
    sahu_lb_check_diff2(vport,dport,
     ~csum_unfold(udph->check))));
  if(!udph->check){
   udph->check = CSUM_MANGLED_0;
  }
 }
 /* skb->csum no longer matches the modified packet. */
 if(skb->ip_summed==CHECKSUM_COMPLETE)
  skb->ip_summed=CHECKSUM_NONE;
 return 0;
}

3、Makefile
# Out-of-tree build of the simpLB kernel module against the running kernel.
# Recipe lines must start with a TAB character, not spaces.
obj-m += simpLB.o

# Kernel build tree; override with "make KDIR=/path/to/build".
KDIR ?= /lib/modules/$(shell uname -r)/build

.PHONY: all clean install remove

all:
	$(MAKE) -C $(KDIR) M=$(CURDIR)
clean:
	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
install:
	/sbin/insmod simpLB.ko
remove:
	/sbin/rmmod simpLB
4、测试
参见simpLB_3_0

你可能感兴趣的:(我的负载均衡模块:simpLB_3_2)