IPVS调度层

IPVS子系统的调度器统一注册在全局链表ip_vs_schedulers上。

IPVS调度器注册

函数register_ip_vs_scheduler负责向IPVS系统内注册调度器。内核中不同的调度器以名称作区分,所有注册的调度器链接在全局链表ip_vs_schedulers上。注册函数首先检查全局链表上是否已有相同名称的调度器,如果有的话,说明调度器已注册,此时打印错误信息并返回-EINVAL。否则,将调度器链接到全局链表ip_vs_schedulers上。

/*
 * register_ip_vs_scheduler - add a scheduler to the global ip_vs_schedulers list.
 *
 * Walks ip_vs_schedulers comparing names. If an entry with the same name is
 * already present, an error is logged and -EINVAL is returned. Otherwise the
 * scheduler is linked onto the list with list_add().
 *
 * NOTE(review): this listing is abridged. The duplicate-name path releases
 * ip_vs_sched_mutex and calls ip_vs_use_count_dec(), but the matching lock
 * and use-count increment, the success return and the function's closing
 * brace are not shown here -- confirm against the full kernel source.
 */
int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
{
        struct ip_vs_scheduler *sched;

        /* Make sure that the scheduler with this name doesn't exist in the scheduler list.
         */
        list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
                if (strcmp(scheduler->name, sched->name) == 0) {
                        /* Presumably undoes a lock and use count taken earlier
                         * (not visible in this excerpt) before failing.
                         */
                        mutex_unlock(&ip_vs_sched_mutex);
                        ip_vs_use_count_dec();
                        pr_err("%s(): [%s] scheduler already existed in the system\n", __func__, scheduler->name);
                        return -EINVAL;
                }
        }
        /* Add it into the d-linked scheduler list
         */
        list_add(&scheduler->n_list, &ip_vs_schedulers);

目前内核注册的调度器有:

1)LBLCR - Locality-Based Least-Connection with Replication scheduler
2)FO - Weighted Fail Over module
3)WLC - Weighted Least-Connection Scheduling module
4)SED - Shortest Expected Delay scheduling module
5)RR - Round-Robin Scheduling module
6)WRR - Weighted Round-Robin Scheduling module
7)DH - Destination Hashing scheduling module
8)LBLC - Locality-Based Least-Connection scheduling module
9)NQ - Never Queue scheduling module
10)LC - Least-Connection Scheduling module
11)OVF - Overflow-Connection Scheduling module
12)SH - Source Hashing scheduling module

调度器查找

函数ip_vs_scheduler_get最多两次调用内部的调度器获取函数ip_vs_sched_getbyname:在第一次未找到的情况下,先执行相应内核调度器模块的加载,之后再次调用调度器获取函数。完整的调度器模块名称为字符串ip_vs_加上调度器名称。

/*
 * ip_vs_scheduler_get - obtain a registered scheduler by name.
 *
 * @sched_name: scheduler name, also used as the suffix of the module name
 *              "ip_vs_<sched_name>" requested on a miss.
 *
 * Returns whatever ip_vs_sched_getbyname() yields: the result of the first
 * lookup if it is non-NULL, otherwise the result of a second lookup made
 * after request_module() has been called.
 */
struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name)
{
        struct ip_vs_scheduler *found = ip_vs_sched_getbyname(sched_name);

        /* Fast path: the scheduler is already registered */
        if (found)
                return found;

        /* Miss: ask for the module to be loaded, then look once more */
        request_module("ip_vs_%s", sched_name);

        return ip_vs_sched_getbyname(sched_name);
}

函数ip_vs_sched_getbyname遍历全局调度器链表ip_vs_schedulers:对于设置了module成员的调度器,先通过try_module_get获取模块引用(获取失败说明该调度器刚被删除,直接跳过),然后对比调度器名称参数,名称相同则返回该调度器结构。

/*
 * ip_vs_sched_getbyname - look up a scheduler by name on ip_vs_schedulers.
 *
 * For every list entry that has a module set, a module reference is taken
 * with try_module_get(); entries for which that fails are skipped. The first
 * entry whose name equals sched_name is returned.
 *
 * NOTE(review): this listing is abridged. The lock matching the
 * mutex_unlock() below, what happens to the module reference of entries
 * whose name does not match, the not-found return and the function's closing
 * brace are not shown here -- confirm against the full kernel source.
 */
static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
{
        struct ip_vs_scheduler *sched;

        list_for_each_entry(sched, &ip_vs_schedulers, n_list) {

                /* Take a module reference before using the entry */
                if (sched->module && !try_module_get(sched->module)) {
                        /* This scheduler is just deleted */
                        continue;
                }
                if (strcmp(sched_name, sched->name)==0) {           
                        mutex_unlock(&ip_vs_sched_mutex);
                        return sched;    /* HIT */
                }
}

调度器绑定

如使用以下的命令配置IPVS虚拟服务,并指定使用RR调度器:

$ ipvsadm -A -t 207.175.44.110:80 -s rr

在以下的添加虚拟服务函数中,如果调度器名称不等于“none”,使用上节的函数ip_vs_scheduler_get获取注册的调度器结构。随后的函数ip_vs_bind_scheduler负责绑定调度器到虚拟服务中。

/*
 * ip_vs_add_service - add a virtual service (scheduler-related part only).
 *
 * When the requested scheduler name differs from "none", the scheduler is
 * fetched with ip_vs_scheduler_get(); a failed lookup is logged and turned
 * into -ENOENT. A scheduler that was found is then attached to the service
 * with ip_vs_bind_scheduler().
 *
 * NOTE(review): this listing is abridged. The declaration of ret, the
 * out_err label, the set-up of svc (it is still NULL at the bind call as
 * shown) and the return statements are not visible here -- confirm against
 * the full kernel source.
 */
static int ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p)
{
    struct ip_vs_scheduler *sched = NULL;
    struct ip_vs_service *svc = NULL;

    /* strcmp() != 0: any name other than "none" selects a scheduler */
    if (strcmp(u->sched_name, "none")) {
        sched = ip_vs_scheduler_get(u->sched_name);
        if (!sched) {
            pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
            ret = -ENOENT;
            goto out_err;
        }
    }

    /* Bind the scheduler */
    if (sched) {
        ret = ip_vs_bind_scheduler(svc, sched);
        if (ret)
            goto out_err;
        /* Presumably cleared so a later error path does not release a
         * scheduler that is now bound to svc -- TODO confirm.
         */
        sched = NULL;
    }
}

如下,如果调度器提供了服务初始化函数指针init_service,首先调用该函数(对于RR调度器,其指向函数ip_vs_rr_init_svc),初始化失败则直接返回错误码。最后,将调度器结构赋予虚拟服务结构的scheduler成员,完成绑定。

/*
 * ip_vs_bind_scheduler - attach a scheduler to a virtual service.
 *
 * @svc:       service that receives the scheduler.
 * @scheduler: scheduler to bind.
 *
 * If the scheduler has an init_service callback it is invoked on svc first;
 * a non-zero result is logged and returned without binding. On success the
 * scheduler is published in svc->scheduler via rcu_assign_pointer() and 0 is
 * returned.
 */
int ip_vs_bind_scheduler(struct ip_vs_service *svc, struct ip_vs_scheduler *scheduler)
{
        /* No init_service callback counts as a successful (0) init */
        int err = scheduler->init_service ? scheduler->init_service(svc) : 0;

        if (err) {
                pr_err("%s(): init error\n", __func__);
                return err;
        }

        rcu_assign_pointer(svc->scheduler, scheduler);
        return 0;
}

调度器执行调度

在IPVS入口函数ip_vs_in中,如果没有找到已有的连接,说明当前报文是一个新发起的连接,使用函数ip_vs_try_to_schedule尝试进行调度。

/*
 * ip_vs_in - IPVS input path (connection lookup and scheduling part only).
 *
 * Looks up an existing connection through the protocol's conn_in_get
 * callback. When none is found, ip_vs_try_to_schedule() is called; if it
 * returns 0, the verdict it stored in v is returned.
 *
 * NOTE(review): this listing is abridged. The declarations of pd, pp, cp and
 * iph, and everything after the scheduling attempt, are not shown here.
 */
static unsigned int ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
{
    pd = ip_vs_proto_data_get(ipvs, iph.protocol);
    pp = pd->pp;
    /* Check if the packet belongs to an existing connection entry
     */
    cp = pp->conn_in_get(ipvs, af, skb, &iph);

    if (unlikely(!cp)) {
        int v;
        /* No connection entry: try to schedule one; v receives the verdict */
        if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph))
            return v;
    }
}

函数ip_vs_try_to_schedule将调用特定协议的连接调度函数指针conn_schedule,代码如下:

/*
 * ip_vs_try_to_schedule - hand a packet without a connection entry to the
 * protocol's conn_schedule callback.
 *
 * The callback is only invoked when iph->fragoffs is zero; if it returns 0,
 * this function returns 0 as well.
 *
 * NOTE(review): this listing is abridged. The rest of the function,
 * including its other return paths and closing brace, is not shown here.
 */
static unsigned int ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
              int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph)
{
    struct ip_vs_protocol *pp = pd->pp;

    if (!iph->fragoffs) {
        /* Schedule and create new connection entry into cpp */
        if (!pp->conn_schedule(ipvs, af, skb, pd, verdict, cpp, iph))
            return 0;
    }

对于IPVS支持的协议类型UDP、AH/ESP、SCTP和TCP,对应的处理函数分别如下(AH和ESP使用同一个处理函数):

udp_conn_schedule
ah_esp_conn_schedule
sctp_conn_schedule
tcp_conn_schedule	

以TCP协议为例,处理函数tcp_conn_schedule如下,其调用函数ip_vs_schedule进行调度处理。

/*
 * tcp_conn_schedule - TCP conn_schedule handler (scheduling call only).
 *
 * When a service is available, ip_vs_schedule() is called and its result is
 * stored in *cpp.
 *
 * NOTE(review): this listing is abridged. The code that assigns svc, the
 * declaration of ignored, the return statements and the function's closing
 * brace are not shown here.
 */
static int tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
          int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph)
{
    struct ip_vs_service *svc;

    if (svc) {
        /* Let the virtual server select a real server for the incoming connection, and create a connection entry.
         */
        *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
}

最终由函数ip_vs_schedule调用虚拟服务中绑定的调度器使用其特定的调度函数指针schedule执行调度处理,返回选择的目的真实服务器。

/*
 * ip_vs_schedule - run the scheduler bound to a virtual service.
 *
 * Reads svc->scheduler with rcu_dereference() and, if one is bound, calls
 * its schedule callback to obtain a destination.
 *
 * NOTE(review): this listing is abridged. How dest is turned into the
 * returned connection, the no-scheduler path and the function's closing
 * brace are not shown here.
 */
struct ip_vs_conn *ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
           struct ip_vs_proto_data *pd, int *ignored, struct ip_vs_iphdr *iph)
{
    struct ip_vs_scheduler *sched;
    struct ip_vs_dest *dest;

    sched = rcu_dereference(svc->scheduler);
    if (sched) {
        /* read svc->sched_data after svc->scheduler */
        smp_rmb();
        dest = sched->schedule(svc, skb, iph);
    }

内核版本 4.15

你可能感兴趣的:(负载均衡)