ovs qos

OVS supports QoS and dispatches to different implementations depending on the datapath type. For the kernel datapath, QoS is built on the kernel's tc subsystem and provides ingress policing and egress traffic shaping. For the userspace datapath, QoS is built on DPDK's rte_meter library and offers comparatively simple ingress and egress rate limiting.

kernel qos

The kernel's tc offers many queueing disciplines, but OVS currently supports only two of them for QoS: htb and hfsc (the OVSDB QoS types linux-htb and linux-hfsc). For the full set of QoS parameters, see the QoS and Queue table definitions in the OVS database schema.

A separate article dedicated to tc-based QoS can also be consulted for more background.

userspace qos

First, look at where QoS is configured in the code. This configuration path does not distinguish between QoS types: it is shared by kernel and userspace QoS, and only the API invoked at the bottom differs by netdev type.

Examples of userspace QoS configuration can be found in the official OVS documentation: ingress policing is configured through the Interface columns ingress_policing_rate and ingress_policing_burst, and egress limiting through a QoS record of type egress-policer.

Regardless of the QoS type, the configuration is pushed all the way down to the lowest-level packet receive/transmit module of the interface; nothing is enforced in the datapath or the slow path.
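
The configuration call chain, as traced in the code below, looks roughly like this for DPDK ports:

bridge_reconfigure
    iface_configure_qos
        netdev_set_qos       -> netdev_dpdk_set_qos        (egress QoS)
        netdev_set_queue     -> per-queue settings
        netdev_set_policing  -> netdev_dpdk_set_policing   (ingress policing)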

bridge_reconfigure iterates over every interface of every port on every bridge and calls iface_configure_qos:

static void
bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
{
    ...
    HMAP_FOR_EACH (br, node, &all_bridges) {
        struct port *port;

        /* We need the datapath ID early to allow LACP ports to use it as the
         * default system ID. */
        bridge_configure_datapath_id(br);

        HMAP_FOR_EACH (port, hmap_node, &br->ports) {
            struct iface *iface;

            LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
                ...
                iface_configure_qos(iface, port->cfg->qos);
                ...
            }
        }
        ...
    }

iface_configure_qos configures QoS on the interface: the ingress, egress, and per-queue settings are all applied in this function. For userspace QoS it ends up in the DPDK netdev implementation: the ingress configuration ultimately reaches netdev_dpdk_set_policing, and the egress configuration reaches netdev_dpdk_set_qos.

static void
iface_configure_qos(struct iface *iface, const struct ovsrec_qos *qos)
{
    struct ofpbuf queues_buf;

    ofpbuf_init(&queues_buf, 0);

    if (!qos || qos->type[0] == '\0') {
        netdev_set_qos(iface->netdev, NULL, NULL);
    } else {
        const struct ovsdb_datum *queues;
        struct netdev_queue_dump dump;
        unsigned int queue_id;
        struct smap details;
        bool queue_zero;
        size_t i;

        /* Configure top-level Qos for 'iface'. */
        //class->set_qos(netdev, type, details) -- netdev_dpdk_set_qos
        netdev_set_qos(iface->netdev, qos->type, &qos->other_config);

        /* Deconfigure queues that were deleted. */
        queues = ovsrec_qos_get_queues(qos, OVSDB_TYPE_INTEGER, OVSDB_TYPE_UUID);
        smap_init(&details);
        NETDEV_QUEUE_FOR_EACH (&queue_id, &details, &dump, iface->netdev) {
            if (!queue_ids_include(queues, queue_id)) {
                netdev_delete_queue(iface->netdev, queue_id);
            }
        }
        smap_destroy(&details);

        /* Configure queues for 'iface'. */
        queue_zero = false;
        for (i = 0; i < qos->n_queues; i++) {
            const struct ovsrec_queue *queue = qos->value_queues[i];
            queue_id = qos->key_queues[i];

            if (queue_id == 0) {
                queue_zero = true;
            }

            if (queue->n_dscp == 1) {
                struct ofproto_port_queue *port_queue;

                port_queue = ofpbuf_put_uninit(&queues_buf,
                                               sizeof *port_queue);
                port_queue->queue = queue_id;
                port_queue->dscp = queue->dscp[0];
            }

            //class->set_queue(netdev, queue_id, details)
            netdev_set_queue(iface->netdev, queue_id, &queue->other_config);
        }
        if (!queue_zero) {
            smap_init(&details);
            netdev_set_queue(iface->netdev, 0, &details);
            smap_destroy(&details);
        }
    }

    if (iface->ofp_port != OFPP_NONE) {
        const struct ofproto_port_queue *port_queues = queues_buf.data;
        size_t n_queues = queues_buf.size / sizeof *port_queues;

        ofproto_port_set_queues(iface->port->bridge->ofproto, iface->ofp_port,
                                port_queues, n_queues);
    }

    //netdev->netdev_class->set_policing(netdev, kbits_rate, kbits_burst) -- netdev_dpdk_set_policing
    netdev_set_policing(iface->netdev,
                        MIN(UINT32_MAX, iface->cfg->ingress_policing_rate),
                        MIN(UINT32_MAX, iface->cfg->ingress_policing_burst));

    ofpbuf_uninit(&queues_buf);
}
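
For reference, netdev_set_policing and netdev_set_qos are thin dispatch wrappers in lib/netdev.c that forward to the netdev class; for DPDK ports that means netdev_dpdk_set_policing and netdev_dpdk_set_qos. A simplified sketch of the policing wrapper (the real function may differ in minor details):

/* Simplified sketch: if the netdev class implements set_policing,
 * forward to it (netdev_dpdk_set_policing for DPDK ports), otherwise
 * report that policing is unsupported. */
int
netdev_set_policing(struct netdev *netdev, uint32_t kbits_rate,
                    uint32_t kbits_burst)
{
    return (netdev->netdev_class->set_policing
            ? netdev->netdev_class->set_policing(netdev, kbits_rate,
                                                 kbits_burst)
            : EOPNOTSUPP);
}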

Ingress configuration: netdev_dpdk_set_policing

static int
netdev_dpdk_set_policing(struct netdev* netdev, uint32_t policer_rate,
                         uint32_t policer_burst)
{
    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
    struct ingress_policer *policer;

    /* Force to 0 if no rate specified,
     * default to 8000 kbits if burst is 0,
     * else stick with user-specified value.
     */
    policer_burst = (!policer_rate ? 0
                     : !policer_burst ? 8000
                     : policer_burst);

    ovs_mutex_lock(&dev->mutex);

    policer = ovsrcu_get_protected(struct ingress_policer *,
                                    &dev->ingress_policer);

    if (dev->policer_rate == policer_rate &&
        dev->policer_burst == policer_burst) {
        /* Assume that settings haven't changed since we last set them. */
        ovs_mutex_unlock(&dev->mutex);
        return 0;
    }

    /* Destroy any existing ingress policer for the device if one exists */
    if (policer) {
        ovsrcu_postpone(free, policer);
    }

    if (policer_rate != 0) {
        //internally calls the DPDK rte_meter API to build and return the policer
        policer = netdev_dpdk_policer_construct(policer_rate, policer_burst);
    } else {
        policer = NULL;
    }
    //publish the policer to dev->ingress_policer; packets received on this port will be metered against it
    ovsrcu_set(&dev->ingress_policer, policer);
    dev->policer_rate = policer_rate;
    dev->policer_burst = policer_burst;
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}
//rte_meter_srtcm_config is the API provided by DPDK's rte_meter library
static struct ingress_policer *
netdev_dpdk_policer_construct(uint32_t rate, uint32_t burst)
{
    struct ingress_policer *policer = NULL;
    uint64_t rate_bytes;
    uint64_t burst_bytes;
    int err = 0;

    policer = xmalloc(sizeof *policer);
    rte_spinlock_init(&policer->policer_lock);

    /* rte_meter requires bytes so convert kbits rate and burst to bytes. */
    rate_bytes = rate * 1000ULL / 8;
    burst_bytes = burst * 1000ULL / 8;

    policer->app_srtcm_params.cir = rate_bytes;
    policer->app_srtcm_params.cbs = burst_bytes;
    policer->app_srtcm_params.ebs = 0;
    err = rte_meter_srtcm_config(&policer->in_policer,
                                    &policer->app_srtcm_params);
    if (err) {
        VLOG_ERR("Could not create rte meter for ingress policer");
        free(policer);
        return NULL;
    }

    return policer;
}
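
As used above, the ingress_policer structure simply bundles the srTCM parameters, the meter state and a spinlock; the following is a sketch reconstructed from the fields referenced in the code (the authoritative definition lives in lib/netdev-dpdk.c):

struct ingress_policer {
    struct rte_meter_srtcm_params app_srtcm_params;  /* cir/cbs/ebs, in bytes */
    struct rte_meter_srtcm in_policer;               /* DPDK srTCM meter state */
    rte_spinlock_t policer_lock;                     /* serializes metering */
};

Note the unit conversion above: ingress_policing_rate and ingress_policing_burst are given in kbit/s and kbit, while rte_meter works in bytes, so for example ingress_policing_rate=10000 becomes cir = 10000 * 1000 / 8 = 1,250,000 bytes per second.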

Egress configuration: netdev_dpdk_set_qos
This walkthrough is based on OVS 2.8.2, which supports only one egress QoS implementation, egress_policer_ops. It, too, ends up calling the DPDK API rte_meter_srtcm_config, the same function used on the ingress side.

static const struct dpdk_qos_ops egress_policer_ops = {
    "egress-policer",    /* qos_name */
    egress_policer_qos_construct,
    egress_policer_qos_destruct,
    egress_policer_qos_get,
    egress_policer_qos_is_equal,
    egress_policer_run
};
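
The six members of this table correspond to the callbacks of struct dpdk_qos_ops, and each configured port carries a qos_conf that records the active ops plus a lock. The following is a sketch based on how the fields are used in the code below; the actual prototypes in lib/netdev-dpdk.c may differ in detail (e.g., extra flags):

struct dpdk_qos_ops {
    const char *qos_name;       /* e.g. "egress-policer" */
    int (*qos_construct)(const struct smap *details, struct qos_conf **conf);
    void (*qos_destruct)(struct qos_conf *conf);
    int (*qos_get)(const struct qos_conf *conf, struct smap *details);
    bool (*qos_is_equal)(const struct qos_conf *conf,
                         const struct smap *details);
    int (*qos_run)(struct qos_conf *conf, struct rte_mbuf **pkts, int pkt_cnt);
};

struct qos_conf {
    const struct dpdk_qos_ops *ops;  /* which implementation is active */
    rte_spinlock_t lock;             /* taken around qos_run */
};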

static int
netdev_dpdk_set_qos(struct netdev *netdev, const char *type,
                    const struct smap *details)
{
    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
    const struct dpdk_qos_ops *new_ops = NULL;
    struct qos_conf *qos_conf, *new_qos_conf = NULL;
    int error = 0;

    ovs_mutex_lock(&dev->mutex);

    qos_conf = ovsrcu_get_protected(struct qos_conf *, &dev->qos_conf);

    //only one implementation exists: egress_policer_ops, whose qos_name is "egress-policer"
    new_ops = qos_lookup_name(type);

    if (!new_ops || !new_ops->qos_construct) {
        new_qos_conf = NULL;
        if (type && type[0]) {
            error = EOPNOTSUPP;
        }
    } else if (qos_conf && qos_conf->ops == new_ops
               && qos_conf->ops->qos_is_equal(qos_conf, details)) {
        new_qos_conf = qos_conf;
    } else {
        //egress_policer_qos_construct
        error = new_ops->qos_construct(details, &new_qos_conf);
    }

    if (error) {
        VLOG_ERR("Failed to set QoS type %s on port %s: %s",
                 type, netdev->name, rte_strerror(error));
    }

    if (new_qos_conf != qos_conf) {
        ovsrcu_set(&dev->qos_conf, new_qos_conf);
        if (qos_conf) {
            ovsrcu_postpone(qos_conf->ops->qos_destruct, qos_conf);
        }
    }

    ovs_mutex_unlock(&dev->mutex);

    return error;
}

static int
egress_policer_qos_construct(const struct smap *details,
                             struct qos_conf **conf)
{
    struct egress_policer *policer;
    int err = 0;

    policer = xmalloc(sizeof *policer);
    qos_conf_init(&policer->qos_conf, &egress_policer_ops);
    //parse the configured other_config values into srTCM parameters
    egress_policer_details_to_param(details, &policer->app_srtcm_params);
    err = rte_meter_srtcm_config(&policer->egress_meter,
                                 &policer->app_srtcm_params);
    if (!err) {
        *conf = &policer->qos_conf;
    } else {
        free(policer);
        *conf = NULL;
        err = -err;
    }

    return err;
}
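
The egress_policer object referenced above embeds the generic qos_conf followed by its own srTCM parameters and meter state; this is a sketch based on the fields used in egress_policer_qos_construct (see lib/netdev-dpdk.c for the authoritative definition):

struct egress_policer {
    struct qos_conf qos_conf;                        /* embedded generic QoS config;
                                                        CONTAINER_OF recovers the policer from it */
    struct rte_meter_srtcm_params app_srtcm_params;  /* cir/cbs taken from other_config */
    struct rte_meter_srtcm egress_meter;             /* DPDK srTCM meter state */
};

Unlike the ingress side, where rates are given in kbit and converted to bytes, the egress-policer other_config values cir and cbs are, per the OVS QoS documentation, specified directly in bytes per second and bytes and are passed to rte_meter_srtcm_config without conversion.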

Packet processing

Ingress QoS processing
OVS-DPDK has two kinds of receive function: netdev_dpdk_vhost_rxq_recv for vhostuser ports and netdev_dpdk_rxq_recv for physical NIC ports. Both apply the ingress policer in the same way; the trace below follows the physical NIC path.

netdev_dpdk_vhost_rxq_recv
netdev_dpdk_rxq_recv
    struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq);
    struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
    //fetch the ingress policer from the netdev
    struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
    int nb_rx;
    int dropped = 0;
    //receive a burst of packets with the DPDK API rte_eth_rx_burst
    nb_rx = rte_eth_rx_burst(rx->port_id, rxq->queue_id,
                             (struct rte_mbuf **) batch->packets,
                             NETDEV_MAX_BURST);
    //a non-NULL policer means QoS is configured, so run the received packets through it
    if (policer) {
        dropped = nb_rx;
        nb_rx = ingress_policer_run(policer, (struct rte_mbuf **) batch->packets, nb_rx);
            rte_spinlock_lock(&policer->policer_lock);
            cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
                int i = 0;
                int cnt = 0;
                struct rte_mbuf *pkt = NULL;
                uint64_t current_time = rte_rdtsc();

                for (i = 0; i < pkt_cnt; i++) {
                    pkt = pkts[i];
                    /* Handle current packet */
                    if (netdev_dpdk_policer_pkt_handle(meter, pkt, current_time)) {
                        if (cnt != i) {
                            pkts[cnt] = pkt;
                        }
                        cnt++;
                    } else {
                        rte_pktmbuf_free(pkt);
                    }
                }

                return cnt;
            rte_spinlock_unlock(&policer->policer_lock);
        dropped -= nb_rx;
    }
//rte_meter_srtcm_color_blind_check colors each packet: green means the packet may pass; yellow and red mean it exceeds the configured rate and must be dropped.
static inline bool
netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
                               struct rte_mbuf *pkt, uint64_t time)
{
    uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct ether_hdr);

    return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
                                                e_RTE_METER_GREEN;
}
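
In srTCM terms (RFC 2697, which DPDK's rte_meter implements), the committed token bucket fills at cir bytes per second up to cbs bytes and the excess bucket up to ebs bytes. Because the policer above is configured with ebs = 0, packets are effectively either green or red. As a worked example with cir = 1,250,000 bytes/s (ingress_policing_rate=10000) and cbs = 1,000,000 bytes (ingress_policing_burst=8000): a 1500-byte frame is green as long as at least 1500 - 14 = 1486 bytes of committed tokens remain (the Ethernet header is subtracted in netdev_dpdk_policer_pkt_handle), and anything beyond the burst allowance is colored red and freed.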

Egress QoS processing
The egress path likewise calls rte_meter_srtcm_color_blind_check to color each packet and drops it according to the color. The egress meter is shared by all transmit queues of the port, which is why qos_run is executed under qos_conf->lock in the trace below.

netdev_dpdk_send__
__netdev_dpdk_vhost_send
    cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
        struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev->qos_conf);

        if (qos_conf) {
            rte_spinlock_lock(&qos_conf->lock);
            //egress_policer_run
            cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
                int cnt = 0;
                struct egress_policer *policer =
                    CONTAINER_OF(conf, struct egress_policer, qos_conf);

                cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt);
                    int i = 0;
                    int cnt = 0;
                    struct rte_mbuf *pkt = NULL;
                    uint64_t current_time = rte_rdtsc();

                    for (i = 0; i < pkt_cnt; i++) {
                        pkt = pkts[i];
                        /* Handle current packet */
                        if (netdev_dpdk_policer_pkt_handle(meter, pkt, current_time)) {
                            if (cnt != i) {
                                pkts[cnt] = pkt;
                            }
                            cnt++;
                        } else {
                            rte_pktmbuf_free(pkt);
                        }
                    }

                    return cnt;
                return cnt;
            rte_spinlock_unlock(&qos_conf->lock);
        }

        return cnt;

See also: ovs qos - 简书 (jianshu.com)
