qemu的virtqueue实现

和内核一样,qemu也需要支持virtqueue,VirtQueue的定义如下

/* Vring alignment constant; the name suggests the alignment used for
 * virtio-pci rings (presumably in bytes -- confirm). Not referenced elsewhere
 * in this excerpt. */
#define VIRTIO_PCI_VRING_ALIGN         4096

/*
 * One entry of the descriptor table. The vring_desc_*() accessors read
 * these fields from guest memory at desc_pa + i * sizeof(VRingDesc), so the
 * field order and widths must not change.
 */
typedef struct VRingDesc
{
    uint64_t addr;   /* buffer address; virtqueue_pop stores it in in_addr/out_addr before mapping */
    uint32_t len;    /* buffer length; for an indirect descriptor, the byte size of the indirect table */
    uint16_t flags;  /* bit flags; virtqueue_pop tests VRING_DESC_F_INDIRECT and VRING_DESC_F_WRITE */
    uint16_t next;   /* read by vring_desc_next(); presumably the index of the next descriptor in the chain */
} VRingDesc;

/*
 * Header of the available ring, followed in memory by the ring entries.
 *
 * ring[] is a C99 flexible array member (instead of the GNU zero-length
 * array ring[0]); sizeof() and every offsetof() used by the accessors are
 * unchanged. vring_used_event() reads the slot at ring[vring.num], i.e. the
 * 16-bit word just past the last regular entry.
 */
typedef struct VRingAvail
{
    uint16_t flags;   /* read by vring_avail_flags() */
    uint16_t idx;     /* read by vring_avail_idx() */
    uint16_t ring[];  /* entries read by vring_avail_ring() */
} VRingAvail;

/*
 * One entry of the used ring. virtqueue_fill() writes both fields through
 * vring_used_ring_id() and vring_used_ring_len().
 */
typedef struct VRingUsedElem
{
    uint32_t id;   /* set to elem->index, the head descriptor index of the completed chain */
    uint32_t len;  /* set to the len argument passed to virtqueue_fill() */
} VRingUsedElem;

/*
 * Header of the used ring, followed in memory by the ring entries.
 *
 * ring[] is a C99 flexible array member (instead of the GNU zero-length
 * array ring[0]); sizeof() and every offsetof() used by the accessors are
 * unchanged.
 */
typedef struct VRingUsed
{
    uint16_t flags;        /* modified by vring_used_flags_set_bit()/_unset_bit() */
    uint16_t idx;          /* read by vring_used_idx(), written by vring_used_idx_set() */
    VRingUsedElem ring[];  /* entries written by virtqueue_fill() */
} VRingUsed;

/* Location and size of one vring's three parts. */
typedef struct VRing
{
    unsigned int num;    /* number of ring entries; used as the modulus for used-ring slots and as the descriptor-chain limit */
    unsigned int align;  /* ring alignment -- presumably in bytes; not used in this excerpt */
    hwaddr desc;         /* address of the descriptor table (array of VRingDesc) */
    hwaddr avail;        /* address of the VRingAvail structure */
    hwaddr used;         /* address of the VRingUsed structure */
} VRing;

/* Per-queue state kept by the device side. */
struct VirtQueue
{
    VRing vring;  /* vring metadata (size and addresses of desc/avail/used) */
    hwaddr pa;  /* actual memory address of the vring */
    /* Next avail-ring position to consume; virtqueue_pop post-increments it. */
    uint16_t last_avail_idx;
    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used can still be trusted; virtqueue_flush clears it
     * when its 16-bit distance check fails. */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    /* NOTE(review): presumably this queue's index within the device -- confirm. */
    uint16_t queue_index;

    /* Elements popped but not yet flushed: virtqueue_pop increments it,
     * virtqueue_flush subtracts the flushed count. */
    int inuse;

    /* NOTE(review): presumably the interrupt (MSI-X) vector for this queue -- confirm. */
    uint16_t vector;
    /* Callback taking the device and this queue; invocation site is not shown here. */
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
    /* Owning device; passed to the virtio_ld*_phys/virtio_st*_phys accessors. */
    VirtIODevice *vdev;
    /* Event notifiers; their use is not shown in this excerpt. */
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
};
可以看出VRing结构体的定义,qemu和内核在ABI上是一致的。virtqueue_init用于初始化vring的元数据,同时qemu提供了一系列接口来读写vring的不同成员,e.g.

/* Read the 64-bit addr field of the i-th VRingDesc in the table at desc_pa. */
static inline uint64_t vring_desc_addr(VirtIODevice *vdev, hwaddr desc_pa,
                                       int i)
{
    const hwaddr entry_pa = desc_pa + sizeof(VRingDesc) * i;

    return virtio_ldq_phys(vdev, entry_pa + offsetof(VRingDesc, addr));
}

/* Read the 32-bit len field of the i-th VRingDesc in the table at desc_pa. */
static inline uint32_t vring_desc_len(VirtIODevice *vdev, hwaddr desc_pa, int i)
{
    const hwaddr entry_pa = desc_pa + sizeof(VRingDesc) * i;

    return virtio_ldl_phys(vdev, entry_pa + offsetof(VRingDesc, len));
}

/* Read the 16-bit flags field of the i-th VRingDesc in the table at desc_pa. */
static inline uint16_t vring_desc_flags(VirtIODevice *vdev, hwaddr desc_pa,
                                        int i)
{
    const hwaddr entry_pa = desc_pa + sizeof(VRingDesc) * i;

    return virtio_lduw_phys(vdev, entry_pa + offsetof(VRingDesc, flags));
}

/* Read the 16-bit next field of the i-th VRingDesc in the table at desc_pa. */
static inline uint16_t vring_desc_next(VirtIODevice *vdev, hwaddr desc_pa,
                                       int i)
{
    const hwaddr entry_pa = desc_pa + sizeof(VRingDesc) * i;

    return virtio_lduw_phys(vdev, entry_pa + offsetof(VRingDesc, next));
}

/* Read the flags field of the queue's avail ring. */
static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    return virtio_lduw_phys(vq->vdev,
                            vq->vring.avail + offsetof(VRingAvail, flags));
}

/* Read the idx field of the queue's avail ring. */
static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    return virtio_lduw_phys(vq->vdev,
                            vq->vring.avail + offsetof(VRingAvail, idx));
}

/* Read the i-th 16-bit entry of the queue's avail ring. */
static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    return virtio_lduw_phys(vq->vdev,
                            vq->vring.avail + offsetof(VRingAvail, ring[i]));
}

/*
 * Read the used_event value, which is stored in the avail ring slot at
 * index vring.num, i.e. directly after the last regular ring entry.
 */
static inline uint16_t vring_used_event(VirtQueue *vq)
{
    const unsigned int event_slot = vq->vring.num;

    return vring_avail_ring(vq, event_slot);
}

/* Store val into the id field of the i-th used ring element. */
static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
{
    const hwaddr id_pa = vq->vring.used + offsetof(VRingUsed, ring[i].id);

    virtio_stl_phys(vq->vdev, id_pa, val);
}

/* Store val into the len field of the i-th used ring element. */
static inline void vring_used_ring_len(VirtQueue *vq, int i, uint32_t val)
{
    const hwaddr len_pa = vq->vring.used + offsetof(VRingUsed, ring[i].len);

    virtio_stl_phys(vq->vdev, len_pa, val);
}

/* Read the idx field of the queue's used ring. */
static uint16_t vring_used_idx(VirtQueue *vq)
{
    return virtio_lduw_phys(vq->vdev,
                            vq->vring.used + offsetof(VRingUsed, idx));
}

/* Store val into the idx field of the queue's used ring. */
static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    const hwaddr idx_pa = vq->vring.used + offsetof(VRingUsed, idx);

    virtio_stw_phys(vq->vdev, idx_pa, val);
}

/* Read-modify-write the used ring flags word, OR-ing in the bits of mask. */
static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *dev = vq->vdev;
    const hwaddr flags_pa = vq->vring.used + offsetof(VRingUsed, flags);

    virtio_stw_phys(dev, flags_pa, virtio_lduw_phys(dev, flags_pa) | mask);
}

/* Read-modify-write the used ring flags word, clearing the bits of mask. */
static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *dev = vq->vdev;
    const hwaddr flags_pa = vq->vring.used + offsetof(VRingUsed, flags);

    virtio_stw_phys(dev, flags_pa, virtio_lduw_phys(dev, flags_pa) & ~mask);
}
同时后端也提供了一系列接口来处理avail ring和used ring,e.g.

virtqueue_pop主要用于从descriptor table中找到available ring中添加的buffer,即guest新添加并让后端处理的buffer

/*
 * Pop the next buffer chain that the guest has published in the avail ring.
 *
 * Walks the descriptor chain (or the indirect table the head descriptor
 * points to), records each descriptor's address and length in @elem --
 * descriptors with VRING_DESC_F_WRITE go to the in_* arrays, all others to
 * the out_* arrays -- and then maps both lists with virtqueue_map_sg().
 *
 * Returns 0 when virtqueue_num_heads() reports nothing new; otherwise
 * returns in_num + out_num, the number of descriptors collected.
 * A malformed table is fatal: the function calls error_report() and exit(1).
 */
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;
    VirtIODevice *vdev = vq->vdev;

    /* Nothing to do when the guest has not added new heads since
     * last_avail_idx (virtqueue_num_heads() is defined outside this excerpt;
     * it presumably compares vring_avail_idx(vq) with the given index). */
    if (!virtqueue_num_heads(vq, vq->last_avail_idx)) {
        return 0;
    }

    /* When we start there are none of either input nor output. */
    elem->out_num = elem->in_num = 0;

    max = vq->vring.num;

    /* Fetch the head descriptor index for the current avail slot and advance
     * last_avail_idx past it. */
    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
        /* Guest negotiated VIRTIO_RING_F_EVENT_IDX: pass the current avail
         * idx to vring_avail_event() (presumably publishing avail_event). */
        vring_avail_event(vq, vring_avail_idx(vq));
    }

    if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_INDIRECT) {
        /* The length of an indirect table must be a whole number of
         * VRingDesc entries. */
        if (vring_desc_len(vdev, desc_pa, i) % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = vring_desc_len(vdev, desc_pa, i) / sizeof(VRingDesc);
        /* From here on desc_pa refers to the indirect table, starting at
         * its entry 0. */
        desc_pa = vring_desc_addr(vdev, desc_pa, i);
        i = 0;
    }

    /* Collect all the descriptors */
    do {
        struct iovec *sg;

        if (vring_desc_flags(vdev, desc_pa, i) & VRING_DESC_F_WRITE) {
            if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
                error_report("Too many write descriptors in indirect table");
                exit(1);
            }
            elem->in_addr[elem->in_num] = vring_desc_addr(vdev, desc_pa, i);
            sg = &elem->in_sg[elem->in_num++];
        } else {
            if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
                error_report("Too many read descriptors in indirect table");
                exit(1);
            }
            elem->out_addr[elem->out_num] = vring_desc_addr(vdev, desc_pa, i);
            sg = &elem->out_sg[elem->out_num++];
        }

        /* Only the length is stored in the iovec here; the address was saved
         * in in_addr/out_addr and iov_base is filled in by the mapping step. */
        sg->iov_len = vring_desc_len(vdev, desc_pa, i);

        /* If we've got too many, that implies a descriptor loop. */
        if ((elem->in_num + elem->out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_next_desc(vdev, desc_pa, i, max)) != max);

    /* Now map what we have collected */
    virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
    virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);

    /* Remember the head descriptor index; virtqueue_fill() writes it back as
     * the used element id. */
    elem->index = head;

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem->in_num + elem->out_num;
}
virtqueue_fill:当virtio host端(qemu/vhost)处理完guest放入avail ring中的buffer之后,调用该函数把buffer解除映射并放入used ring

/*
 * Unmap the buffers of a completed element and record it in the used ring.
 *
 * @len is the byte count reported in the used element; it is also spread
 * across the "in" buffers to tell cpu_physical_memory_unmap() how much of
 * each one was accessed. @idx is an offset added to the current used idx to
 * pick the ring slot. The used idx itself is not modified here.
 */
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    unsigned int consumed = 0;
    unsigned int slot;
    int n;

    trace_virtqueue_fill(vq, elem, len, idx);

    /* Release the "in" mappings, attributing at most the remaining part of
     * len to each buffer. */
    for (n = 0; n < elem->in_num; n++) {
        const struct iovec *sg = &elem->in_sg[n];
        size_t accessed = MIN(len - consumed, sg->iov_len);

        cpu_physical_memory_unmap(sg->iov_base, sg->iov_len, 1, accessed);
        consumed += accessed;
    }

    /* Release the "out" mappings, each reported with its full length. */
    for (n = 0; n < elem->out_num; n++) {
        const struct iovec *sg = &elem->out_sg[n];

        cpu_physical_memory_unmap(sg->iov_base, sg->iov_len, 0, sg->iov_len);
    }

    /* Slot = (current used idx + caller's offset) modulo the ring size. */
    slot = (idx + vring_used_idx(vq)) % vq->vring.num;

    /* Fill the used element: id is the head descriptor index saved by
     * virtqueue_pop, len is the caller-supplied byte count. */
    vring_used_ring_id(vq, slot, elem->index);
    vring_used_ring_len(vq, slot, len);
}
virtqueue_flush用于更新used ring的idx

/*
 * Publish @count previously filled used-ring entries by advancing the used
 * ring idx, and reduce the in-flight counter by the same amount.
 */
void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t prev_idx;
    uint16_t next_idx;

    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);

    prev_idx = vring_used_idx(vq);
    next_idx = prev_idx + count;   /* wraps naturally at 16 bits */
    vring_used_idx_set(vq, next_idx);
    vq->inuse -= count;

    /* If the signed 16-bit distance from signalled_used to the new index is
     * smaller than the number of entries just published, signalled_used can
     * no longer be trusted, so mark it invalid. */
    if (unlikely((int16_t)(next_idx - vq->signalled_used) <
                 (uint16_t)(next_idx - prev_idx))) {
        vq->signalled_used_valid = false;
    }
}






你可能感兴趣的:(qemu的virtqueue实现)