网卡驱动6-做一个与外界交互的虚拟网卡5(代码及调试)

驱动有三个文件:vnic.h vnic_core.c vnic_dev.c

应用层:rcv.c send.c(发送程序是pc和arm板共用的)

代码有注释,我就不详细解释了。如有疑问,欢迎讨论!

vnic.h

#ifndef __VNIC_H__
#define __VNIC_H__

#include <linux/u64_stats_sync.h>

#define VNIC_HLEN    4        /* The additional bytes (on top of the Ethernet header)
                     * that the VNIC tag requires (h_vnic_proto + h_vnic_data).
                     */
#define VNIC_ETH_ALEN    6        /* Octets in one ethernet addr     */
#define VNIC_ETH_HLEN    18        /* Total octets in header.     */
#define VNIC_ETH_ZLEN    64        /* Min. octets in frame sans FCS */

/* EtherType written into h_vnic_proto by vnic_put_tag(). */
#define ETH_P_VNIC    0x8877

#define VNIC_N_VID        20 //at most 20 virtual NICs can be configured

/* found in socket.c */
extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));

/* if this changes, algorithm will have to be reworked because this
 * depends on completely exhausting the VNIC identifier space.  Thus
 * it gives constant time look-up, but in many cases it wastes memory.
 *
 * The id space (VNIC_N_VID) is split into SPLIT_PARTS arrays of PART_LEN
 * slots each; an id selects its part with '/' and its slot with '%'.
 */
#define VNIC_GROUP_ARRAY_SPLIT_PARTS  1//only a single part is used
#define VNIC_GROUP_ARRAY_PART_LEN     (VNIC_N_VID/VNIC_GROUP_ARRAY_SPLIT_PARTS)

/**
 *    struct vnic_priority_tci_mapping - vnic egress priority mappings
 *    @priority: skb priority
 *    @vnic_qos: vnic priority: (skb->priority << 13) & 0xE000
 *    @next: pointer to next struct (entries form a singly linked list)
 *
 *    NOTE(review): nothing in the code shown here references this struct;
 *    it looks like a leftover from the VLAN code this driver is modelled on.
 */
struct vnic_priority_tci_mapping {
    u32                    priority;
    u16                    vnic_qos;
    struct vnic_priority_tci_mapping    *next;
};

/**
 *    struct vnic_pcpu_stats - vnic percpu rx/tx stats
 *    @rx_packets: number of received packets
 *    @rx_bytes: number of received bytes
 *    @rx_multicast: number of received multicast packets
 *    @tx_packets: number of transmitted packets
 *    @tx_bytes: number of transmitted bytes
 *    @syncp: synchronization point for 64bit counters
 *    @rx_errors: number of rx errors
 *    @tx_dropped: number of tx drops
 *
 *    The u64 counters are updated and read inside @syncp sections; the two
 *    u32 counters are read and written without it.
 */
struct vnic_pcpu_stats {
    u64            rx_packets;
    u64            rx_bytes;
    u64            rx_multicast;
    u64            tx_packets;
    u64            tx_bytes;
    struct u64_stats_sync    syncp;
    u32            rx_errors;
    u32            tx_dropped;
};

/**
 *    struct vnic_dev_info - vnic private device data
 *    @vnic_id: vnic identifier (index of this device in the vnic_group)
 *    @real_dev: underlying netdevice
 *    @real_dev_addr: address of underlying netdevice
 *    @dent: proc dir entry
 *    @vnic_pcpu_stats: ptr to percpu rx/tx stats
 *
 *    Stored in the net_device private area; fetch it with vnic_dev_info().
 */
struct vnic_dev_info {
    u8                  vnic_id;

    struct net_device            *real_dev;
    unsigned char                real_dev_addr[ETH_ALEN];

    struct proc_dir_entry            *dent;
    struct vnic_pcpu_stats __percpu        *vnic_pcpu_stats;
};

/* Return the vnic private data stored in the private area of @dev. */
static inline struct vnic_dev_info *vnic_dev_info(const struct net_device *dev)
{
    struct vnic_dev_info *priv = netdev_priv(dev);

    return priv;
}

extern int register_vnic_dev(struct net_device *dev);
extern void unregister_vnic_dev(void);

struct proc_dir_entry;

/*
 * struct vnic_net - proc entries and naming scheme bookkeeping.
 *
 * NOTE(review): not referenced by any code shown here; presumably carried
 * over from the VLAN implementation.
 */
struct vnic_net {
    /* /proc/net/vnic */
    struct proc_dir_entry *proc_vnic_dir;
    /* /proc/net/vnic/config */
    struct proc_dir_entry *proc_vnic_conf;
    /* Determines interface naming scheme. */
    unsigned short name_type;
};

/*
 * struct vnic_group - all vnic devices stacked on one real device.
 *
 * vnic_devices_arrays holds VNIC_GROUP_ARRAY_SPLIT_PARTS pointers, each to a
 * lazily allocated array of VNIC_GROUP_ARRAY_PART_LEN net_device pointers
 * indexed by vnic id (see vnic_group_set_device()/vnic_group_get_device()).
 */
struct vnic_group {
    struct net_device    *real_dev; /* The ethernet(like) device
                        * the vnic is attached to.
                        */
    unsigned int        nr_vnics;   /* number of registered vnic devices */
    struct hlist_node    hlist;    /* linked list */
    struct net_device **vnic_devices_arrays[VNIC_GROUP_ARRAY_SPLIT_PARTS];
    struct rcu_head        rcu;
};

/**
 *    struct vnic_ethhdr - vnic ethernet header (ethhdr + vnic_hdr)
 *    @h_dest: destination ethernet address
 *    @h_source: source ethernet address
 *    @h_vnic_proto: ethernet protocol (always 0x8877)
 *    @h_vnic_data: 16-bit vnic payload in network byte order; the sender
 *                  stores ('w' << 8) | vnic_id here
 */
struct vnic_ethhdr {
    unsigned char    h_dest[ETH_ALEN];
    unsigned char    h_source[ETH_ALEN];
    __be16        h_vnic_proto;
    __be16        h_vnic_data;
};

/*
 * vnic_group_set_device - store @dev in the slot of @vg selected by @vnic_id.
 * @vg:      group to update; NULL is ignored
 * @vnic_id: slot index; ids outside the group's id space are ignored
 * @dev:     device to store, or NULL to clear the slot
 *
 * The call is a no-op when the part array covering @vnic_id has not been
 * allocated yet, instead of writing through a NULL pointer.
 */
static inline void vnic_group_set_device(struct vnic_group *vg,
                     u16 vnic_id,
                     struct net_device *dev)
{
    struct net_device **array;
    unsigned int part = vnic_id / VNIC_GROUP_ARRAY_PART_LEN;

    if (!vg)
        return;
    /* Reject ids that would index past vnic_devices_arrays[]. */
    if (part >= VNIC_GROUP_ARRAY_SPLIT_PARTS)
        return;
    array = vg->vnic_devices_arrays[part];
    if (!array)
        return;
    array[vnic_id % VNIC_GROUP_ARRAY_PART_LEN] = dev;
}

/*
 * vnic_group_get_device - look up the device registered under @vnic_id.
 * @vg:      group to search; NULL yields NULL
 * @vnic_id: slot index; may come straight from a received frame, so it is
 *           range-checked before it is used as an array index
 *
 * Returns the stored net_device, or NULL when the group is missing, the id
 * is out of range, the part array is not allocated or the slot is empty.
 */
static inline struct net_device *vnic_group_get_device(struct vnic_group *vg,
                     u16 vnic_id)
{
    struct net_device **array;
    unsigned int part = vnic_id / VNIC_GROUP_ARRAY_PART_LEN;

    if (!vg || part >= VNIC_GROUP_ARRAY_SPLIT_PARTS)
        return NULL;
    array = vg->vnic_devices_arrays[part];
    return array ? array[vnic_id % VNIC_GROUP_ARRAY_PART_LEN] : NULL;
}

/*
 * vnic_put_tag - insert the 4-byte vnic tag behind the two MAC addresses.
 * @skb:       frame whose data starts at the Ethernet header
 * @vnic_data: host-order value stored in h_vnic_data
 *
 * Returns @skb on success.  If no headroom can be obtained the skb is freed
 * and NULL is returned, so the caller must not touch @skb afterwards.
 */
static inline struct sk_buff *vnic_put_tag(struct sk_buff *skb, u16 vnic_data)
{
    struct vnic_ethhdr *beth;

    if (skb_cow_head(skb, VNIC_HLEN) < 0) {
        kfree_skb(skb);
        return NULL;
    }
    beth = (struct vnic_ethhdr *)skb_push(skb, VNIC_HLEN);

    /* Move the two MAC addresses to the start of the enlarged header. */
    memmove(skb->data, skb->data + VNIC_HLEN, 2 * VNIC_ETH_ALEN);
    skb->mac_header -= VNIC_HLEN;

    /* Protocol type; this value is what goes out on the wire. */
    beth->h_vnic_proto = htons(ETH_P_VNIC);

    beth->h_vnic_data = htons(vnic_data);
    skb->protocol = htons(ETH_P_VNIC);//protocol as seen by the local stack; NOTE(review): the author states 0 would work too since the kernel has no handler for this private protocol

    return skb;
}


#endif /*  */


vnic_core.c

/*
 */

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/rculist.h>
#include <net/p8022.h>
#include <net/arp.h>
#include <net/ip.h>
#include <linux/rtnetlink.h>
#include <linux/notifier.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <asm/uaccess.h>
#include <linux/etherdevice.h>
#include <linux/ip.h>

#include "vnic.h"

#define DRV_VERSION "1.0"

/* End of global variables definitions. */

/*
 * Release a vnic_group: every per-part pointer array first, then the group
 * itself.  The net_devices referenced from the arrays are not touched.
 */
static void vnic_group_free(struct vnic_group *grp)
{
    struct net_device ***part = grp->vnic_devices_arrays;
    struct net_device ***end = part + VNIC_GROUP_ARRAY_SPLIT_PARTS;

    while (part < end)
        kfree(*part++);

    kfree(grp);
}

/* Allocate a zero-filled vnic_group; returns NULL when out of memory. */
static struct vnic_group *vnic_group_alloc(void)
{
    return kzalloc(sizeof(struct vnic_group), GFP_KERNEL);
}

/*
 * Make sure the pointer array that covers @vnic_id exists in @vg.
 * Returns 0 if it was already present or has just been allocated
 * (zero-filled, VNIC_GROUP_ARRAY_PART_LEN entries), -ENOBUFS otherwise.
 */
static int vnic_group_prealloc_vid(struct vnic_group *vg, u16 vnic_id)
{
    struct net_device ***slot;

    /* Indexing by part keeps the multi-part layout; only one part exists. */
    slot = &vg->vnic_devices_arrays[vnic_id / VNIC_GROUP_ARRAY_PART_LEN];
    if (*slot != NULL)
        return 0;

    *slot = kzalloc(sizeof(struct net_device *) * VNIC_GROUP_ARRAY_PART_LEN,
            GFP_KERNEL);

    return (*slot != NULL) ? 0 : -ENOBUFS;
}

/* Underlying device looked up in vnic_proto_init(); a reference is held. */
static struct net_device *real_netdev = NULL;
/* The single group that holds every vnic stacked on real_netdev. */
static struct vnic_group *vnic_grp = NULL;

static int vnic_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev)
{
    struct net_device *vnic_dev;
    struct vnic_pcpu_stats *rx_stats;
    u16 vnic_id = skb->data[1];//这部就相当于解析我们自己的头

#if 1
    u16 i;
    for (i = 0; i < skb->mac_len; i++)
    {
        printk("%x ", skb->mac_header[i]);
    }

    printk("\n");
#endif

    vnic_dev = vnic_group_get_device(vnic_grp, vnic_id);
    if (vnic_dev == NULL)
    {
        return -1;
    }

    //检查skb的应用计数是否大于1,大于1意味着内核的其他部分拥有对
    //该缓冲区的引用。如果大于1,会自己建立一份缓冲区副本。
    skb = skb_share_check(skb, GFP_ATOMIC);
    if (unlikely(!skb))
    {
        return false;
    }

    skb->dev = vnic_dev;
    //PACKET_OTHERHOST表示L2目的地址和接收接口的地址不同
    //通常会被丢弃掉。如果网卡进入混杂模式,会接收所以包
    //这里我们就要自己比较一下。
    if (skb->pkt_type == PACKET_OTHERHOST) {
        if (!compare_ether_addr(eth_hdr(skb)->h_dest, vnic_dev->dev_addr))
            skb->pkt_type = PACKET_HOST;
    }

    rx_stats = (struct vnic_pcpu_stats *)this_cpu_ptr(vnic_dev_info(vnic_dev)->vnic_pcpu_stats);
    
    u64_stats_update_begin(&rx_stats->syncp);
    rx_stats->rx_packets++;
    rx_stats->rx_bytes += skb->len;
    if (skb->pkt_type == PACKET_MULTICAST)
        rx_stats->rx_multicast++;
    u64_stats_update_end(&rx_stats->syncp);

    return 0;
}

/*
 * Protocol hook: frames whose EtherType is ETH_P_VNIC are handed to
 * vnic_rcv().  Installed with dev_add_pack() in vnic_proto_init().
 */
static struct packet_type vnic_pack_type __read_mostly =
{
    .type = cpu_to_be16(ETH_P_VNIC),
    .func = vnic_rcv,
};

void unregister_vnic_dev(void)
{
    struct vnic_dev_info *vnic;
    struct net_device *real_dev;
    struct net_device *tempdev;
    u16 n = vnic_grp->nr_vnics, i;
    LIST_HEAD(list);

    rtnl_lock();
    for (i = 0; i < n; i++)
    {
        tempdev = vnic_group_get_device(vnic_grp, i);
        if (tempdev == NULL)
        {
            goto enodev;
        }
        vnic = vnic_dev_info(tempdev);
        real_dev = vnic->real_dev;

        vnic_grp->nr_vnics--;//已申请虚拟网卡数减一
        vnic_group_set_device(vnic_grp, i, NULL);//根据vnic_id获取对应的虚拟网卡

        //unregister_netdevice_queue(tempdev, &list);
        unregister_netdevice(tempdev);
        /* 减少真实设备的应用应用计数 */
        dev_put(real_dev);
        vnic->real_dev = NULL;
    }
enodev:
    rtnl_unlock();
}
EXPORT_SYMBOL(unregister_vnic_dev);

int register_vnic_dev(struct net_device *dev)
{
    struct vnic_dev_info *vnic = vnic_dev_info(dev);
    struct net_device *real_dev = vnic->real_dev = real_netdev;
    u16 vnic_id = vnic->vnic_id;
    int err;

    if ((vnic_grp == NULL) || (real_netdev == NULL))
    {
        return -ENODEV;
    }
    
    dev->mtu = real_netdev->mtu;


    rtnl_lock();

    err = register_netdevice(dev);//装载网卡
    if (err < 0)
    {
        rtnl_unlock();

        goto out_uninit_applicant;
    }

    /* 增加真实设备的应用应用计数 */
    dev_hold(real_dev);

    //这个函数是根据real_dev的dormat和连接状态来控制dev的连接上报
    //下面的event也是通过它。
    netif_stacked_transfer_operstate(real_dev, dev);
    
    rtnl_unlock();

    /* So, got the sucker initialized, now lets place
     * it into our local structure.
     */
    vnic_group_set_device(vnic_grp, vnic_id, dev);//设置次虚拟网卡的id
    vnic_grp->nr_vnics++;//已申请虚拟网卡数加一

    return 0;

out_uninit_applicant:

    return err;
}
EXPORT_SYMBOL(register_vnic_dev);

/*
 * vnic_sync_address - adjust unicast filtering after the underlying device
 * changed its MAC address.
 * @dev:     underlying device (already carrying the new address)
 * @vnicdev: vnic stacked on @dev
 *
 * vnic->real_dev_addr caches the address the underlying device had before
 * the change; it is refreshed at the end.
 */
static void vnic_sync_address(struct net_device *dev,
                  struct net_device *vnicdev)//synchronise MAC address state
{
    struct vnic_dev_info *vnic = vnic_dev_info(vnicdev);

    /* May be called without an actual change */
    /*
    compare_ether_addr() is used as "0 means equal" throughout this file;
    the author describes it as
    ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0,
    i.e. the result is non-zero when the addresses DIFFER.
    */
    if (!compare_ether_addr(vnic->real_dev_addr, dev->dev_addr))//unchanged: nothing to do
        return;

    /* vnic address was different from the old address and is equal to
     * the new address */
    if (compare_ether_addr(vnicdev->dev_addr, vnic->real_dev_addr) &&
        !compare_ether_addr(vnicdev->dev_addr, dev->dev_addr))
        dev_uc_del(dev, vnicdev->dev_addr);//drop the extra unicast entry

    /* vnic address was equal to the old address and is different from
     * the new address */
    if (!compare_ether_addr(vnicdev->dev_addr, vnic->real_dev_addr) &&
        compare_ether_addr(vnicdev->dev_addr, dev->dev_addr))
        dev_uc_add(dev, vnicdev->dev_addr);//add a unicast entry so the real device keeps receiving for the vnic MAC
                                           //NOTE(review): the author states this puts the device into promiscuous mode - confirm

    memcpy(vnic->real_dev_addr, dev->dev_addr, ETH_ALEN);//remember the new address
}

/*
 * vnic_device_event - netdevice notifier callback.
 * @unused: notifier block (unused)
 * @event:  NETDEV_* event code
 * @ptr:    the net_device the event is about
 *
 * Propagates state changes of the underlying device to every vnic stacked
 * on it.  Events for any other device (including the vnics themselves) are
 * ignored; without that filter, bringing one vnic up or another interface
 * down would toggle every vnic, and type changes would be refused for
 * devices this driver has nothing to do with.
 *
 * Returns NOTIFY_BAD to veto a type change of the underlying device,
 * NOTIFY_DONE otherwise.
 */
static int vnic_device_event(struct notifier_block *unused, unsigned long event,
                 void *ptr)
{
    struct net_device *dev = ptr;
    int i, flgs;
    struct net_device *vnicdev;

    if (!vnic_grp)
        goto out;

    /* Only the device the vnics are attached to is of interest. */
    if (dev != vnic_grp->real_dev)
        goto out;

    /* It is OK that we do not hold the group lock right now,
     * as we run under the RTNL lock.
     */

    switch (event) {
    case NETDEV_CHANGE:
        /* Propagate real device state to vnic devices */
        for (i = 0; i < VNIC_N_VID; i++) {
            vnicdev = vnic_group_get_device(vnic_grp, i);
            if (!vnicdev)
                continue;

            netif_stacked_transfer_operstate(dev, vnicdev);
        }
        break;

    case NETDEV_CHANGEADDR:
        /* Adjust unicast filters on underlying device */
        for (i = 0; i < VNIC_N_VID; i++) {
            vnicdev = vnic_group_get_device(vnic_grp, i);
            if (!vnicdev)
                continue;

            flgs = vnicdev->flags;
            if (!(flgs & IFF_UP))
                continue;

            vnic_sync_address(dev, vnicdev);
        }
        break;

    case NETDEV_CHANGEMTU:
        /* A vnic may never have a larger MTU than the real device. */
        for (i = 0; i < VNIC_N_VID; i++) {
            vnicdev = vnic_group_get_device(vnic_grp, i);
            if (!vnicdev)
                continue;

            if (vnicdev->mtu <= dev->mtu)
                continue;

            dev_set_mtu(vnicdev, dev->mtu);
        }
        break;

    case NETDEV_FEAT_CHANGE:
        /* Let every vnic recompute its features from the real device */
        for (i = 0; i < VNIC_N_VID; i++) {
            vnicdev = vnic_group_get_device(vnic_grp, i);
            if (!vnicdev)
                continue;

            netdev_update_features(vnicdev);
        }

        break;

    case NETDEV_DOWN:
        /* Put all VNICs for this dev in the down state too.  */
        for (i = 0; i < VNIC_N_VID; i++) {
            vnicdev = vnic_group_get_device(vnic_grp, i);
            if (!vnicdev)
                continue;

            flgs = vnicdev->flags;
            if (!(flgs & IFF_UP))
                continue;

            dev_change_flags(vnicdev, flgs & ~IFF_UP);
            netif_stacked_transfer_operstate(dev, vnicdev);
        }
        break;

    case NETDEV_UP:
        /* Put all VNICs for this dev in the up state too.  */
        for (i = 0; i < VNIC_N_VID; i++) {
            vnicdev = vnic_group_get_device(vnic_grp, i);
            if (!vnicdev)
                continue;

            flgs = vnicdev->flags;
            if (flgs & IFF_UP)
                continue;

            dev_change_flags(vnicdev, flgs | IFF_UP);
            netif_stacked_transfer_operstate(dev, vnicdev);
        }
        break;

    case NETDEV_UNREGISTER:
        /* twiddle thumbs on netns device moves */
        if (dev->reg_state != NETREG_UNREGISTERING)
            break;

        /*
         * NOTE(review): the vnics are deliberately not torn down here.
         * unregister_vnic_dev() takes the RTNL lock itself and must not
         * be called from this notifier.
         */
        break;

    /* The events below are related to bonding. */
    case NETDEV_PRE_TYPE_CHANGE:
        /* Forbid underlaying device to change its type. */
        return NOTIFY_BAD;

    case NETDEV_NOTIFY_PEERS:
    case NETDEV_BONDING_FAILOVER:
        /* Propagate to vnic devices */
        for (i = 0; i < VNIC_N_VID; i++) {
            vnicdev = vnic_group_get_device(vnic_grp, i);
            if (!vnicdev)
                continue;

            call_netdevice_notifiers(event, vnicdev);
        }
        break;

    default:
        break;
    }

out:
    return NOTIFY_DONE;
}

/* Notifier that routes netdevice events to vnic_device_event(). */
static struct notifier_block vnic_notifier_block __read_mostly = {
    .notifier_call = vnic_device_event,
};

/*
 * vnic_proto_init - module init for the vnic core.
 *
 * Looks up the underlying device "eth0" (taking a reference), allocates the
 * group with its first slot array, registers the netdevice notifier and
 * finally installs the ETH_P_VNIC packet handler.
 *
 * Every failure path releases what was acquired before it, including the
 * device reference, and resets the globals so no stale pointer remains.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init vnic_proto_init(void)
{
    int err;

    real_netdev = dev_get_by_name(&init_net, "eth0");
    if (real_netdev == NULL)
    {
        return -ENODEV;
    }

    vnic_grp = vnic_group_alloc();
    if (vnic_grp == NULL)
    {
        err = -ENOBUFS;
        goto err_put_dev;
    }

    vnic_grp->real_dev = real_netdev;

    err = vnic_group_prealloc_vid(vnic_grp, 0);
    if (err < 0)
    {
        goto err_free_grp;
    }

    err = register_netdevice_notifier(&vnic_notifier_block);
    if (err < 0)
    {
        goto err_free_grp;
    }

    dev_add_pack(&vnic_pack_type);

    return 0;

err_free_grp:
    vnic_group_free(vnic_grp);
    vnic_grp = NULL;
err_put_dev:
    /* Give back the reference taken by dev_get_by_name(). */
    dev_put(real_netdev);
    real_netdev = NULL;

    return err;
}
    
/*
 * vnic_cleanup_module - module exit for the vnic core.
 *
 * Teardown runs in the reverse order of vnic_proto_init(): the packet
 * handler and the notifier are removed first so neither can run against a
 * freed group, then the group is freed and the reference on the underlying
 * device is dropped.
 */
static void __exit vnic_cleanup_module(void)
{
    dev_remove_pack(&vnic_pack_type);

    unregister_netdevice_notifier(&vnic_notifier_block);

    if (vnic_grp != NULL)
    {
        vnic_group_free(vnic_grp);
        vnic_grp = NULL;
    }

    if (real_netdev != NULL)
    {
        /* Reference taken by dev_get_by_name() in vnic_proto_init(). */
        dev_put(real_netdev);
        real_netdev = NULL;
    }
}

module_init(vnic_proto_init);
module_exit(vnic_cleanup_module);

MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);


vnic_dev.c

/*
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <net/arp.h>

#include "vnic.h"

#define VNIC_CNT 2

static netdev_tx_t vnic_dev_hard_start_xmit(struct sk_buff *skb,
                        struct net_device *dev)
{
    struct vnic_ethhdr *veth = (struct vnic_ethhdr *)(skb->data);
    unsigned int len;
    int ret;

    if (veth->h_vnic_proto != htons(ETH_P_VNIC)) 
    {
        vnic_put_tag(skb, ('w' << 8) | (vnic_dev_info(dev)->vnic_id));
    }

    skb_set_dev(skb, vnic_dev_info(dev)->real_dev);
    len = skb->len;
    ret = dev_queue_xmit(skb);
    if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) 
    {
        struct vnic_pcpu_stats *stats;
        stats = this_cpu_ptr(vnic_dev_info(dev)->vnic_pcpu_stats);
        u64_stats_update_begin(&stats->syncp);//内存屏蔽,保证上面的获取在下面的赋值前完成
        stats->tx_packets++;//发送报计数
        stats->tx_bytes += len;//发送字节计数
        u64_stats_update_end(&stats->syncp);
    } 
    else 
    {
        this_cpu_inc(vnic_dev_info(dev)->vnic_pcpu_stats->tx_dropped);//失败计数
    }

    return ret;
}

//更改最大传输单元的长度
static int vnic_dev_change_mtu(struct net_device *dev, int new_mtu)
{
    if (vnic_dev_info(dev)->real_dev->mtu < new_mtu)
        return -ERANGE;

    dev->mtu = new_mtu;

    return 0;
}

/*
 * Copy the name of the underlying device into @result.
 * strncpy() is given a fixed length of 23, so @result must provide at least
 * 23 bytes.  NOTE(review): the 23 is a magic number unrelated to IFNAMSIZ;
 * confirm against the callers' buffer size.
 */
void vnic_dev_get_realdev_name(const struct net_device *dev, char *result)
{
    strncpy(result, vnic_dev_info(dev)->real_dev->name, 23);
}

/*
 * vnic_dev_open - ndo_open: bring the vnic up.
 *
 * Fails with -ENETDOWN while the underlying device is down.  Otherwise the
 * vnic MAC (if it differs from the real one) is added to the real device's
 * unicast list and the allmulti/promiscuity counts of the real device are
 * raised according to the vnic flags.  On error everything done so far is
 * undone in reverse order.
 */
static int vnic_dev_open(struct net_device *dev)
{
    struct vnic_dev_info *vnic = vnic_dev_info(dev);
    struct net_device *real_dev = vnic->real_dev;
    int err;

    if (!(real_dev->flags & IFF_UP))//underlying device is not up
        return -ENETDOWN;

    //vnic has its own MAC: register it as extra unicast address on the real device
    if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) {
        err = dev_uc_add(real_dev, dev->dev_addr);
        if (err < 0)
            goto out;
    }

    if (dev->flags & IFF_ALLMULTI) {//all-multicast requested
        err = dev_set_allmulti(real_dev, 1);//bump the allmulti count of the real device
        if (err < 0)
            goto del_unicast;
    }
    if (dev->flags & IFF_PROMISC) {//promiscuous requested
        err = dev_set_promiscuity(real_dev, 1);//bump the promiscuity count of the real device
        if (err < 0)
            goto clear_allmulti;
    }

    memcpy(vnic->real_dev_addr, real_dev->dev_addr, ETH_ALEN);//cache the real device address

    if (netif_carrier_ok(real_dev))//report link state
        netif_carrier_on(dev);
    return 0;

clear_allmulti:
    if (dev->flags & IFF_ALLMULTI)
        dev_set_allmulti(real_dev, -1);
del_unicast:
    if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
        dev_uc_del(real_dev, dev->dev_addr);
out:
    netif_carrier_off(dev);
    return err;
}

/*
 * vnic_dev_stop - ndo_stop: take the vnic down.
 *
 * Reverses vnic_dev_open(): unsyncs the address lists from the real device,
 * lowers its allmulti/promiscuity counts, removes the vnic MAC from its
 * unicast list and reports carrier off.  Always returns 0.
 */
static int vnic_dev_stop(struct net_device *dev)
{
    struct vnic_dev_info *vnic = vnic_dev_info(dev);
    struct net_device *real_dev = vnic->real_dev;

    dev_mc_unsync(real_dev, dev);
    dev_uc_unsync(real_dev, dev);
    if (dev->flags & IFF_ALLMULTI)
        dev_set_allmulti(real_dev, -1);
    if (dev->flags & IFF_PROMISC)
        dev_set_promiscuity(real_dev, -1);

    /* Only a MAC that differs from the real one was added in open(). */
    if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
        dev_uc_del(real_dev, dev->dev_addr);

    netif_carrier_off(dev);
    return 0;
}

/*
 * vnic_dev_set_mac_address - ndo_set_mac_address for the vnic.
 * @p: struct sockaddr carrying the new address in sa_data
 *
 * Returns -EADDRNOTAVAIL for an invalid Ethernet address.  While the vnic is
 * up, the unicast list of the real device is updated: the new address is
 * added (if it differs from the real MAC) before the old one is removed.
 * While the vnic is down only dev->dev_addr is changed.
 */
static int vnic_dev_set_mac_address(struct net_device *dev, void *p)
{
    struct net_device *real_dev = vnic_dev_info(dev)->real_dev;
    struct sockaddr *addr = p;
    int err;

    if (!is_valid_ether_addr(addr->sa_data))
        return -EADDRNOTAVAIL;

    if (!(dev->flags & IFF_UP))
        goto out;

    //same unicast-list handling as vnic_sync_address() in vnic_core.c
    if (compare_ether_addr(addr->sa_data, real_dev->dev_addr)) {
        err = dev_uc_add(real_dev, addr->sa_data);
        if (err < 0)
            return err;
    }

    if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
        dev_uc_del(real_dev, dev->dev_addr);

out:
    memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
    return 0;
}

/*
 * vnic_dev_ioctl - ndo_do_ioctl: forward MII ioctls to the real device.
 *
 * Only SIOCGMIIPHY, SIOCGMIIREG and SIOCSMIIREG are handled; they are passed
 * to the real device's ndo_do_ioctl with a copy of the request that names
 * the real device.  On success the result is copied back into @ifr.
 * Everything else, or a real device without the hook, yields -EOPNOTSUPP.
 */
static int vnic_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
    struct net_device *real_dev = vnic_dev_info(dev)->real_dev;
    const struct net_device_ops *ops = real_dev->netdev_ops;
    struct ifreq ifrr;
    int err = -EOPNOTSUPP;

    strncpy(ifrr.ifr_name, real_dev->name, IFNAMSIZ);
    ifrr.ifr_ifru = ifr->ifr_ifru;

    switch (cmd) {
    case SIOCGMIIPHY:
    case SIOCGMIIREG:
    case SIOCSMIIREG:
        if (netif_device_present(real_dev) && ops->ndo_do_ioctl)
            err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd);
        break;
    }

    if (!err)
        ifr->ifr_ifru = ifrr.ifr_ifru;

    return err;
}

/*
 * ndo_change_rx_flags: mirror changes of the vnic's IFF_ALLMULTI and
 * IFF_PROMISC flags onto the underlying device by raising (flag now set)
 * or lowering (flag now clear) the matching count.
 */
static void vnic_dev_change_rx_flags(struct net_device *dev, int change)
{
    struct net_device *lower = vnic_dev_info(dev)->real_dev;
    int delta;

    if (change & IFF_ALLMULTI) {
        delta = (dev->flags & IFF_ALLMULTI) ? 1 : -1;
        dev_set_allmulti(lower, delta);
    }

    if (change & IFF_PROMISC) {
        delta = (dev->flags & IFF_PROMISC) ? 1 : -1;
        dev_set_promiscuity(lower, delta);
    }
}

/*
 * ndo_set_rx_mode: a virtual NIC has no filter hardware of its own, so its
 * multicast and unicast address lists are synced to the underlying device
 * (dev_mc_sync / dev_uc_sync), which does the actual filtering.
 */
static void vnic_dev_set_rx_mode(struct net_device *vnic_dev)
{
    struct net_device *lower = vnic_dev_info(vnic_dev)->real_dev;

    dev_mc_sync(lower, vnic_dev);
    dev_uc_sync(lower, vnic_dev);
}

/*
 * vnic_dev_init - ndo_init: derive the vnic's settings from the real device.
 *
 * Copies flags, link state, GSO limit, dev_id, head/tailroom and (when still
 * zero) the MAC and broadcast addresses from the underlying device, then
 * allocates the per-cpu statistics.
 *
 * Returns 0 on success, -ENOMEM if the per-cpu allocation fails.
 */
static int vnic_dev_init(struct net_device *dev)
{
    struct net_device *real_dev = vnic_dev_info(dev)->real_dev;

    netif_carrier_off(dev);//report link down

    /* IFF_BROADCAST|IFF_MULTICAST; ??? */
    /*
    Right after registration the interface is still closed, so some flags
    inherited from the real device have to be cleared:
    IFF_UP:       interface is up
    IFF_PROMISC:  promiscuous mode
    IFF_ALLMULTI: receive all multicast packets
    IFF_MASTER and IFF_SLAVE: master/slave of a load balancer
    */
    dev->flags  = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
                      IFF_MASTER | IFF_SLAVE);
    dev->iflink = real_dev->ifindex;//interface index of the underlying device
    /*
    Link state: take over only NOCARRIER and DORMANT from the real device
    and mark the vnic as present; the other state bits are not copied.
    */
    dev->state  = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
                      (1<<__LINK_STATE_DORMANT))) |
              (1<<__LINK_STATE_PRESENT);
    /*
    hw_features: checksum offload, scatter/gather I/O, fragment lists,
    TCP segmentation offload, DMA to high memory, SCTP checksum offload.
    */
    dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG |
               NETIF_F_FRAGLIST | NETIF_F_ALL_TSO |
               NETIF_F_HIGHDMA | NETIF_F_SCTP_CSUM;
    /*
    Background (author's notes):
    TSO (TCP Segmentation Offload, also LSO): the NIC segments TCP data to
    relieve the CPU; UFO is the UDP counterpart.  TSO also needs hardware
    TCP checksumming and scatter/gather.

    GSO (Generic Segmentation Offload): more general than TSO; segmentation
    is postponed until just before the driver.  If the NIC can segment
    (TSO/UFO) the large packet is handed over as is, otherwise it is split
    in software first, so it traverses the stack only once.

    LRO (Large Receive Offload): merges several received TCP segments into
    one large packet before passing it up the stack.

    GRO (Generic Receive Offload): same idea as LRO without some of its
    drawbacks; newer drivers use the GRO interface instead of LRO.

    RSS (Receive Side Scaling): multi-queue NICs spread flows over several
    queues that can be served by different CPU cores.
    */
    dev->gso_max_size = real_dev->gso_max_size;

    /* Related to shared cards for IPv6 */
    dev->dev_id = real_dev->dev_id;

    //if no address has been set yet, inherit the real device's addresses
    if (is_zero_ether_addr(dev->dev_addr))
        memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len);
    if (is_zero_ether_addr(dev->broadcast))
        memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len);

    //head/tail room the hardware may need
    dev->needed_headroom = real_dev->needed_headroom;
    dev->needed_tailroom = real_dev->needed_tailroom;

    //dev->hard_header_len = real_dev->hard_header_len + VNIC_HLEN;//our own protocol header could be accounted for here

    //vnic_pcpu_stats holds the interface statistics; it is allocated per cpu
    //so that every cpu has its own copy and no lock is needed
    vnic_dev_info(dev)->vnic_pcpu_stats = alloc_percpu(struct vnic_pcpu_stats);
    if (!vnic_dev_info(dev)->vnic_pcpu_stats)
        return -ENOMEM;

    return 0;
}

static void vnic_dev_uninit(struct net_device *dev)
{
    struct vnic_dev_info *vnic = vnic_dev_info(dev);

    free_percpu(vnic->vnic_pcpu_stats);
    vnic->vnic_pcpu_stats = NULL;//对应上面的alloc
}

/*
 * ndo_fix_features: limit the requested feature set to what the underlying
 * device offers.  Software-only features (NETIF_F_SOFT_FEATURES) that were
 * requested are always kept, NETIF_F_RXCSUM is added when the real device
 * reports rx checksumming, and NETIF_F_LLTX is always set.
 */
static u32 vnic_dev_fix_features(struct net_device *dev, u32 features)
{
    struct net_device *lower = vnic_dev_info(dev)->real_dev;
    const u32 requested = features;
    u32 result;

    result = requested & lower->features;
    /* Soft features need no hardware support: keep whatever was asked for. */
    result |= requested & NETIF_F_SOFT_FEATURES;

    if (dev_ethtool_get_rx_csum(lower))
        result |= NETIF_F_RXCSUM;

    return result | NETIF_F_LLTX;
}

static int vnic_ethtool_get_settings(struct net_device *dev,
                     struct ethtool_cmd *cmd)
{
    const struct vnic_dev_info *vnic = vnic_dev_info(dev);
    return dev_ethtool_get_settings(vnic->real_dev, cmd);
}

/* ethtool get_drvinfo: fill in fixed driver name, version and fw version. */
static void vnic_ethtool_get_drvinfo(struct net_device *dev,
                     struct ethtool_drvinfo *info)
{
    strcpy(info->driver, "vnic");
    strcpy(info->version, "v1.0");
    strcpy(info->fw_version, "N/A");
}

/*
 * vnic_dev_get_stats64 - ndo_get_stats64: sum the per-cpu counters.
 *
 * The 64-bit counters of every possible CPU are read inside a
 * u64_stats fetch/retry loop and added to @stats; the two u32 counters are
 * summed without it.  Returns @stats.
 */
static struct rtnl_link_stats64 *vnic_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
{

    if (vnic_dev_info(dev)->vnic_pcpu_stats) {
        struct vnic_pcpu_stats *p;
        u32 rx_errors = 0, tx_dropped = 0;
        int i;

        for_each_possible_cpu(i) {//visits every possible cpu (NOTE(review): the author observed a single iteration on their board)
            u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
            unsigned int start;

            p = per_cpu_ptr(vnic_dev_info(dev)->vnic_pcpu_stats, i);//this cpu's copy of the counters
            do {
                start = u64_stats_fetch_begin_bh(&p->syncp);
                rxpackets    = p->rx_packets;
                rxbytes        = p->rx_bytes;
                rxmulticast    = p->rx_multicast;
                txpackets    = p->tx_packets;
                txbytes        = p->tx_bytes;
            } while (u64_stats_fetch_retry_bh(&p->syncp, start));

            stats->rx_packets    += rxpackets;
            stats->rx_bytes        += rxbytes;
            stats->multicast    += rxmulticast;
            stats->tx_packets    += txpackets;
            stats->tx_bytes        += txbytes;
            /* rx_errors & tx_dropped are u32 */
            rx_errors    += p->rx_errors;
            tx_dropped    += p->tx_dropped;//accumulate across cpus
        }
        stats->rx_errors  = rx_errors;
        stats->tx_dropped = tx_dropped;
        //at this point the counters of all cpus have been summed up
    }
    return stats;
}

/* ethtool hooks of the vnic; installed by vnic_setup(). */
static const struct ethtool_ops vnic_ethtool_ops = {
    .get_settings            = vnic_ethtool_get_settings,
    .get_drvinfo            = vnic_ethtool_get_drvinfo,
    .get_link        = ethtool_op_get_link,
};

/* net_device hooks of the vnic; installed by vnic_setup(). */
static const struct net_device_ops vnic_netdev_ops = {
//change_mtu: corresponds to the SIOCSIFMTU ioctl
    .ndo_change_mtu        = vnic_dev_change_mtu,
//init and uninit are called on registration and unregistration
    .ndo_init        = vnic_dev_init,
    .ndo_uninit        = vnic_dev_uninit,
//open and stop: e.g. "ifconfig X up" ends up in __dev_open via sock_ioctl
    .ndo_open        = vnic_dev_open,
    .ndo_stop        = vnic_dev_stop,
//transmit
    .ndo_start_xmit =  vnic_dev_hard_start_xmit,
//checks whether the MAC address is valid
    .ndo_validate_addr    = eth_validate_addr,
//set the MAC address
    .ndo_set_mac_address    = vnic_dev_set_mac_address,
//called when the device address filter lists change
    .ndo_set_rx_mode    = vnic_dev_set_rx_mode,
    .ndo_set_multicast_list    = vnic_dev_set_rx_mode,
//corresponds to the SIOCSIFFLAGS ioctl
    .ndo_change_rx_flags    = vnic_dev_change_rx_flags,
    .ndo_do_ioctl        = vnic_dev_ioctl,

//the result of ndo_get_stats64 shows up in /proc/net/dev
    .ndo_get_stats64    = vnic_dev_get_stats64,
//device feature negotiation
    .ndo_fix_features    = vnic_dev_fix_features,
};

/*
 * vnic_setup - alloc_netdev() setup callback for vnic devices.
 *
 * Starts from the generic Ethernet defaults, sets tx_queue_len to 0 and
 * installs the vnic netdev/ethtool ops.  The broadcast address is zeroed so
 * that vnic_dev_init() copies it from the underlying device.
 */
void vnic_setup(struct net_device *dev)
{
    ether_setup(dev);

    dev->tx_queue_len    = 0;

    dev->netdev_ops        = &vnic_netdev_ops;
    dev->ethtool_ops    = &vnic_ethtool_ops;

    memset(dev->broadcast, 0, ETH_ALEN);
}

struct net_device *ndev[VNIC_CNT];

/*
 * vnic_cleanup - unregister and free every vnic created by this module.
 *
 * Used both as the module exit handler and from the error path of
 * vnic_init_module().  It is therefore deliberately NOT marked __exit:
 * __exit code may be discarded, and calling it from __init code is a
 * section mismatch.
 *
 * All slots of ndev[] are visited, so a hole in the array does not leave
 * later devices allocated.
 */
static void vnic_cleanup(void)
{
    int n;

    unregister_vnic_dev();

    for (n = 0; n < VNIC_CNT; n++)
    {
        if (ndev[n] == NULL)
            continue;

        free_netdev(ndev[n]);
        ndev[n] = NULL;
    }
}

static __init int vnic_init_module(void)
{
    char name[IFNAMSIZ];
    int err, n;

    for (n = 0; n < VNIC_CNT; n++)
    {
        snprintf(name, IFNAMSIZ, "vnic%d", n);
    
        ndev[n] = alloc_netdev(sizeof(struct vnic_dev_info), name, vnic_setup);
        if (ndev[n] == NULL)
        {
            err = -ENOBUFS;
            goto out_free_newdev;
        }

        vnic_dev_info(ndev[n])->vnic_id = n;

        err = register_vnic_dev(ndev[n]);
        if (err < 0)
        {
            free_netdev(ndev[n]);
            ndev[n] = NULL;

            goto out_free_newdev;
        }
    }

    return 0;

out_free_newdev:
    vnic_cleanup();

    return err;
}

module_init(vnic_init_module);
module_exit(vnic_cleanup);

MODULE_LICENSE("GPL");

下面是应用层的代码:

send.c:在 pc 上编译时定义宏 X86_TEST,在 arm 上编译时去掉该定义

#include <stdio.h>  
#include <stdlib.h>  
#include <string.h>  
#include <unistd.h>  
#include <sys/socket.h>  
#include <linux/if_packet.h>  
#include <netdb.h>  
#include <errno.h>  
#include <arpa/inet.h>  
#include <sys/ioctl.h>  
#include <pthread.h>    
#include <net/ethernet.h>  
#include <netinet/ether.h>  
#include <net/if.h>  
#include <netinet/ip.h>  
#include<netinet/tcp.h>

static int sockfd;   
static struct sockaddr_ll peer_addr;   

static unsigned char vnic_num = 0;//选择哪个网卡

#define X86_TEST /* define when building for the PC; remove it for the ARM board */

/*
 * EtherType of the private vnic protocol.  main() opens the socket with it
 * on every build, so it must not depend on X86_TEST.
 */
#define ETH_P_VNIC    0x8877

/*
 * Frame layout sent by send_pkt(): a plain Ethernet header followed by the
 * 16-bit vnic data word (stored in network byte order).  Despite the name
 * this is not an ARP packet.
 */
struct arp_packet 
{  
    struct ether_header  eh;  
    short data;
};  
 
/*
 * send_pkt - build one frame and send it through the global raw socket.
 *
 * Source and destination MAC depend on the build: the PC build (X86_TEST)
 * sends from the PC's MAC to the board's, the ARM build the other way
 * round.  On the PC the vnic EtherType and the data word
 * ('w' << 8 | vnic_num) are filled in here; on the board both fields stay
 * zero (the vnic driver inserts its tag on transmit).
 *
 * A failed or short sendto() is reported on stderr.
 */
static void send_pkt(void)
{
#ifdef X86_TEST
    unsigned char src_mac[6] = {0x00, 0x0c, 0x29, 0x15, 0xd9, 0xc5}; /* sender address */
    unsigned char dst_mac[6] = {0xc0, 0x6f, 0x65, 0xaa, 0xdf, 0x61}; /* receiver address */
#else
    unsigned char src_mac[6] = {0xc0, 0x6f, 0x65, 0xaa, 0xdf, 0x61}; /* sender address */
    unsigned char dst_mac[6] = {0x00, 0x0c, 0x29, 0x15, 0xd9, 0xc5}; /* receiver address */
#endif
    struct arp_packet frame;
    ssize_t sent;

    memset(&frame, 0, sizeof(frame));
    memcpy(frame.eh.ether_dhost, dst_mac, sizeof(dst_mac));
    memcpy(frame.eh.ether_shost, src_mac, sizeof(src_mac));
#ifdef X86_TEST /* the PC has no vnic driver, so the tag is added here */
    frame.eh.ether_type = htons(ETH_P_VNIC);
    frame.data = htons(('w' << 8) | vnic_num);
#endif

    sent = sendto(sockfd, &frame, sizeof(frame), 0,
                  (struct sockaddr *)&peer_addr, sizeof(peer_addr));
    if (sent < 0)
    {
        perror("sendto()");
    }
    else if ((size_t)sent != sizeof(frame))
    {
        fprintf(stderr, "sendto(): short write (%zd of %zu bytes)\n",
                sent, sizeof(frame));
    }
}

 

/*
 * Usage: send <ifname> <vnic_id>
 *
 * Opens a raw AF_PACKET socket, resolves <ifname> to an interface index and
 * sends a single vnic frame addressed to virtual NIC <vnic_id>.
 *
 * Returns 0 when the frame was handed to send_pkt(), -1 on a usage error or
 * when the socket / interface lookup fails.
 */
int main(int argc, char **argv)
{
    struct ifreq req;
    unsigned long id;
    char *end;

    if (argc <= 2)
    {
        fprintf(stderr, "usage: %s <ifname> <vnic_id>\n",
                (argc > 0) ? argv[0] : "send");
        return -1;
    }

    /* The id travels in a single byte of the frame. */
    errno = 0;
    id = strtoul(argv[2], &end, 10);
    if (errno != 0 || end == argv[2] || *end != '\0' || id > 0xff)
    {
        fprintf(stderr, "invalid vnic id: %s\n", argv[2]);
        return -1;
    }
    vnic_num = (unsigned char)id;

    if (strlen(argv[1]) >= sizeof(req.ifr_name))
    {
        fprintf(stderr, "interface name too long: %s\n", argv[1]);
        return -1;
    }

    sockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_VNIC));
    if (sockfd == -1)
    {
        perror("socket()");
        return -1;
    }

    memset(&peer_addr, 0, sizeof(peer_addr));
    peer_addr.sll_family = AF_PACKET;

    memset(&req, 0, sizeof(req));
    snprintf(req.ifr_name, sizeof(req.ifr_name), "%s", argv[1]);

    if (ioctl(sockfd, SIOCGIFINDEX, &req) != 0)
    {
        perror("ioctl()");
        close(sockfd);
        return -1;
    }

    peer_addr.sll_ifindex = req.ifr_ifindex;
    /* Label the frame with the protocol it actually carries. */
    peer_addr.sll_protocol = htons(ETH_P_VNIC);

    send_pkt();

    close(sockfd);

    return 0;
}


下面编译调试

先在arm板上装载驱动:

网卡驱动6-做一个与外界交互的虚拟网卡5(代码及调试)_第1张图片

下面就不遮盖MAC地址了

pc上运行

eth0表示选择的网卡,0是我自定义协议中的数,表示下面arm板的对应vnic虚拟网卡

arm上显示

我们看一下/proc

网卡驱动6-做一个与外界交互的虚拟网卡5(代码及调试)_第2张图片

可以看到vnic0中显示的packets加1

下面我们再给vnic1发

看一下arm板


网卡驱动6-做一个与外界交互的虚拟网卡5(代码及调试)_第3张图片

可以看到vnic1也接收了一个包


现在我们反过来,arm发,pc收。由于pc端的内核没有加这个vnic驱动,我只能在应用层写个rcv

rcv.c

#include <stdio.h>  
#include <stdlib.h>  
#include <string.h>  
#include <unistd.h>  
#include <sys/socket.h>  
#include <linux/if_packet.h>  
#include <netdb.h>  
#include <errno.h>  
#include <arpa/inet.h>  
#include <sys/ioctl.h>  
#include <pthread.h>    
#include <net/ethernet.h>  
#include <netinet/ether.h>  
#include <net/if.h>  
#include <netinet/ip.h>  
int sockfd;   
struct sockaddr_ll peer_addr;   

#define ETH_P_VNIC    0x8877
 
/*
 * Usage: rcv <ifname>
 *
 * Opens a raw AF_PACKET socket for ETH_P_VNIC frames, binds it to <ifname>
 * and prints every received frame as hex bytes, one frame per line.
 *
 * Runs until recvfrom() fails.  Returns 0 after a clean loop exit, -1 on a
 * usage error or when socket setup or receiving fails.
 */
int main(int argc, char **argv)
{
    struct ifreq req;
    unsigned char buff[512];
    struct sockaddr_ll from;
    socklen_t fromlen;
    ssize_t n;
    ssize_t i;
    int rc = 0;

    if (argc < 2)
    {
        fprintf(stderr, "usage: %s <ifname>\n", (argc > 0) ? argv[0] : "rcv");
        return -1;
    }

    if (strlen(argv[1]) >= sizeof(req.ifr_name))
    {
        fprintf(stderr, "interface name too long: %s\n", argv[1]);
        return -1;
    }

    sockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_VNIC));
    if (sockfd == -1)
    {
        perror("socket()");
        return -1;
    }

    memset(&peer_addr, 0, sizeof(peer_addr));
    peer_addr.sll_family = AF_PACKET;

    memset(&req, 0, sizeof(req));
    snprintf(req.ifr_name, sizeof(req.ifr_name), "%s", argv[1]);
    if (ioctl(sockfd, SIOCGIFINDEX, &req) != 0)
    {
        perror("ioctl()");
        close(sockfd);
        return -1;
    }
    peer_addr.sll_ifindex = req.ifr_ifindex;
    peer_addr.sll_protocol = htons(ETH_P_VNIC);

    /* Without bind() the socket would receive from every interface. */
    if (bind(sockfd, (struct sockaddr *)&peer_addr, sizeof(peer_addr)) != 0)
    {
        perror("bind()");
        close(sockfd);
        return -1;
    }

    while (1)
    {
        fromlen = sizeof(from);
        n = recvfrom(sockfd, buff, sizeof(buff), 0,
                     (struct sockaddr *)&from, &fromlen);
        if (n < 0)
        {
            if (errno == EINTR)
                continue;
            perror("recvfrom()");
            rc = -1;
            break;
        }
        if (n == 0)
            continue;

        /* n is the number of frame bytes actually received. */
        for (i = 0; i < n; i++)
        {
            printf("%x ", buff[i]);
        }
        printf("\n");
    }

    close(sockfd);

    return rc;
}

gcc -o rcv rcv.c

然后,先运行pc端

./rcv eth0    (必须带上网卡名参数,不带参数时程序会直接退出)

再arm端运行:

pc会看到:



你可能感兴趣的:(linux,内核,网卡,ARM,linux内核)