驱动有三个文件:vnic.h vnic_core.c vnic_dev.c
应用层:rcv.c send.c(发送程序是pc和arm板共用的)
代码有注释,我就不详细解释了。如有疑问,欢迎讨论!
vnic.h
#ifndef __VNIC_H__ #define __VNIC_H__ #include <linux/u64_stats_sync.h> #define VNIC_HLEN 4 /* The additional bytes (on top of the Ethernet header) * that VLAN requires. */ #define VNIC_ETH_ALEN 6 /* Octets in one ethernet addr */ #define VNIC_ETH_HLEN 18 /* Total octets in header. */ #define VNIC_ETH_ZLEN 64 /* Min. octets in frame sans FCS */ #define ETH_P_VNIC 0x8877 #define VNIC_N_VID 20 //我们只可以设置20个虚拟网卡 /* found in socket.c */ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); /* if this changes, algorithm will have to be reworked because this * depends on completely exhausting the VNIC identifier space. Thus * it gives constant time look-up, but in many cases it wastes memory. */ #define VNIC_GROUP_ARRAY_SPLIT_PARTS 1//只用一个组 #define VNIC_GROUP_ARRAY_PART_LEN (VNIC_N_VID/VNIC_GROUP_ARRAY_SPLIT_PARTS) /** * struct vnic_priority_tci_mapping - vnic egress priority mappings * @priority: skb priority * @vnic_qos: vnic priority: (skb->priority << 13) & 0xE000 * @next: pointer to next struct */ struct vnic_priority_tci_mapping { u32 priority; u16 vnic_qos; struct vnic_priority_tci_mapping *next; }; /** * struct vnic_pcpu_stats - vnic percpu rx/tx stats * @rx_packets: number of received packets * @rx_bytes: number of received bytes * @rx_multicast: number of received multicast packets * @tx_packets: number of transmitted packets * @tx_bytes: number of transmitted bytes * @syncp: synchronization point for 64bit counters * @rx_errors: number of rx errors * @tx_dropped: number of tx drops */ struct vnic_pcpu_stats { u64 rx_packets; u64 rx_bytes; u64 rx_multicast; u64 tx_packets; u64 tx_bytes; struct u64_stats_sync syncp; u32 rx_errors; u32 tx_dropped; }; /** * struct vnic_dev_info - vnic private device data * @vnic_id: vnic identifier * @flags: device flags * @real_dev: underlying netdevice * @real_dev_addr: address of underlying netdevice * @dent: proc dir entry * @vnic_pcpu_stats: ptr to percpu rx stats */ struct vnic_dev_info { u8 vnic_id; struct 
net_device *real_dev; unsigned char real_dev_addr[ETH_ALEN]; struct proc_dir_entry *dent; struct vnic_pcpu_stats __percpu *vnic_pcpu_stats; }; static inline struct vnic_dev_info *vnic_dev_info(const struct net_device *dev) { return netdev_priv(dev); } extern int register_vnic_dev(struct net_device *dev); extern void unregister_vnic_dev(void); struct proc_dir_entry; struct vnic_net { /* /proc/net/vnic */ struct proc_dir_entry *proc_vnic_dir; /* /proc/net/vnic/config */ struct proc_dir_entry *proc_vnic_conf; /* Determines interface naming scheme. */ unsigned short name_type; }; struct vnic_group { struct net_device *real_dev; /* The ethernet(like) device * the vnic is attached to. */ unsigned int nr_vnics; struct hlist_node hlist; /* linked list */ struct net_device **vnic_devices_arrays[VNIC_GROUP_ARRAY_SPLIT_PARTS]; struct rcu_head rcu; }; /** * struct vnic_ethhdr - vnic ethernet header (ethhdr + vnic_hdr) * @h_dest: destination ethernet address * @h_source: source ethernet address * @h_vnic_proto: ethernet protocol (always 0x8877) */ struct vnic_ethhdr { unsigned char h_dest[ETH_ALEN]; unsigned char h_source[ETH_ALEN]; __be16 h_vnic_proto; __be16 h_vnic_data; }; static inline void vnic_group_set_device(struct vnic_group *vg, u16 vnic_id, struct net_device *dev) { struct net_device **array; if (!vg) return; array = vg->vnic_devices_arrays[vnic_id / VNIC_GROUP_ARRAY_PART_LEN]; array[vnic_id % VNIC_GROUP_ARRAY_PART_LEN] = dev; } static inline struct net_device *vnic_group_get_device(struct vnic_group *vg, u16 vnic_id) { struct net_device **array; array = vg->vnic_devices_arrays[vnic_id / VNIC_GROUP_ARRAY_PART_LEN]; return array ? 
array[vnic_id % VNIC_GROUP_ARRAY_PART_LEN] : NULL; } static inline struct sk_buff *vnic_put_tag(struct sk_buff *skb, u16 vnic_data) { struct vnic_ethhdr *beth; if (skb_cow_head(skb, VNIC_HLEN) < 0) { kfree_skb(skb); return NULL; } beth = (struct vnic_ethhdr *)skb_push(skb, VNIC_HLEN); /* 把mac地址放到新头的开始 */ memmove(skb->data, skb->data + VNIC_HLEN, 2 * VNIC_ETH_ALEN); skb->mac_header -= VNIC_HLEN; /* 加上协议类型,这里的赋值会传输到网络上 */ beth->h_vnic_proto = htons(ETH_P_VNIC); beth->h_vnic_data = htons(vnic_data); skb->protocol = htons(ETH_P_VNIC);//这里赋值是上报给内核,内核可能不会处理我这个屌丝协议,所以你赋0也可以。 return skb; } #endif /* */
/* */ #include <linux/capability.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/rculist.h> #include <net/p8022.h> #include <net/arp.h> #include <net/ip.h> #include <linux/rtnetlink.h> #include <linux/notifier.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <asm/uaccess.h> #include <linux/etherdevice.h> #include <linux/ip.h> #include "vnic.h" #define DRV_VERSION "1.0" /* End of global variables definitions. */ static void vnic_group_free(struct vnic_group *grp) { int i; for (i = 0; i < VNIC_GROUP_ARRAY_SPLIT_PARTS; i++)//释放所有已申请的虚拟网卡 kfree(grp->vnic_devices_arrays[i]); kfree(grp);//释放一个vnic_group结构 } static struct vnic_group *vnic_group_alloc(void) { struct vnic_group *grp; grp = kzalloc(sizeof(struct vnic_group), GFP_KERNEL);//申请一个vnic_group结构 if (!grp) return NULL; return grp; } static int vnic_group_prealloc_vid(struct vnic_group *vg, u16 vnic_id) { struct net_device **array; unsigned int size; array = vg->vnic_devices_arrays[vnic_id / VNIC_GROUP_ARRAY_PART_LEN];//这样是为了多组设计的,其实我就一组 if (array != NULL) return 0; size = sizeof(struct net_device *) * VNIC_GROUP_ARRAY_PART_LEN;//一组有8个 array = kzalloc(size, GFP_KERNEL); if (array == NULL) return -ENOBUFS; vg->vnic_devices_arrays[vnic_id / VNIC_GROUP_ARRAY_PART_LEN] = array; return 0; } static struct net_device *real_netdev = NULL; static struct vnic_group *vnic_grp = NULL; static int vnic_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { struct net_device *vnic_dev; struct vnic_pcpu_stats *rx_stats; u16 vnic_id = skb->data[1];//这部就相当于解析我们自己的头 #if 1 u16 i; for (i = 0; i < skb->mac_len; i++) { printk("%x ", skb->mac_header[i]); } printk("\n"); #endif vnic_dev = vnic_group_get_device(vnic_grp, vnic_id); if (vnic_dev == NULL) { return -1; } //检查skb的应用计数是否大于1,大于1意味着内核的其他部分拥有对 //该缓冲区的引用。如果大于1,会自己建立一份缓冲区副本。 skb = 
skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) { return false; } skb->dev = vnic_dev; //PACKET_OTHERHOST表示L2目的地址和接收接口的地址不同 //通常会被丢弃掉。如果网卡进入混杂模式,会接收所以包 //这里我们就要自己比较一下。 if (skb->pkt_type == PACKET_OTHERHOST) { if (!compare_ether_addr(eth_hdr(skb)->h_dest, vnic_dev->dev_addr)) skb->pkt_type = PACKET_HOST; } rx_stats = (struct vnic_pcpu_stats *)this_cpu_ptr(vnic_dev_info(vnic_dev)->vnic_pcpu_stats); u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; rx_stats->rx_bytes += skb->len; if (skb->pkt_type == PACKET_MULTICAST) rx_stats->rx_multicast++; u64_stats_update_end(&rx_stats->syncp); return 0; } static struct packet_type vnic_pack_type __read_mostly = { .type = cpu_to_be16(ETH_P_VNIC), .func = vnic_rcv, }; void unregister_vnic_dev(void) { struct vnic_dev_info *vnic; struct net_device *real_dev; struct net_device *tempdev; u16 n = vnic_grp->nr_vnics, i; LIST_HEAD(list); rtnl_lock(); for (i = 0; i < n; i++) { tempdev = vnic_group_get_device(vnic_grp, i); if (tempdev == NULL) { goto enodev; } vnic = vnic_dev_info(tempdev); real_dev = vnic->real_dev; vnic_grp->nr_vnics--;//已申请虚拟网卡数减一 vnic_group_set_device(vnic_grp, i, NULL);//根据vnic_id获取对应的虚拟网卡 //unregister_netdevice_queue(tempdev, &list); unregister_netdevice(tempdev); /* 减少真实设备的应用应用计数 */ dev_put(real_dev); vnic->real_dev = NULL; } enodev: rtnl_unlock(); } EXPORT_SYMBOL(unregister_vnic_dev); int register_vnic_dev(struct net_device *dev) { struct vnic_dev_info *vnic = vnic_dev_info(dev); struct net_device *real_dev = vnic->real_dev = real_netdev; u16 vnic_id = vnic->vnic_id; int err; if ((vnic_grp == NULL) || (real_netdev == NULL)) { return -ENODEV; } dev->mtu = real_netdev->mtu; rtnl_lock(); err = register_netdevice(dev);//装载网卡 if (err < 0) { rtnl_unlock(); goto out_uninit_applicant; } /* 增加真实设备的应用应用计数 */ dev_hold(real_dev); //这个函数是根据real_dev的dormat和连接状态来控制dev的连接上报 //下面的event也是通过它。 netif_stacked_transfer_operstate(real_dev, dev); rtnl_unlock(); /* So, got the sucker initialized, now lets place * it 
into our local structure. */ vnic_group_set_device(vnic_grp, vnic_id, dev);//设置次虚拟网卡的id vnic_grp->nr_vnics++;//已申请虚拟网卡数加一 return 0; out_uninit_applicant: return err; } EXPORT_SYMBOL(register_vnic_dev); static void vnic_sync_address(struct net_device *dev, struct net_device *vnicdev)//同步mac地址 { struct vnic_dev_info *vnic = vnic_dev_info(vnicdev); /* May be called without an actual change */ /* 下面这个函数很酷,有个判断数组相等的公式: ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; 成立就是相等 */ if (!compare_ether_addr(vnic->real_dev_addr, dev->dev_addr))//相等退出 return; /* vnic address was different from the old address and is equal to * the new address */ if (compare_ether_addr(vnicdev->dev_addr, vnic->real_dev_addr) && !compare_ether_addr(vnicdev->dev_addr, dev->dev_addr)) dev_uc_del(dev, vnicdev->dev_addr);//释放单播地址 /* vnic address was equal to the old address and is different from * the new address */ if (!compare_ether_addr(vnicdev->dev_addr, vnic->real_dev_addr) && compare_ether_addr(vnicdev->dev_addr, dev->dev_addr)) dev_uc_add(dev, vnicdev->dev_addr);//增加单播地址,这样会启动混杂模式,进行监听 //由于vnic和真实网卡的mac不一样,所以要用混杂模式 memcpy(vnic->real_dev_addr, dev->dev_addr, ETH_ALEN);//赋值地址 } //通知链会调用的函数 static int vnic_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = ptr; int i, flgs; struct net_device *vnicdev; struct vnic_dev_info *vnic; if (!vnic_grp) goto out; /* It is OK that we do not hold the group lock right now, * as we run under the RTNL lock. 
*/ switch (event) { case NETDEV_CHANGE: /* Propagate real device state to vnic devices */ for (i = 0; i < VNIC_N_VID; i++) { vnicdev = vnic_group_get_device(vnic_grp, i); if (!vnicdev) continue; netif_stacked_transfer_operstate(dev, vnicdev);//链接状态刷新 } break; case NETDEV_CHANGEADDR: /* Adjust unicast filters on underlying device */ for (i = 0; i < VNIC_N_VID; i++) { vnicdev = vnic_group_get_device(vnic_grp, i); if (!vnicdev) continue; flgs = vnicdev->flags; if (!(flgs & IFF_UP)) continue; vnic_sync_address(dev, vnicdev);//更新物理地址,上面有。 } break; case NETDEV_CHANGEMTU://MTU被更新。 for (i = 0; i < VNIC_N_VID; i++) { vnicdev = vnic_group_get_device(vnic_grp, i); if (!vnicdev) continue; if (vnicdev->mtu <= dev->mtu) continue; dev_set_mtu(vnicdev, dev->mtu); } break; case NETDEV_FEAT_CHANGE://功能发生变化 /* Propagate device features to underlying device */ for (i = 0; i < VNIC_N_VID; i++) { vnicdev = vnic_group_get_device(vnic_grp, i); if (!vnicdev) continue; netdev_update_features(vnicdev); } break; case NETDEV_DOWN://关闭 /* Put all VNICs for this dev in the down state too. */ for (i = 0; i < VNIC_N_VID; i++) { vnicdev = vnic_group_get_device(vnic_grp, i); if (!vnicdev) continue; flgs = vnicdev->flags; if (!(flgs & IFF_UP)) continue; vnic = vnic_dev_info(vnicdev); dev_change_flags(vnicdev, flgs & ~IFF_UP); netif_stacked_transfer_operstate(dev, vnicdev); } break; case NETDEV_UP://开启 /* Put all VNICs for this dev in the up state too. */ for (i = 0; i < VNIC_N_VID; i++) { vnicdev = vnic_group_get_device(vnic_grp, i); if (!vnicdev) continue; flgs = vnicdev->flags; if (flgs & IFF_UP) continue; vnic = vnic_dev_info(vnicdev); dev_change_flags(vnicdev, flgs | IFF_UP); netif_stacked_transfer_operstate(dev, vnicdev); } break; case NETDEV_UNREGISTER://注销 /* twiddle thumbs on netns device moves */ if (dev->reg_state != NETREG_UNREGISTERING) break; //unregister_vnic_dev(); break; //下面和bonding有关 case NETDEV_PRE_TYPE_CHANGE: /* Forbid underlaying device to change its type. 
*/ return NOTIFY_BAD; case NETDEV_NOTIFY_PEERS: case NETDEV_BONDING_FAILOVER://失败 /* Propagate to vnic devices */ for (i = 0; i < VNIC_N_VID; i++) { vnicdev = vnic_group_get_device(vnic_grp, i); if (!vnicdev) continue; call_netdevice_notifiers(event, vnicdev);//通过call去传播失败信息 } break; } out: return NOTIFY_DONE; } static struct notifier_block vnic_notifier_block __read_mostly = { .notifier_call = vnic_device_event, }; static int __init vnic_proto_init(void) { int err; real_netdev = dev_get_by_name(&init_net, "eth0"); if (real_netdev == NULL) { return -ENODEV; } vnic_grp = vnic_group_alloc(); if (vnic_grp == NULL) { err = -ENOBUFS; goto error; } vnic_grp->real_dev = real_netdev; err = vnic_group_prealloc_vid(vnic_grp, 0); if (err < 0) { vnic_group_free(vnic_grp); goto error; } err = register_netdevice_notifier(&vnic_notifier_block); if (err < 0) { vnic_group_free(vnic_grp); goto error; } dev_add_pack(&vnic_pack_type); return 0; error: return err; } static void __exit vnic_cleanup_module(void) { if (vnic_grp != NULL) { vnic_group_free(vnic_grp); } unregister_netdevice_notifier(&vnic_notifier_block); dev_remove_pack(&vnic_pack_type); } module_init(vnic_proto_init); module_exit(vnic_cleanup_module); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION);
/* */ #include <linux/module.h> #include <linux/slab.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <net/arp.h> #include "vnic.h" #define VNIC_CNT 2 static netdev_tx_t vnic_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct vnic_ethhdr *veth = (struct vnic_ethhdr *)(skb->data); unsigned int len; int ret; if (veth->h_vnic_proto != htons(ETH_P_VNIC)) { vnic_put_tag(skb, ('w' << 8) | (vnic_dev_info(dev)->vnic_id)); } skb_set_dev(skb, vnic_dev_info(dev)->real_dev); len = skb->len; ret = dev_queue_xmit(skb); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { struct vnic_pcpu_stats *stats; stats = this_cpu_ptr(vnic_dev_info(dev)->vnic_pcpu_stats); u64_stats_update_begin(&stats->syncp);//内存屏蔽,保证上面的获取在下面的赋值前完成 stats->tx_packets++;//发送报计数 stats->tx_bytes += len;//发送字节计数 u64_stats_update_end(&stats->syncp); } else { this_cpu_inc(vnic_dev_info(dev)->vnic_pcpu_stats->tx_dropped);//失败计数 } return ret; } //更改最大传输单元的长度 static int vnic_dev_change_mtu(struct net_device *dev, int new_mtu) { if (vnic_dev_info(dev)->real_dev->mtu < new_mtu) return -ERANGE; dev->mtu = new_mtu; return 0; } void vnic_dev_get_realdev_name(const struct net_device *dev, char *result) { strncpy(result, vnic_dev_info(dev)->real_dev->name, 23); } static int vnic_dev_open(struct net_device *dev) { struct vnic_dev_info *vnic = vnic_dev_info(dev); struct net_device *real_dev = vnic->real_dev; int err; if (!(real_dev->flags & IFF_UP))//不是UP命令 return -ENETDOWN; //下面的东西已经解释过,会开启混杂模式 if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) { err = dev_uc_add(real_dev, dev->dev_addr); if (err < 0) goto out; } if (dev->flags & IFF_ALLMULTI) {//多播 err = dev_set_allmulti(real_dev, 1);//更新设备的allmulti计数 if (err < 0) goto del_unicast; } if (dev->flags & IFF_PROMISC) {//混杂 err = dev_set_promiscuity(real_dev, 1);//更新设备的promiscuity计数 if (err < 0) goto clear_allmulti; } memcpy(vnic->real_dev_addr, real_dev->dev_addr, 
ETH_ALEN);//复制真实网卡的地址 if (netif_carrier_ok(real_dev))//上报链接状态 netif_carrier_on(dev); return 0; clear_allmulti: if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(real_dev, -1); del_unicast: if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) dev_uc_del(real_dev, dev->dev_addr); out: netif_carrier_off(dev); return err; } static int vnic_dev_stop(struct net_device *dev) { struct vnic_dev_info *vnic = vnic_dev_info(dev); struct net_device *real_dev = vnic->real_dev; dev_mc_unsync(real_dev, dev); dev_uc_unsync(real_dev, dev); if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(real_dev, -1); if (dev->flags & IFF_PROMISC) dev_set_promiscuity(real_dev, -1); if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) dev_uc_del(real_dev, dev->dev_addr); netif_carrier_off(dev); return 0; } static int vnic_dev_set_mac_address(struct net_device *dev, void *p) { struct net_device *real_dev = vnic_dev_info(dev)->real_dev; struct sockaddr *addr = p; int err; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; if (!(dev->flags & IFF_UP)) goto out; //下面的方法在vnic_core已说过 if (compare_ether_addr(addr->sa_data, real_dev->dev_addr)) { err = dev_uc_add(real_dev, addr->sa_data); if (err < 0) return err; } if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) dev_uc_del(real_dev, dev->dev_addr); out: memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); return 0; } //基本交给real dev处理 static int vnic_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct net_device *real_dev = vnic_dev_info(dev)->real_dev; const struct net_device_ops *ops = real_dev->netdev_ops; struct ifreq ifrr; int err = -EOPNOTSUPP; strncpy(ifrr.ifr_name, real_dev->name, IFNAMSIZ); ifrr.ifr_ifru = ifr->ifr_ifru; switch (cmd) { case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: if (netif_device_present(real_dev) && ops->ndo_do_ioctl) err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd); break; } if (!err) ifr->ifr_ifru = ifrr.ifr_ifru; return err; } //看一下程序,主要就是设置多播和混杂 static void 
vnic_dev_change_rx_flags(struct net_device *dev, int change) { struct net_device *real_dev = vnic_dev_info(dev)->real_dev; if (change & IFF_ALLMULTI) dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1); if (change & IFF_PROMISC) dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1); } static void vnic_dev_set_rx_mode(struct net_device *vnic_dev) { /* dev_mc_sync:同步一个设备的多播列表到另一个设备 dev_uc_sync:同步一个设备的单播列表到另一个设备 虚拟网卡只能把这些给真实的网卡去做 */ dev_mc_sync(vnic_dev_info(vnic_dev)->real_dev, vnic_dev); dev_uc_sync(vnic_dev_info(vnic_dev)->real_dev, vnic_dev); } static int vnic_dev_init(struct net_device *dev) { struct net_device *real_dev = vnic_dev_info(dev)->real_dev; netif_carrier_off(dev);//通知链接断开 /* IFF_BROADCAST|IFF_MULTICAST; ??? */ /* 刚装载驱动时,网卡还在关闭状态,所以要清楚一些标志 IFF_UP:接口已打开 IFF_PROMISC:混杂模式 IFF_ALLMULTI:接受所有组播报文 IFF_MASTER和IFF_SLAVE:负载平衡器的主从机 */ dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_MASTER | IFF_SLAVE); dev->iflink = real_dev->ifindex;//接口引索,独一无二的。 /* state赋值,通用网络排队层的几个私有标志要清楚 */ dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))) | (1<<__LINK_STATE_PRESENT); /* 下面的hw设置: 报文校验,发散汇聚IO使用,TCP分段卸载,可以对高端内存DMA, SCTP校验和卸载 */ dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | NETIF_F_HIGHDMA | NETIF_F_SCTP_CSUM; /* TSO(TCP Segmentation Offload),是一种利用网卡对TCP数据包分片,减轻CPU负荷的一种技术, 有时也被叫做 LSO (Large segment offload) ,TSO是针对TCP的,UFO是针对UDP的。如果硬件 支持 TSO功能,同时也需要硬件支持的TCP校验计算和分散/聚集 (Scatter Gather) 功能。 GSO(Generic Segmentation Offload),它比TSO更通用,基本思想就是尽可能的推迟数据分片直 至发送到网卡驱动之前,此时会检查网卡是否支持分片功能(如TSO、UFO),如果支持直接发送到 网卡,如果不支持就进行分片后再发往网卡。这样大数据包只需走一次协议栈,而不是被分割成 几个数据包分别走,这就提高了效率。 LRO(Large Receive Offload),通过将接收到的多个TCP数据聚合成一个大的数据包,然后传递给 网络协议栈处理,以减少上层协议栈处理 开销,提高系统接收TCP数据包的能力。 GRO(Generic Receive Offload),基本思想跟LRO类似,克服了LRO的一些缺点,更通用。后续的 驱动都使用GRO的接口,而不是LRO。 RSS(Receive Side Scaling),是一项网卡的新特性,俗称多队列。具备多个RSS队列的网卡,可 以将不同的网络流分成不同的队列,再分别将这些队列分配到多个CPU核心上进行处理,从而将负 荷分散,充分利用多核处理器的能力。 */ 
dev->gso_max_size = real_dev->gso_max_size; /* IPv6的共享卡相关的东西 */ dev->dev_id = real_dev->dev_id; //下面判断地址是不是为0,是的话复制真实网卡地址 if (is_zero_ether_addr(dev->dev_addr)) memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len); if (is_zero_ether_addr(dev->broadcast)) memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len); //硬件可能需要的头尾空间 dev->needed_headroom = real_dev->needed_headroom; dev->needed_tailroom = real_dev->needed_tailroom; //dev->hard_header_len = real_dev->hard_header_len + VNIC_HLEN;//这里可以加上我们自己的协议头 //vnic_pcpu_stats记录网卡状态,分配用的是percpu,这样每个cpu都有自己的copy //就不需要加锁 vnic_dev_info(dev)->vnic_pcpu_stats = alloc_percpu(struct vnic_pcpu_stats); if (!vnic_dev_info(dev)->vnic_pcpu_stats) return -ENOMEM; return 0; } static void vnic_dev_uninit(struct net_device *dev) { struct vnic_dev_info *vnic = vnic_dev_info(dev); free_percpu(vnic->vnic_pcpu_stats); vnic->vnic_pcpu_stats = NULL;//对应上面的alloc } static u32 vnic_dev_fix_features(struct net_device *dev, u32 features) { struct net_device *real_dev = vnic_dev_info(dev)->real_dev; u32 old_features = features; features &= real_dev->features; if (old_features & NETIF_F_SOFT_FEATURES)//多变的特征--没有特殊的硬件要求 features |= old_features & NETIF_F_SOFT_FEATURES; if (dev_ethtool_get_rx_csum(real_dev)) features |= NETIF_F_RXCSUM;//接收校验和卸载 features |= NETIF_F_LLTX;//不要用无锁TX return features; } static int vnic_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { const struct vnic_dev_info *vnic = vnic_dev_info(dev); return dev_ethtool_get_settings(vnic->real_dev, cmd); } static void vnic_ethtool_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { strcpy(info->driver, "vnic"); strcpy(info->version, "v1.0"); strcpy(info->fw_version, "N/A"); } static struct rtnl_link_stats64 *vnic_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { if (vnic_dev_info(dev)->vnic_pcpu_stats) { struct vnic_pcpu_stats *p; u32 rx_errors = 0, tx_dropped = 0; int i; for_each_possible_cpu(i) {//差不多就是for ((cpu) = 0; 
(cpu) < 1; (cpu)++) u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes; unsigned int start; p = per_cpu_ptr(vnic_dev_info(dev)->vnic_pcpu_stats, i);//获取每-cpu量。 do { start = u64_stats_fetch_begin_bh(&p->syncp); rxpackets = p->rx_packets; rxbytes = p->rx_bytes; rxmulticast = p->rx_multicast; txpackets = p->tx_packets; txbytes = p->tx_bytes; } while (u64_stats_fetch_retry_bh(&p->syncp, start)); stats->rx_packets += rxpackets; stats->rx_bytes += rxbytes; stats->multicast += rxmulticast; stats->tx_packets += txpackets; stats->tx_bytes += txbytes; /* rx_errors & tx_dropped are u32 */ rx_errors += p->rx_errors; tx_dropped += p->tx_dropped;//统计信息。 } stats->rx_errors = rx_errors; stats->tx_dropped = tx_dropped; //上面就是统计每个cpu的网卡应用信息 } return stats; } static const struct ethtool_ops vnic_ethtool_ops = { .get_settings = vnic_ethtool_get_settings, .get_drvinfo = vnic_ethtool_get_drvinfo, .get_link = ethtool_op_get_link, }; static const struct net_device_ops vnic_netdev_ops = { //change_mtu.对应ioctl的SIOCSIFMTU .ndo_change_mtu = vnic_dev_change_mtu, //init和uninit在注册和注销时调用 .ndo_init = vnic_dev_init, .ndo_uninit = vnic_dev_uninit, //open和stop,例如在ifconfig X up会通过sock_ioctl最终调用__dev_open .ndo_open = vnic_dev_open, .ndo_stop = vnic_dev_stop, //发送 .ndo_start_xmit = vnic_dev_hard_start_xmit, //媒体访问地址是否有效 .ndo_validate_addr = eth_validate_addr, //设置mac地址 .ndo_set_mac_address = vnic_dev_set_mac_address, //更改设备地址列表过滤时调用 .ndo_set_rx_mode = vnic_dev_set_rx_mode, .ndo_set_multicast_list = vnic_dev_set_rx_mode, //对应ioctl的SIOCSIFFLAGS .ndo_change_rx_flags = vnic_dev_change_rx_flags, .ndo_do_ioctl = vnic_dev_ioctl, //ndo_get_stats64会在cat /proc/net/dev下看到 .ndo_get_stats64 = vnic_dev_get_stats64, //网卡设备特征 .ndo_fix_features = vnic_dev_fix_features, }; void vnic_setup(struct net_device *dev) { ether_setup(dev); dev->tx_queue_len = 0; dev->netdev_ops = &vnic_netdev_ops; dev->ethtool_ops = &vnic_ethtool_ops; memset(dev->broadcast, 0, ETH_ALEN); } struct net_device *ndev[VNIC_CNT]; static __exit 
void vnic_cleanup(void) { int n; unregister_vnic_dev(); for (n = 0; (n < VNIC_CNT) && (ndev[n] != NULL); n++) { free_netdev(ndev[n]); ndev[n] = NULL; } } static __init int vnic_init_module(void) { char name[IFNAMSIZ]; int err, n; for (n = 0; n < VNIC_CNT; n++) { snprintf(name, IFNAMSIZ, "vnic%d", n); ndev[n] = alloc_netdev(sizeof(struct vnic_dev_info), name, vnic_setup); if (ndev[n] == NULL) { err = -ENOBUFS; goto out_free_newdev; } vnic_dev_info(ndev[n])->vnic_id = n; err = register_vnic_dev(ndev[n]); if (err < 0) { free_netdev(ndev[n]); ndev[n] = NULL; goto out_free_newdev; } } return 0; out_free_newdev: vnic_cleanup(); return err; } module_init(vnic_init_module); module_exit(vnic_cleanup); MODULE_LICENSE("GPL");
send.c:如果在pc上运行就 #define X86_TEST,在arm上运行就去掉这个定义
#include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <sys/socket.h> #include <linux/if_packet.h> #include <netdb.h> #include <errno.h> #include <arpa/inet.h> #include <sys/ioctl.h> #include <pthread.h> #include <net/ethernet.h> #include <netinet/ether.h> #include <net/if.h> #include <netinet/ip.h> #include<netinet/tcp.h> static int sockfd; static struct sockaddr_ll peer_addr; static unsigned char vnic_num = 0;//选择哪个网卡 #define X86_TEST //pc上运行定义它 #ifdef X86_TEST #define ETH_P_VNIC 0x8877 #endif struct arp_packet { struct ether_header eh; short data; }; static void send_pkt(void) { #ifdef X86_TEST unsigned char src_mac[6] = {0x00, 0x0c, 0x29, 0x15, 0xd9, 0xc5}; //发送端地址 unsigned char dst_mac[6] = {0xc0, 0x6f, 0x65, 0xaa, 0xdf, 0x61}; //接收端地址 #else unsigned char src_mac[6] = {0xc0, 0x6f, 0x65, 0xaa, 0xdf, 0x61}; //发送端地址 unsigned char dst_mac[6] = {0x00, 0x0c, 0x29, 0x15, 0xd9, 0xc5}; //接收端地址 #endif struct arp_packet frame; memset(&frame, 0, sizeof(struct arp_packet)); memcpy(frame.eh.ether_dhost, dst_mac, 6); memcpy(frame.eh.ether_shost, src_mac, 6); #ifdef X86_TEST//pc上只能在应用层去加 frame.eh.ether_type = htons(ETH_P_VNIC); frame.data = htons(('w' << 8) | vnic_num); #endif sendto(sockfd, &frame, sizeof(frame), 0, (struct sockaddr*)&peer_addr, sizeof(peer_addr)); } int main(int argc, char **argv) { struct ifreq req; if (argc <= 2) { return -1; } sockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_VNIC)); if(sockfd == -1) { perror("socket()"); } memset(&peer_addr, 0, sizeof(peer_addr)); peer_addr.sll_family = AF_PACKET; strcpy(req.ifr_name, argv[1]); vnic_num = (*argv[2] - '0'); if(ioctl(sockfd, SIOCGIFINDEX, &req) != 0) { perror("ioctl()"); } peer_addr.sll_ifindex = req.ifr_ifindex; peer_addr.sll_protocol = htons(ETH_P_ARP); send_pkt(); close(sockfd); return 0; }
先在arm板上装载驱动:
下面就不遮盖MAC地址了
pc上运行
eth0表示选择的网卡,0是我自定义协议中的数,表示下面arm板的对应vnic虚拟网卡
arm上显示
我们看一下/proc
可以看到vnic0中显示的packets加1
下面我们再给vnic1发
看一下arm板
可以看到vnic1也接收了一个包
现在我们反过来,arm发,pc收。由于pc端的内核没有加这个vnic驱动,我只能在应用层写个rcv
rcv.c
#include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <sys/socket.h> #include <linux/if_packet.h> #include <netdb.h> #include <errno.h> #include <arpa/inet.h> #include <sys/ioctl.h> #include <pthread.h> #include <net/ethernet.h> #include <netinet/ether.h> #include <net/if.h> #include <netinet/ip.h> int sockfd; struct sockaddr_ll peer_addr; #define ETH_P_VNIC 0x8877 int main(int argc, char **argv) { struct ifreq req; unsigned char buff[512]; struct sockaddr_in clientAddr; int len, n; if (argc == 1) { return -1; } sockfd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_VNIC)); if (sockfd == -1) { perror("socket()"); } memset(&peer_addr, 0, sizeof(peer_addr)); peer_addr.sll_family = AF_PACKET; strcpy(req.ifr_name, argv[1]); if (ioctl(sockfd, SIOCGIFINDEX, &req) != 0) { perror("ioctl()"); } peer_addr.sll_ifindex = req.ifr_ifindex; peer_addr.sll_protocol = htons(ETH_P_VNIC); len = 0; memset(buff, 0, 512); while (1) { n = recvfrom(sockfd, buff, 511, 0, (struct sockaddr *)&clientAddr, &len); if (len > 0) { for (n = 0; n < len; n++) { printf("%x ", buff[n]); } printf("\n"); len = 0; memset(buff, 0, 512); } } close(sockfd); return 0; }
gcc -o rcv rcv.c
然后,先运行pc端
./rcv eth0
再arm端运行:
pc会看到: