接着之前的代码,继续添加功能。
之前介绍了接收的流程,这里介绍发送的流程。
其实是类似的,是组包的过程,贴代码
/*
 * Assemble an Ethernet / IPv4 / UDP frame in place, starting at msg.
 *
 * msg           destination buffer; the three headers and the payload are
 *               written back to back from its first byte
 * total_len     length of the whole frame (Ethernet header included); the
 *               IPv4 total-length field is derived from it
 * dst_mac       destination MAC, RTE_ETHER_ADDR_LEN bytes; the source MAC is
 *               taken from the global g_src_mac_addr
 * src_ip/dst_ip stored into the IPv4 header unchanged (no byte swapping here)
 * udp_*_port    host-order ports, converted with htons()
 * data/length   UDP payload and its size in bytes
 */
static void create_eth_ip_udp_pkt(uint8_t *msg, size_t total_len, uint8_t *dst_mac,
        uint32_t src_ip, uint32_t dst_ip, uint16_t udp_src_port, uint16_t udp_dst_port,
        uint8_t *data, int length) {
    /* ---- Ethernet header ---- */
    struct rte_ether_hdr *eth_hdr = (struct rte_ether_hdr *)msg;

    rte_memcpy(eth_hdr->d_addr.addr_bytes, dst_mac, RTE_ETHER_ADDR_LEN);
    rte_memcpy(eth_hdr->s_addr.addr_bytes, g_src_mac_addr, RTE_ETHER_ADDR_LEN);
    eth_hdr->ether_type = htons(RTE_ETHER_TYPE_IPV4);

    /* ---- IPv4 header, immediately after the Ethernet header ---- */
    struct rte_ipv4_hdr *ip_hdr =
        (struct rte_ipv4_hdr *)(msg + sizeof(struct rte_ether_hdr));

    ip_hdr->version_ihl = 0x45;      /* version 4, header length 5 words */
    ip_hdr->type_of_service = 0;
    ip_hdr->total_length =
        htons((uint16_t)(total_len - sizeof(struct rte_ether_hdr)));
    ip_hdr->packet_id = 0;
    ip_hdr->fragment_offset = 0;
    ip_hdr->time_to_live = 64;
    ip_hdr->next_proto_id = IPPROTO_UDP;
    ip_hdr->src_addr = src_ip;
    ip_hdr->dst_addr = dst_ip;
    /* The checksum field must be zero while the checksum is calculated. */
    ip_hdr->hdr_checksum = 0;
    ip_hdr->hdr_checksum = rte_ipv4_cksum(ip_hdr);

    /* ---- UDP header, immediately after the IPv4 header ---- */
    struct rte_udp_hdr *udp_hdr = (struct rte_udp_hdr *)(ip_hdr + 1);
    const uint16_t udp_len = (uint16_t)(length + sizeof(struct rte_udp_hdr));

    udp_hdr->src_port = htons(udp_src_port);
    udp_hdr->dst_port = htons(udp_dst_port);
    udp_hdr->dgram_len = htons(udp_len);

    /* ---- payload, then the UDP checksum that covers it ---- */
    rte_memcpy(udp_hdr + 1, data, length);
    udp_hdr->dgram_cksum = 0;
    udp_hdr->dgram_cksum = rte_ipv4_udptcp_cksum(ip_hdr, udp_hdr);
}
static void do_send_udp(struct rte_mempool *mbuf_pool, unsigned char *data, int length) {
//length是纯数据的长度,总长度需要加上各个协议头的长度
const unsigned eth_total_len = length + 42; //在dpdk中,这三个协议头,加起来是42字节
struct rte_mbuf *mbuf = rte_pktmbuf_alloc(mbuf_pool);
if (!mbuf) {
rte_exit(EXIT_FAILURE, "Cannot alloc mbuf\n");
}
mbuf->pkt_len = eth_total_len;
mbuf->data_len = eth_total_len;
uint8_t *pkt_data = rte_pktmbuf_mtod(mbuf, uint8_t *);
const int udp_port = UDP_PORT;
//g_开头,是全局变量
create_eth_ip_udp_pkt(pkt_data, eth_total_len, g_dest_mac_mac_addr,
g_src_ip, g_dest_ip, udp_port, udp_port, data, length);
rte_eth_tx_burst(g_dpdkPortId, 0, &mbuf, 1);
rte_pktmbuf_free(mbuf);
}
arp协议的实现中,主要是请求和回应。
请求方面,值得说明的是,arp请求采用广播方式。请求方会向局域网内广播一个带有目的ip地址、但目的mac地址未知的请求包,局域网内所有机器都会收到这个包,但是只有ip地址与之匹配的机器会回应。请求方收到回应包后,就会把其中ip地址与mac地址的对应关系记录到arp表中。
局域网内部通信的时候,用的是mac地址,上层的地址是没有用的,所以需要转换。但是mac地址出了网关就没有用了,所以还是需要ip等上层地址。
还是贴代码
int main()
{
struct rte_mbuf *mbufs[BURST_SIZE];
unsigned num_recvd = rte_eth_rx_burst(g_dpdkPortId, DPDK_QUEUE_ID_RX, mbufs, BURST_SIZE);
if (unlikely(num_recvd > BURST_SIZE)) {
rte_exit(EXIT_FAILURE, "Error receiving from eth\n");
}
unsigned i = 0;
for (i = 0;i < num_recvd;i ++) {
struct rte_ether_hdr *ehdr = rte_pktmbuf_mtod(mbufs[i], struct rte_ether_hdr*);
if (ehdr->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
struct rte_arp_hdr *arp_hdr = rte_pktmbuf_mtod_offset(mbufs[i], struct rte_arp_hdr *, sizeof(struct rte_ether_hdr));
if (arp_hdr->arp_data.arp_tip == g_src_ip) { //没有这一行就是arp欺骗
// sip change to dip and dip change to sip
// dmac change to smac and smac change to dmac
printf("do_send_arp\n");
do_send_arp(pktmbuf_pool, arp_hdr->arp_data.arp_sha.addr_bytes, arp_hdr->arp_data.arp_tip, arp_hdr->arp_data.arp_sip);
}
}
}
}
上面是收到包之后的处理:收到arp请求包之后原路回发一个回应包,把收到的arp包中的源地址和目的地址互换即可。具体组包过程如下:
static void create_eth_arp_pkt(uint8_t *msg, uint8_t *dst_mac,
uint32_t src_ip, uint32_t dst_ip) {
struct rte_ether_hdr *eth = (struct rte_ether_hdr *)msg;
rte_memcpy(eth->d_addr.addr_bytes, dst_mac, RTE_ETHER_ADDR_LEN);
rte_memcpy(eth->s_addr.addr_bytes, g_src_mac_addr, RTE_ETHER_ADDR_LEN);
eth->ether_type = htons(RTE_ETHER_TYPE_ARP);
struct rte_arp_hdr *arp = (struct rte_arp_hdr *)(eth + 1);
arp->arp_hardware = htons(1);
//协议格式
arp->arp_protocol = htons(RTE_ETHER_TYPE_IPV4); //ip地址的协议
//硬件地址的长度,即mac地址的长度
arp->arp_hlen = RTE_ETHER_ADDR_LEN;
arp->arp_plen = sizeof(uint32_t); //ip地址长度
//操作码,1请求2回应
arp->arp_opcode = htons(2);
rte_memcpy(arp->arp_data.arp_sha.addr_bytes, g_src_mac_addr, RTE_ETHER_ADDR_LEN);
rte_memcpy(arp->arp_data.arp_tha.addr_bytes, dst_mac, RTE_ETHER_ADDR_LEN);
arp->arp_data.arp_sip = src_ip;
arp->arp_data.arp_tip = dst_ip;
struct in_addr addr;
char buf[RTE_ETHER_ADDR_FMT_SIZE];
rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, (struct rte_ether_addr*)&arp->arp_data.arp_sha);
addr.s_addr = arp->arp_data.arp_sip;
printf(" arp src: %s, mac: %s", inet_ntoa(addr), buf);
rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, (struct rte_ether_addr*)&arp->arp_data.arp_tha);
addr.s_addr = arp->arp_data.arp_tip;
printf(", dst: %s, mac: %s \n", inet_ntoa(addr), buf);
}
/*
 * Build an ARP reply and transmit it on TX queue 0 of port g_dpdkPortId.
 *
 * mbuf_pool  pool the packet buffer is allocated from
 * dst_mac    MAC address of the host the reply is sent to
 * sip        sender IP placed in the ARP header
 * dip        target IP placed in the ARP header
 *
 * mbuf ownership: a packet accepted by rte_eth_tx_burst() is owned and later
 * released by the driver, so it is freed here only when it was NOT accepted.
 */
static void do_send_arp(struct rte_mempool *mbuf_pool, uint8_t *dst_mac, uint32_t sip, uint32_t dip) {
    const unsigned total_length = sizeof(struct rte_ether_hdr) + sizeof(struct rte_arp_hdr);

    struct rte_mbuf *mbuf = rte_pktmbuf_alloc(mbuf_pool);
    if (!mbuf) {
        rte_exit(EXIT_FAILURE, "Cannot alloc mbuf\n");
    }
    mbuf->pkt_len = total_length;
    mbuf->data_len = total_length;

    uint8_t *pkt_data = rte_pktmbuf_mtod(mbuf, uint8_t *);
    create_eth_arp_pkt(pkt_data, dst_mac, sip, dip);

    /* Free the mbuf ourselves only if the TX queue rejected it. */
    if (rte_eth_tx_burst(g_dpdkPortId, 0, &mbuf, 1) == 0) {
        rte_pktmbuf_free(mbuf);
    }
}
值得注意的是,arp协议工作在数据链路层,而这个组包过程,让人感觉是工作在网络层,这是因为,arp协议是在以太网基础之上进行工作的,在代码层次,用了第三层的数据结构来处理,这就造成了这个错觉。
kni(Kernel NIC Interface)内核网卡接口,是dpdk允许用户态和内核态交换报文的解决方案,模拟了一个虚拟的网口,提供dpdk的应用程序和linux内核之间通讯。kni接口允许报文从用户态接收后转发到linux协议栈去。
这里主要介绍基本操作,不详细介绍原理。简单来说,就是dpdk把数据包再丢给内核,由内核处理。
// Callback functions; init_kni() stores them in struct rte_kni_ops
// (config_mac_address, config_network_if, change_mtu).
int kni_config_mac_address(uint16_t port_id, uint8_t mac_addr[]);
int kni_config_network_if(uint16_t port_id, uint8_t if_up);
int kni_change_mtu(uint16_t port_id, unsigned int new_mtu);
/*
 * KNI callback: apply a new MTU to a port.
 *
 * The port is stopped, reconfigured with one RX and one TX queue using a
 * copy of port_conf_default, its queues are set up again, and it is then
 * restarted with promiscuous mode enabled.
 *
 * port_id  port to reconfigure
 * new_mtu  requested MTU in bytes
 *
 * Returns 0 on success, -EINVAL for an invalid port, or the negative value
 * reported by the failing ethdev call. Descriptor-adjust and TX-queue setup
 * failures terminate the process through rte_exit().
 */
int kni_change_mtu(uint16_t port_id, unsigned int new_mtu) {
    int ret;
    uint16_t nb_txd = TX_RING_SIZE;
    uint16_t nb_rxd = RX_RING_SIZE;
    struct rte_eth_conf conf;
    struct rte_eth_dev_info dev_info;
    struct rte_eth_rxconf rxq_conf;

    if (!rte_eth_dev_is_valid_port(port_id)) {
        printf("Invalid port id %d\n", port_id);
        return -EINVAL;
    }
    printf("Change MTU of port %d to %u\n", port_id, new_mtu);

    /* Stop specific port */
    rte_eth_dev_stop(port_id);
    memcpy(&conf, &port_conf_default, sizeof(conf));

    /* mtu + length of header (14) + length of FCS (4) = max pkt length */
    const unsigned int max_frame_len = new_mtu + 14 + 4;

    /*
     * Jumbo-frame offload is needed as soon as the *frame* is larger than a
     * standard Ethernet frame. Compare the frame length, not the bare MTU,
     * against RTE_ETHER_MAX_LEN: with the MTU compared directly, MTUs
     * between 1501 and 1518 would be treated as non-jumbo.
     */
    if (max_frame_len > RTE_ETHER_MAX_LEN)
        conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
    else
        conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
    conf.rxmode.max_rx_pkt_len = max_frame_len;

    ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
    if (ret < 0) {
        printf("Fail to reconfigure port %d\n", port_id);
        return ret;
    }

    ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd);
    if (ret < 0)
        rte_exit(EXIT_FAILURE, "Could not adjust number of descriptors "
                "for port%u (%d)\n", (unsigned int)port_id,
                ret);

    /* Set up RX queue 0 with the driver defaults plus the offloads chosen above. */
    rte_eth_dev_info_get(port_id, &dev_info);
    rxq_conf = dev_info.default_rxconf;
    rxq_conf.offloads = conf.rxmode.offloads;
    ret = rte_eth_rx_queue_setup(port_id, 0, nb_rxd,
            rte_eth_dev_socket_id(port_id), &rxq_conf, pktmbuf_pool);
    if (ret < 0) {
        printf("Fail to setup Rx queue of port %d\n",
                port_id);
        return ret;
    }

    // Set up TX queue.
    struct rte_eth_txconf txq_conf = dev_info.default_txconf;
    txq_conf.offloads = conf.txmode.offloads;
    if (rte_eth_tx_queue_setup(port_id, 0, nb_txd,
            rte_eth_dev_socket_id(port_id), &txq_conf) < 0) {
        rte_exit(EXIT_FAILURE, "Couldn't setup TX queue.\n");
    }

    /* Restart specific port */
    ret = rte_eth_dev_start(port_id);
    if (ret < 0) {
        printf("Fail to restart port %d\n", port_id);
        return ret;
    }

    /*
     * Enable promiscuous mode so that DPDK receives traffic from the outside
     * and can also serve the traffic coming from the KNI interface.
     */
    rte_eth_promiscuous_enable(port_id);
    return 0;
}
/*
 * KNI callback: bring the port up or down.
 *
 * port_id  port to act on
 * if_up    non-zero: stop and then start the port; zero: only stop it
 *
 * kni_pause is incremented for the duration of the stop/start sequence and
 * decremented afterwards.
 *
 * Returns -EINVAL for an invalid port, otherwise the result of
 * rte_eth_dev_start() when bringing the port up, or 0 when taking it down.
 */
int kni_config_network_if(uint16_t port_id, uint8_t if_up) {
    if (!rte_eth_dev_is_valid_port(port_id)) {
        printf("Invalid port id %d\n", port_id);
        return -EINVAL;
    }

    const char *direction = if_up ? "up" : "down";
    printf("Configure network interface of %d %s\n", port_id, direction);

    int status = 0;

    rte_atomic32_inc(&kni_pause);
    /* Both directions begin by stopping the port; "up" then restarts it. */
    rte_eth_dev_stop(port_id);
    if (if_up != 0) {
        status = rte_eth_dev_start(port_id);
    }
    rte_atomic32_dec(&kni_pause);

    if (status < 0) {
        printf("Failed to start port %d\n", port_id);
    }
    return status;
}
/*
 * KNI callback: set the default MAC address of a port.
 *
 * port_id   port to configure
 * mac_addr  new MAC address bytes, reinterpreted as struct rte_ether_addr
 *
 * Prints the address being applied. Returns -EINVAL for an invalid port,
 * otherwise the result of rte_eth_dev_default_mac_addr_set().
 */
int kni_config_mac_address(uint16_t port_id, uint8_t mac_addr[]) {
    if (!rte_eth_dev_is_valid_port(port_id)) {
        printf("Invalid port id %d\n", port_id);
        return -EINVAL;
    }

    struct rte_ether_addr *new_mac = (struct rte_ether_addr *)mac_addr;
    char mac_text[RTE_ETHER_ADDR_FMT_SIZE];

    printf("Configure mac address of %d\n", port_id);
    rte_ether_format_addr(mac_text, RTE_ETHER_ADDR_FMT_SIZE, new_mac);
    printf("\tAddress: %s\n", mac_text);

    const int status = rte_eth_dev_default_mac_addr_set(port_id, new_mac);
    if (status < 0) {
        printf("Failed to config mac_addr for port %d\n", port_id);
    }
    return status;
}
/*
 * Create the KNI device that mirrors port g_dpdkPortId and store its handle
 * in the global `kni`.
 *
 * Exactly one available Ethernet port is required. The kernel-side interface
 * is named "vEth<port id>" and is given the port's current MAC address and
 * MTU; it can be inspected with ifconfig once it has been created.
 *
 * Returns 0 on success; every failure terminates the process via rte_exit().
 */
static int init_kni(void) {
    uint16_t num_of_kni_ports = rte_eth_dev_count_avail();
    if (num_of_kni_ports != 1)
        rte_exit(EXIT_FAILURE, "No supported Ethernet device found\n");

    if (rte_kni_init(num_of_kni_ports) < 0)
        rte_exit(EXIT_FAILURE, "Fail to init kni subsystem\n");

    /*
     * Zero the whole configuration first: fields that are not assigned below
     * (core_id, force_bind, min_mtu, max_mtu, ...) would otherwise be passed
     * to rte_kni_alloc() as uninitialized stack contents.
     */
    struct rte_kni_conf conf;
    memset(&conf, 0, sizeof(conf));
    snprintf(conf.name, RTE_KNI_NAMESIZE, "vEth%u", g_dpdkPortId);
    conf.group_id = (uint16_t)g_dpdkPortId;
    conf.mbuf_size = MAX_PACKET_SIZE;
    rte_eth_macaddr_get(g_dpdkPortId, (struct rte_ether_addr*)&conf.mac_addr);
    rte_eth_dev_get_mtu(g_dpdkPortId, &conf.mtu);

    /* Callbacks the KNI layer invokes for requests on this interface. */
    struct rte_kni_ops ops;
    memset(&ops, 0, sizeof(ops));
    ops.port_id = g_dpdkPortId;
    ops.change_mtu = kni_change_mtu;
    ops.config_network_if = kni_config_network_if;
    ops.config_mac_address = kni_config_mac_address;

    kni = rte_kni_alloc(pktmbuf_pool, &conf, &ops);
    if (!kni)
        rte_exit(EXIT_FAILURE, "Fail to create kni for "
                "port: %d\n", g_dpdkPortId);
    return 0;
}
/*
 * Release the global KNI device and stop the given port.
 *
 * port_id  port to stop after the KNI device has been released
 *
 * The port is stopped even when releasing the KNI device fails.
 * Returns the result of rte_kni_release(): 0 on success, non-zero on failure.
 */
static int free_kni(uint16_t port_id) {
    int ret = rte_kni_release(kni);
    if (ret != 0)
        printf("Fail to release kni for port: %d\n", port_id);

    rte_eth_dev_stop(port_id);
    return ret;
}
/*
 * KNI excerpt: create the KNI device, then keep forwarding every packet the
 * kernel sends through it to TX queue 0 of port g_dpdkPortId, printing the
 * EtherType of each packet on the way.
 *
 * The polling runs in an endless loop; the `continue` on the error path
 * relies on that loop.
 */
int main()
{
    init_kni();

    struct rte_mbuf *pkts_burst[BURST_SIZE];
    for (;;) {
        unsigned num_rx_recvd = rte_kni_rx_burst(kni, pkts_burst, BURST_SIZE);
        if (unlikely(num_rx_recvd > BURST_SIZE)) {
            printf("Error receiving from KNI\n");
            continue;
        }

        for (unsigned j = 0; j < num_rx_recvd; j++) {
            struct rte_ether_hdr *ehdr = rte_pktmbuf_mtod(pkts_burst[j], struct rte_ether_hdr*);
            printf(" kni : ehdr->ether_type --> %x\n", ntohs(ehdr->ether_type));
        }

        unsigned nb_tx = rte_eth_tx_burst(g_dpdkPortId, 0, pkts_burst, (uint16_t)num_rx_recvd);
        /*
         * The first nb_tx mbufs now belong to the driver, which releases
         * them after transmission. Only the packets it did not accept are
         * still ours to free.
         */
        if (unlikely(nb_tx < num_rx_recvd)) {
            burst_free_mbufs(&pkts_burst[nb_tx], num_rx_recvd - nb_tx);
        }
    }
}
执行代码后,可以通过 ifconfig -a 查看kni的设置。注意此时接口还没有真正工作,需要设置ip地址和mac地址(即使mac地址已存在)。
# 需要设置和dpdk一样的ip和mac
ifconfig vEth0 192.168.0.120 hw ether 00:0c:29:85:2e:88 up