dpdk实现arp和kni

接着之前的代码,继续添加功能。

数据发送

之前介绍了接收的流程,这里介绍发送的流程。
其实是类似的,是组包的过程,贴代码

static void create_eth_ip_udp_pkt(uint8_t *msg, size_t total_len, uint8_t *dst_mac,
    uint32_t src_ip, uint32_t dst_ip, uint16_t udp_src_port, uint16_t udp_dst_port, 
    uint8_t *data, int length) {

	struct rte_ether_hdr *eth = (struct rte_ether_hdr *)msg;
    rte_memcpy(eth->d_addr.addr_bytes, dst_mac, RTE_ETHER_ADDR_LEN);
    rte_memcpy(eth->s_addr.addr_bytes, g_src_mac_addr, RTE_ETHER_ADDR_LEN);
    eth->ether_type = htons(RTE_ETHER_TYPE_IPV4);

    struct rte_ipv4_hdr *ip = (struct rte_ipv4_hdr *)(eth + 1);
    size_t ip_len = total_len - sizeof(struct rte_ether_hdr);
	ip->version_ihl = 0x45;
	ip->type_of_service = 0;
	ip->total_length = htons((uint16_t)ip_len);
    ip->packet_id = 0;
	ip->fragment_offset = 0;
    ip->time_to_live = 64;
	ip->next_proto_id = IPPROTO_UDP;
	
	ip->src_addr = src_ip;
	ip->dst_addr = dst_ip;
	
	ip->hdr_checksum = 0;
    ip->hdr_checksum =  rte_ipv4_cksum(ip);

    struct rte_udp_hdr *udp = (struct rte_udp_hdr *)(ip + 1);
    //size_t udp_len = ip_len - sizeof(struct rte_ipv4_hdr);
    udp->src_port = htons(udp_src_port);
    udp->dst_port = htons(udp_dst_port);
    udp->dgram_len = htons((uint16_t)(length + sizeof(struct rte_udp_hdr)));

    uint32_t *payload = (uint32_t *)(udp + 1);
    rte_memcpy(payload, data, length);

	udp->dgram_cksum = 0;
	udp->dgram_cksum = rte_ipv4_udptcp_cksum(ip, udp);

}



static void do_send_udp(struct rte_mempool *mbuf_pool, unsigned char *data, int length) {

    //length是纯数据的长度,总长度需要加上各个协议头的长度
	const unsigned eth_total_len = length + 42;  //在dpdk中,这三个协议头,加起来是42字节

	struct rte_mbuf *mbuf = rte_pktmbuf_alloc(mbuf_pool);
	if (!mbuf) {
		rte_exit(EXIT_FAILURE, "Cannot alloc mbuf\n");
	}

	mbuf->pkt_len = eth_total_len;
    mbuf->data_len = eth_total_len;

	uint8_t *pkt_data = rte_pktmbuf_mtod(mbuf, uint8_t *);
	const int udp_port = UDP_PORT;

    //g_开头,是全局变量
	create_eth_ip_udp_pkt(pkt_data, eth_total_len, g_dest_mac_mac_addr, 
		g_src_ip, g_dest_ip, udp_port, udp_port, data, length);

	rte_eth_tx_burst(g_dpdkPortId, 0, &mbuf, 1);

	rte_pktmbuf_free(mbuf);

}

实现收发arp

arp协议的实现中,主要是请求和回应。
请求方面,值得说明的是,arp请求是以广播方式发送的。请求方会向局域网内广播一个带有目的ip地址、但目的mac地址未知的请求包,局域网内所有机器都会收到这个包,但只有ip地址与之匹配的机器会回应。请求方收到回应包后,就会把对方ip与mac的对应关系记录到arp表中。
局域网内部通信的时候,用的是mac地址,上层的地址是没有用的,所以需要转换。但是mac地址出了网关就没有用了,所以还是需要ip等上层地址。
还是贴代码

int main()
{
        struct rte_mbuf *mbufs[BURST_SIZE];
		unsigned num_recvd = rte_eth_rx_burst(g_dpdkPortId, DPDK_QUEUE_ID_RX, mbufs, BURST_SIZE);
		if (unlikely(num_recvd > BURST_SIZE)) {
			rte_exit(EXIT_FAILURE, "Error receiving from eth\n");
		}

		unsigned i = 0;
		for (i = 0;i < num_recvd;i ++) {
			
			struct rte_ether_hdr *ehdr = rte_pktmbuf_mtod(mbufs[i], struct rte_ether_hdr*);

			if (ehdr->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {

				struct rte_arp_hdr *arp_hdr = rte_pktmbuf_mtod_offset(mbufs[i], struct rte_arp_hdr *, sizeof(struct rte_ether_hdr));

				if (arp_hdr->arp_data.arp_tip == g_src_ip) {  //没有这一行就是arp欺骗

					// sip change to dip and dip change to sip
					// dmac change to smac and smac change to dmac
					printf("do_send_arp\n");
					do_send_arp(pktmbuf_pool, arp_hdr->arp_data.arp_sha.addr_bytes, arp_hdr->arp_data.arp_tip, arp_hdr->arp_data.arp_sip);
					
				}
            }
        }
}

上面是收到包之后的处理,收到arp包之后,再原路回发,把收到arp包的源地址和目的地址改一下。具体组包过程

static void create_eth_arp_pkt(uint8_t *msg, uint8_t *dst_mac,
    uint32_t src_ip, uint32_t dst_ip) {

	
	struct rte_ether_hdr *eth = (struct rte_ether_hdr *)msg;
	rte_memcpy(eth->d_addr.addr_bytes, dst_mac, RTE_ETHER_ADDR_LEN);
	rte_memcpy(eth->s_addr.addr_bytes, g_src_mac_addr, RTE_ETHER_ADDR_LEN);
	eth->ether_type = htons(RTE_ETHER_TYPE_ARP);

	struct rte_arp_hdr *arp = (struct rte_arp_hdr *)(eth + 1);

	arp->arp_hardware = htons(1);
    //协议格式
	arp->arp_protocol = htons(RTE_ETHER_TYPE_IPV4);  //ip地址的协议
	//硬件地址的长度,即mac地址的长度
    arp->arp_hlen = RTE_ETHER_ADDR_LEN;
	arp->arp_plen = sizeof(uint32_t);  //ip地址长度
    //操作码,1请求2回应
	arp->arp_opcode = htons(2);

	rte_memcpy(arp->arp_data.arp_sha.addr_bytes, g_src_mac_addr, RTE_ETHER_ADDR_LEN);
	rte_memcpy(arp->arp_data.arp_tha.addr_bytes, dst_mac, RTE_ETHER_ADDR_LEN);

	arp->arp_data.arp_sip = src_ip;
	arp->arp_data.arp_tip = dst_ip;

	
	struct in_addr addr;

	char buf[RTE_ETHER_ADDR_FMT_SIZE];
	rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, (struct rte_ether_addr*)&arp->arp_data.arp_sha);

	addr.s_addr = arp->arp_data.arp_sip;
	printf(" arp src: %s, mac: %s", inet_ntoa(addr), buf);

	rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, (struct rte_ether_addr*)&arp->arp_data.arp_tha);
	addr.s_addr = arp->arp_data.arp_tip;
	printf(", dst: %s, mac: %s \n", inet_ntoa(addr), buf);

	

}


/*
 * Build an ARP reply and queue it on TX queue 0 of g_dpdkPortId.
 *
 * mbuf_pool - pool the packet buffer is allocated from.
 * dst_mac   - MAC address of the host being answered.
 * sip       - sender IP to place in the reply.
 * dip       - target IP to place in the reply.
 *
 * Exits the process if no mbuf can be allocated; logs and drops the packet
 * if the frame does not fit into the mbuf.
 */
static void do_send_arp(struct rte_mempool *mbuf_pool, uint8_t *dst_mac, uint32_t sip, uint32_t dip) {

	const unsigned total_length = sizeof(struct rte_ether_hdr) + sizeof(struct rte_arp_hdr);

	struct rte_mbuf *mbuf = rte_pktmbuf_alloc(mbuf_pool);
	if (!mbuf) {
		rte_exit(EXIT_FAILURE, "Cannot alloc mbuf\n");
	}

	/* Reserve the frame bytes; this also sets pkt_len/data_len and returns
	 * NULL if the mbuf is too small for the frame. */
	uint8_t *pkt_data = (uint8_t *)rte_pktmbuf_append(mbuf, (uint16_t)total_length);
	if (!pkt_data) {
		printf("do_send_arp: %u bytes do not fit into one mbuf\n", total_length);
		rte_pktmbuf_free(mbuf);
		return;
	}

	create_eth_arp_pkt(pkt_data, dst_mac, sip, dip);

	/* A packet accepted by rte_eth_tx_burst() belongs to the driver, which
	 * frees it after transmission. Free it here only when it was rejected;
	 * freeing an accepted packet would release the buffer twice. */
	if (rte_eth_tx_burst(g_dpdkPortId, 0, &mbuf, 1) == 0) {
		rte_pktmbuf_free(mbuf);
	}

}

值得注意的是,arp协议工作在数据链路层,而这个组包过程,让人感觉是工作在网络层,这是因为,arp协议是在以太网基础之上进行工作的,在代码层次,用了第三层的数据结构来处理,这就造成了这个错觉。

kni操作

kni(Kernel NIC Interface)内核网卡接口,是dpdk允许用户态和内核态交换报文的解决方案,模拟了一个虚拟的网口,提供dpdk的应用程序和linux内核之间通讯。kni接口允许报文从用户态接收后转发到linux协议栈去。
这里主要介绍基本操作,不详细介绍原理。简单来说,就是dpdk把数据包再丢给内核,由内核处理。

// Callback functions for the KNI device. init_kni() stores them in
// struct rte_kni_ops (config_mac_address, config_network_if, change_mtu).
// Each takes the DPDK port id and returns -EINVAL for an invalid port,
// otherwise the result of the underlying ethdev call.
int kni_config_mac_address(uint16_t port_id, uint8_t mac_addr[]);
int kni_config_network_if(uint16_t port_id, uint8_t if_up);
int kni_change_mtu(uint16_t port_id, unsigned int new_mtu);

int kni_change_mtu(uint16_t port_id, unsigned int new_mtu) {

	int ret;
	uint16_t nb_txd = TX_RING_SIZE;
	uint16_t nb_rxd = RX_RING_SIZE;
	
	struct rte_eth_conf conf;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_rxconf rxq_conf;

	if (!rte_eth_dev_is_valid_port(port_id)) {
		printf("Invalid port id %d\n", port_id);
		return -EINVAL;
	}

	printf("Change MTU of port %d to %u\n", port_id, new_mtu);

	/* Stop specific port */
	rte_eth_dev_stop(port_id);

	memcpy(&conf, &port_conf_default, sizeof(conf));
	/* Set new MTU */
	if (new_mtu > RTE_ETHER_MAX_LEN)
		conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
	else
		conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;

	/* mtu + length of header + length of FCS = max pkt length */
 	conf.rxmode.max_rx_pkt_len = new_mtu + 14 + 4;
	
	ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
	if (ret < 0) {
		printf("Fail to reconfigure port %d\n", port_id);
		return ret;
	}

	ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Could not adjust number of descriptors "
				"for port%u (%d)\n", (unsigned int)port_id,
				ret);

	rte_eth_dev_info_get(port_id, &dev_info);
	rxq_conf = dev_info.default_rxconf;
	rxq_conf.offloads = conf.rxmode.offloads;
	ret = rte_eth_rx_queue_setup(port_id, 0, nb_rxd,
		rte_eth_dev_socket_id(port_id), &rxq_conf, pktmbuf_pool);
	if (ret < 0) {
		printf("Fail to setup Rx queue of port %d\n",
				port_id);
		return ret;
	}

	// Set up TX queue.
	struct rte_eth_txconf txq_conf = dev_info.default_txconf;
	txq_conf.offloads = conf.txmode.offloads;
	if (rte_eth_tx_queue_setup(port_id, 0, nb_txd,
            rte_eth_dev_socket_id(port_id), &txq_conf) < 0) {
        rte_exit(EXIT_FAILURE, "Couldn't setup TX queue.\n");
    }

	/* Restart specific port */
	ret = rte_eth_dev_start(port_id);
	if (ret < 0) {
		printf("Fail to restart port %d\n", port_id);
		return ret;
	}

    //设置混杂模式,dpdk既能接收到外界的数据,又能响应kni过来的数据
	rte_eth_promiscuous_enable(port_id);

	return 0;

}

/*
 * KNI callback: bring the network interface behind `port_id` up or down.
 *
 * if_up != 0 restarts the port (stop followed by start); if_up == 0 only
 * stops it. The global counter kni_pause is incremented for the duration of
 * the stop/start sequence and decremented afterwards.
 *
 * Returns -EINVAL for an invalid port id, the result of rte_eth_dev_start()
 * when bringing the interface up, and 0 when taking it down.
 */
int kni_config_network_if(uint16_t port_id, uint8_t if_up) {

	if (!rte_eth_dev_is_valid_port(port_id)) {
		printf("Invalid port id %d\n", port_id);
		return -EINVAL;
	}

	printf("Configure network interface of %d %s\n",
		port_id, if_up ? "up" : "down");

	int rc = 0;

	rte_atomic32_inc(&kni_pause);

	/* Both directions begin by stopping the port; "up" then starts it again. */
	rte_eth_dev_stop(port_id);
	if (if_up != 0) {
		rc = rte_eth_dev_start(port_id);
	}

	rte_atomic32_dec(&kni_pause);

	if (rc < 0) {
		printf("Failed to start port %d\n", port_id);
	}

	return rc;

}

/*
 * KNI callback: set `mac_addr` as the default MAC address of `port_id`.
 *
 * The new address is logged in text form before it is applied.
 * Returns -EINVAL for an invalid port id, otherwise the result of
 * rte_eth_dev_default_mac_addr_set().
 */
int kni_config_mac_address(uint16_t port_id, uint8_t mac_addr[]) {

	if (!rte_eth_dev_is_valid_port(port_id)) {
		printf("Invalid port id %d\n", port_id);
		return -EINVAL;
	}

	/* The raw byte array is handed to the ethdev API as an rte_ether_addr. */
	struct rte_ether_addr *new_addr = (struct rte_ether_addr *)mac_addr;

	printf("Configure mac address of %d\n", port_id);

	char addr_text[RTE_ETHER_ADDR_FMT_SIZE];
	rte_ether_format_addr(addr_text, sizeof(addr_text), new_addr);
	printf("\tAddress: %s\n", addr_text);

	int rc = rte_eth_dev_default_mac_addr_set(port_id, new_addr);
	if (rc < 0) {
		printf("Failed to config mac_addr for port %d\n", port_id);
	}

	return rc;

}

/*
 * Create the KNI device for g_dpdkPortId and store its handle in the global
 * `kni`.
 *
 * The interface is named "vEth<port>" and inherits the MAC address and MTU
 * of the DPDK port. The kni_change_mtu / kni_config_network_if /
 * kni_config_mac_address callbacks are registered for requests coming from
 * the kernel side.
 *
 * Returns 0 on success; exits the process if the number of available ports
 * is not exactly one or if the KNI device cannot be created.
 */
static int init_kni(void) {

	uint16_t num_of_kni_ports = rte_eth_dev_count_avail();
	if (num_of_kni_ports != 1)
		rte_exit(EXIT_FAILURE, "No supported Ethernet device found\n");

	rte_kni_init(num_of_kni_ports);

	/* Zero the whole structure first: fields that are not assigned below
	 * (e.g. core_id, force_bind) would otherwise reach rte_kni_alloc() as
	 * uninitialized stack contents. */
	struct rte_kni_conf conf;
	memset(&conf, 0, sizeof(conf));

	/* After creation the interface is visible through ifconfig under this name. */
	snprintf(conf.name, RTE_KNI_NAMESIZE, "vEth%u", g_dpdkPortId);
	conf.group_id = (uint16_t)g_dpdkPortId;
	conf.mbuf_size = MAX_PACKET_SIZE;

	/* Mirror the physical port's MAC address and MTU on the KNI interface. */
	rte_eth_macaddr_get(g_dpdkPortId, (struct rte_ether_addr*)&conf.mac_addr);
	rte_eth_dev_get_mtu(g_dpdkPortId, &conf.mtu);

	struct rte_kni_ops ops;
	memset(&ops, 0, sizeof(ops));
	ops.port_id = g_dpdkPortId;
	ops.change_mtu = kni_change_mtu;
	ops.config_network_if = kni_config_network_if;
	ops.config_mac_address = kni_config_mac_address;

	kni = rte_kni_alloc(pktmbuf_pool, &conf, &ops);
	if (!kni)
		rte_exit(EXIT_FAILURE, "Fail to create kni for "
					"port: %d\n", g_dpdkPortId);

	return 0;

}

/*
 * Release the global KNI device and stop `port_id`.
 *
 * The port is stopped even if releasing the KNI device fails.
 * Returns the result of rte_kni_release() (0 on success).
 */
static int free_kni(uint16_t port_id) {

	int ret = rte_kni_release(kni);
	if (ret != 0)
		printf("Fail to release kni\n");

	rte_eth_dev_stop(port_id);

	return ret;
}

/*
 * KNI egress excerpt: create the KNI device, then keep forwarding packets
 * that the kernel wrote to the KNI interface out through the physical port.
 *
 * NOTE(review): the callbacks registered in init_kni() are only executed
 * when rte_kni_handle_request(kni) is polled; that call is not part of this
 * excerpt - confirm the complete loop contains it.
 */
int main()
{
	init_kni();

	/* Poll forever; `continue` skips a faulty burst and polls again. */
	while (1) {
		struct rte_mbuf *pkts_burst[BURST_SIZE];
		unsigned num_rx_recvd = rte_kni_rx_burst(kni, pkts_burst, BURST_SIZE);
		if (unlikely(num_rx_recvd > BURST_SIZE)) {
			printf("Error receiving from KNI\n");
			continue;
		}

		for (unsigned j = 0; j < num_rx_recvd; j++) {

			struct rte_ether_hdr *ehdr = rte_pktmbuf_mtod(pkts_burst[j], struct rte_ether_hdr*);
			printf(" kni : ehdr->ether_type --> %x\n", ntohs(ehdr->ether_type));

		}

		unsigned nb_tx = rte_eth_tx_burst(g_dpdkPortId, 0, pkts_burst, (uint16_t)num_rx_recvd);

		/* Packets accepted by rte_eth_tx_burst() are owned and freed by the
		 * driver. Only the tail that was NOT queued must be released here;
		 * freeing the whole burst would free transmitted mbufs twice. */
		if (unlikely(nb_tx < num_rx_recvd)) {
			burst_free_mbufs(&pkts_burst[nb_tx], num_rx_recvd - nb_tx);
		}
	}
}

执行代码后,可以通过ifconfig -a查看kni创建的虚拟网口。注意此时它还没有真正工作,需要先为它设置ip地址和mac地址(即使mac地址已存在,也要重新设置一次)。

# 需要设置和dpdk一样的ip和mac
ifconfig vEth0 192.168.0.120 hw ether 00:0c:29:85:2e:88 up

你可能感兴趣的:(笔记,后端,架构)