/* Added for EL7 compatibility */ #include #include #include /* Ensure that sys/types.h and sys/socket.h are included before linux/if.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include
/* NOTE(review): the header names of every #include directive above were lost
 * (extraction/encoding damage) -- the directives are not valid as they stand.
 * Recover the original list from revision control; from usage this file needs
 * at least sys/types.h, sys/socket.h, linux/if.h, linux/if_vlan.h, arpa/inet.h,
 * sys/ioctl.h plus the DPDK ethdev/bonding headers and the project MR_/MESA_
 * headers -- TODO confirm against the repository. */
/* Default ethdev configuration: single-queue, no multi-queue mode on RX or TX. */
static struct rte_eth_conf eth_conf_default = { .rxmode = { .mq_mode = RTE_ETH_MQ_RX_NONE, }, .txmode = { .mq_mode = RTE_ETH_MQ_TX_NONE, }, };
/* Symmetric RSS key (repeating 0x6d5a pattern) so both directions of a flow
 * hash to the same RX queue. */
static uint8_t default_sym_rss_key[40] = {0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a};
/* One entry per discoverable DPDK device (physical PCI NIC or vdev). */
struct dpdk_dev_candidate { TAILQ_ENTRY(dpdk_dev_candidate) next; /* kernel name */ char kernel_name[MR_SYMBOL_MAX]; /* vdev name, for dpdk vdevs */ char dpdk_vdev_name[MR_SYMBOL_MAX]; /* PCI address */ struct rte_pci_addr pci_addr; /* STR PCI address */ char str_pci_addr[PCI_PRI_STR_SIZE]; /** for real network card, the name is str_pci_addr * for virtio, the name is dpdk_vdev_name * Its value is obtained through rte_eth_dev_get_name_by_port() */ char devname[RTE_ETH_NAME_MAX_LEN]; /* vendor */ char vendor[MR_SYMBOL_MAX]; /* driver */ char driver[MR_SYMBOL_MAX]; };
/* Top-level device-manager state: candidate list, the descriptor table
 * (indexed by port id), shmdev port-id allocator, hardware-info JSON and a
 * back-pointer to the service main object. */
struct devmgr_main { /* dpdk devices candidate */ TAILQ_HEAD(dpdk_dev_candidate_db_head, dpdk_dev_candidate) dpdk_dev_candidate_list; /* device desc for all kind of driver */ struct mr_dev_desc * dev_descs[MR_DEVICE_MAX]; /* shmdev port_id counter */ unsigned int shmdev_port_id_counter; /* hardware info for dpdk devices, * this file is generated by the helper script before the main process run */ struct cJSON * j_hwfile; /* sc_main handler */ struct sc_main * sc; };
/* Map an MR_DEV_RSSMODE_* value to a human-readable label. */
static const char * str_rssmode(unsigned int rssmode) { switch (rssmode) { case 
MR_DEV_RSSMODE_DEFAULT: return "Default"; case MR_DEV_RSSMODE_2TUPLE_SYM: return "SAddr/DAddr(Sym)"; case MR_DEV_RSSMODE_4TUPLE_SYM: return "SAddr/DAddr/SPort/DPort(Sym)"; case MR_DEV_RSSMODE_4TUPLE_ASYM: return "SAddr/DAddr/SPort/DPort(Asym)"; default: return "Unknown"; } }
/* Map 0/non-zero to "DISABLE"/"ENABLE" for status printing. */
static const char * str_enable_or_disable(unsigned int value) { if (value) return "ENABLE"; else return "DISABLE"; }
/* Driver-type enum -> label. NOTE(review): no bounds check -- an out-of-range
 * value indexes past the table; callers must pass a valid enum value. */
static const char * str_dev_driver(unsigned int drv_type) { static const char * dev_driver_as_str[] = { [MR_DEV_DRV_TYPE_DPDK_PCI] = "dpdk_pci", [MR_DEV_DRV_TYPE_DPDK_VIRTIO_USER] = "dpdk_virtio_user", [MR_DEV_DRV_TYPE_DPDK_AF_PACKET] = "dpdk_af_packet", [MR_DEV_DRV_TYPE_SHMDEV] = "shmdev", }; return dev_driver_as_str[drv_type]; }
/* Role enum -> label (same unchecked-index caveat as str_dev_driver). */
static const char * str_dev_role(unsigned int role_type) { static const char * dev_role_as_str[] = { [MR_DEV_ROLE_NONE] = "none", [MR_DEV_ROLE_VWIRE_INTERFACE] = "virtual wire", [MR_DEV_ROLE_TAP_INTERFACE] = "tap", [MR_DEV_ROLE_ENDPOINT_INTERFACE] = "endpoint", [MR_DEV_ROLE_NF_INTERFACE] = "network function", [MR_DEV_ROLE_KERNEL_RESP_INTERFACE] = "kernel resp", }; return dev_role_as_str[role_type]; }
/* Access/trunk mode enum -> label. */
static const char * str_dev_mode(unsigned int dev_mode) { static const char * dev_mode_as_str[] = { [MR_DEV_MODE_ACCESS] = "access", [MR_DEV_MODE_TRUNK] = "trunk", }; return dev_mode_as_str[dev_mode]; }
/* Device-type enum -> label. */
static const char * str_dev_type(unsigned int dev_type) { static const char * dev_type_as_str[] = { [MR_DEV_TYPE_ETH] = "ethernet", [MR_DEV_TYPE_BOND] = "bond", }; return dev_type_as_str[dev_type]; }
/* Log the driver-independent attributes of a descriptor (type/driver/role/mode). */
static int mr_dev_desc_status_print(struct mr_dev_desc * dev_desc) { MR_INFO(" Type : %s", str_dev_type(dev_desc->type)); MR_INFO(" Driver : %s", str_dev_driver(dev_desc->drv_type)); MR_INFO(" Role : %s", str_dev_role(dev_desc->role_type)); MR_INFO(" Mode : %s", str_dev_mode(dev_desc->dev_mode)); return 0; }
/* Log L3 addressing: a single IPv4 addr/mask/gateway triple in access mode, or
 * the per-VLAN IPv4/IPv6 members in trunk mode. */
static int mr_dev_desc_ip_addr_print(struct mr_dev_desc * dev_desc) { if (dev_desc->dev_mode == MR_DEV_MODE_ACCESS) { char str_in_addr[INET_ADDRSTRLEN]; 
char str_in_mask[INET_ADDRSTRLEN]; char str_gateway[INET_ADDRSTRLEN]; if (dev_desc->in_addr.s_addr != 0) { inet_ntop(AF_INET, &dev_desc->in_addr, str_in_addr, sizeof(str_in_addr)); inet_ntop(AF_INET, &dev_desc->in_mask, str_in_mask, sizeof(str_in_mask)); inet_ntop(AF_INET, &dev_desc->in_gateway, str_gateway, sizeof(str_gateway)); MR_INFO(" IP Address : %s", str_in_addr); MR_INFO(" IP Mask : %s", str_in_mask); MR_INFO(" Gateway : %s", str_gateway); } } else if (dev_desc->dev_mode == MR_DEV_MODE_TRUNK) { for (int i = 0; i < dev_desc->nr_vlan_members; i++) { struct vlan_member * vlan_member = &dev_desc->vlan_members[i]; MR_INFO(" VLAN Member : %d", i); MR_INFO(" VLAN ID : %d", rte_be_to_cpu_16(vlan_member->vlan_id)); /* vlan_id is stored big-endian -- convert for display */ if (vlan_member->sa_family_v4 == AF_INET) { char str_in_addr_v4[INET_ADDRSTRLEN]; char str_in_mask_v4[INET_ADDRSTRLEN]; inet_ntop(AF_INET, &vlan_member->in_addr, str_in_addr_v4, sizeof(str_in_addr_v4)); inet_ntop(AF_INET, &vlan_member->in_mask, str_in_mask_v4, sizeof(str_in_mask_v4)); MR_INFO(" IPv4 Address : %s", str_in_addr_v4); MR_INFO(" IPv4 Mask : %s", str_in_mask_v4); } if (vlan_member->sa_family_v6 == AF_INET6) { char str_in_addr_v6[INET6_ADDRSTRLEN]; char str_in_mask_v6[INET6_ADDRSTRLEN]; inet_ntop(AF_INET6, &vlan_member->in6_addr, str_in_addr_v6, sizeof(str_in_addr_v6)); inet_ntop(AF_INET6, &vlan_member->in6_mask, str_in_mask_v6, sizeof(str_in_mask_v6)); MR_INFO(" IPv6 Address : %s", str_in_addr_v6); MR_INFO(" IPv6 Mask : %s", str_in_mask_v6); } } } return 0; }
/* Bonding mode (rte_eth_bond BONDING_MODE_*) -> label. */
static const char * str_bond_mode(unsigned int bond_mode) { static const char * _str_bond_mode_map[] = { [BONDING_MODE_ROUND_ROBIN] = "round_robin", [BONDING_MODE_ACTIVE_BACKUP] = "active_backup", [BONDING_MODE_BALANCE] = "balance", [BONDING_MODE_BROADCAST] = "broadcast", [BONDING_MODE_8023AD] = "802.3ad", [BONDING_MODE_TLB] = "tlb", [BONDING_MODE_ALB] = "alb", }; return _str_bond_mode_map[bond_mode]; }
/* Bonding transmit-hash policy (BALANCE_XMIT_POLICY_*) -> label. */
static const char * str_bond_xmit_policy(unsigned int xmit_policy) { static const char 
* _str_bond_xmit_policy_map[] = { [BALANCE_XMIT_POLICY_LAYER2] = "layer2", [BALANCE_XMIT_POLICY_LAYER23] = "layer23", [BALANCE_XMIT_POLICY_LAYER34] = "layer34", }; return _str_bond_xmit_policy_map[xmit_policy]; }
/* 802.3ad aggregator selection policy (AGG_*) -> label. */
static const char * str_bond_agg_selection(unsigned int agg_selection) { static const char * _str_bond_agg_selection_map[] = { [AGG_BANDWIDTH] = "bandwidth", [AGG_STABLE] = "stable", [AGG_COUNT] = "count", }; return _str_bond_agg_selection_map[agg_selection]; }
/* Dump the full status of one DPDK-backed device to the log: identity,
 * descriptor commons, MAC, MTU, offload flags, RSS mode and L3 addressing. */
static int dpdk_dev_status_print(struct dpdk_dev * dev) { char str_phy_addr[MR_SYMBOL_MAX]; rte_ether_format_addr(str_phy_addr, sizeof(str_phy_addr), &dev->ether_addr); char devname[RTE_ETH_NAME_MAX_LEN]; rte_eth_dev_get_name_by_port(dev->port_id, devname); MR_INFO(" "); MR_INFO("DPDK based device %s: PortID = %d", dev->symbol, dev->port_id); /* print the common info of the dpdk device */ assert(dev->ref_dev_desc != NULL); mr_dev_desc_status_print(dev->ref_dev_desc); /* then, the detail info of the dpdk device */ MR_INFO(" devname : %s", devname); MR_INFO(" HWADDR : %s", str_phy_addr); MR_INFO(" Maximum Transmission Unit : %u", dev->mtu); MR_INFO(" Promiscuous mode : %s", str_enable_or_disable(dev->promisc)); MR_INFO(" VLAN-Filter : %s", str_enable_or_disable(dev->en_vlan_filter)); MR_INFO(" VLAN-Strip : %s", str_enable_or_disable(dev->en_vlan_strip)); MR_INFO(" Drop-En : %s", str_enable_or_disable(dev->en_drop)); MR_INFO(" RSSMode : %s", str_rssmode(dev->rssmode));
/* Disabled driver-private debug dump; kept for quick re-enabling during bring-up. */
#if 0
#if RTE_VERSION >= RTE_VERSION_NUM(23, 11, 0, 0)
rte_eth_dev_priv_dump(dev->port_id, stderr);
#endif
#endif
/* Print the ip addr */ mr_dev_desc_ip_addr_print(dev->ref_dev_desc); return 0; }
/* Walk every registered descriptor and print those backed by a DPDK device. */
static void all_dpdk_dev_status_print(struct devmgr_main * devmgr_main) { unsigned int dev_iterator = 0; struct mr_dev_desc * dev_desc_iter = NULL; while ((dev_desc_iter = mr_dev_desc_iterate(devmgr_main, &dev_iterator)) != NULL) { if (dev_desc_iter->dpdk_dev_desc != NULL) { dpdk_dev_status_print(dev_desc_iter->dpdk_dev_desc); } } }
static 
/* Build a CPU-id -> queue-id map: the i-th CPU present in the set gets queue i.
 * Caller owns the returned object. */
struct mr_dev_desc_qid_map * qid_map_create(cpu_set_t * cpu_set_ptr) { struct mr_dev_desc_qid_map * qid_map_object = ZMALLOC(sizeof(struct mr_dev_desc_qid_map)); MR_VERIFY_MALLOC(qid_map_object); for (unsigned int i = 0; i < CPU_COUNT(cpu_set_ptr); i++) { cpu_id_t cpu_id = cpu_set_location(cpu_set_ptr, i); assert(qid_map_object->qid_enabled[cpu_id] == 0); assert(qid_map_object->qid_map[cpu_id] == 0); qid_map_object->qid_enabled[cpu_id] = 1; qid_map_object->qid_map[cpu_id] = i; } return qid_map_object; }
/* Allocate and minimally initialise a device descriptor for `devsym`.
 * Graph node ids and port id start invalid. Caller owns the result. */
struct mr_dev_desc * mr_dev_desc_create(struct devmgr_main * devmgr_main, const char * devsym) { struct mr_dev_desc * dev_desc = ZMALLOC(sizeof(struct mr_dev_desc)); MR_VERIFY_MALLOC(dev_desc); snprintf(dev_desc->symbol, sizeof(dev_desc->symbol) - 1, "%s", devsym); dev_desc->rx_node_id = RTE_NODE_ID_INVALID; dev_desc->tx_node_id = RTE_NODE_ID_INVALID; dev_desc->port_id = (uint32_t)(-1); return dev_desc; }
/* Count the leading one-bits of an IPv6 netmask, i.e. its prefix length.
 * Stops at the first zero bit, so a non-contiguous mask yields only the
 * length of its leading run. */
uint32_t ipv6_mask_length(const struct in6_addr * ipv6_mask) { assert(ipv6_mask != NULL); int length = 0; for (int i = 0; i < sizeof(struct in6_addr); ++i) { for (int j = 7; j >= 0; --j) { if ((ipv6_mask->s6_addr[i] >> j) & 1) { length++; } else { return length; } } } return length; }
/* Push the MAC/MTU/VLAN/IP configuration of a virtio-user representor down to
 * its kernel-visible interface via ioctl(2). No-op for other driver types.
 * Returns RT_SUCCESS or RT_ERR. */
static int kernel_resp_setup_sync(struct mr_dev_desc * dev_desc) { if (dev_desc->drv_type != MR_DEV_DRV_TYPE_DPDK_VIRTIO_USER) { return RT_SUCCESS; } /* Create the IPv4 sockfd*/ int sockfd_v4 = socket(AF_INET, SOCK_DGRAM, 0); if (sockfd_v4 < 0) { MR_ERROR("Creating an IPv4 socket for setting the response device %s failed.", dev_desc->symbol); return RT_ERR; } /* Set mac addr */ struct ifreq ifr = {}; snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", dev_desc->symbol); ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER; rte_memcpy(ifr.ifr_hwaddr.sa_data, &dev_desc->eth_addr.addr_bytes, RTE_ETHER_ADDR_LEN); if (ioctl(sockfd_v4, SIOCSIFHWADDR, &ifr) < 0) { MR_ERROR("Failed to set the MAC address for the response device %s. 
The error code is %d.", dev_desc->symbol, errno); close(sockfd_v4); return RT_ERR; } /* Get current dev MTU */ memset(&ifr, 0, sizeof(ifr)); snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", dev_desc->symbol); if (ioctl(sockfd_v4, SIOCGIFMTU, &ifr) < 0) { MR_ERROR("Failed to obtain the MTU for the response device %s. The error code is %d.", dev_desc->symbol, errno); close(sockfd_v4); return RT_ERR; } /* Set the MTU */ if ((dev_desc->dpdk_dev_desc->mtu != 0) && (dev_desc->dpdk_dev_desc->mtu != ifr.ifr_mtu)) { memset(&ifr, 0, sizeof(ifr)); snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", dev_desc->symbol); ifr.ifr_mtu = dev_desc->dpdk_dev_desc->mtu; if (ioctl(sockfd_v4, SIOCSIFMTU, &ifr) < 0) { MR_ERROR("Failed to set the MTU %u for the response device %s. The error code is %d.", dev_desc->dpdk_dev_desc->mtu, dev_desc->symbol, errno); close(sockfd_v4); return RT_ERR; } } /* Check the vlan members */ if (dev_desc->nr_vlan_members == 0) { close(sockfd_v4); return RT_SUCCESS; } /* Set the VLAN sub-interface */ for (int i = 0; i < dev_desc->nr_vlan_members; i++) { /* Create the VLAN sub-interface */ char sub_symbol[MR_SYMBOL_MAX]; struct vlan_member * vlan_member = &dev_desc->vlan_members[i]; snprintf(sub_symbol, sizeof(sub_symbol) - 1, "%s.%d", dev_desc->symbol, rte_be_to_cpu_16(vlan_member->vlan_id)); struct vlan_ioctl_args vlan_args = {}; snprintf(vlan_args.device1, sizeof(vlan_args.device1) - 1, "%s", dev_desc->symbol); vlan_args.cmd = ADD_VLAN_CMD; vlan_args.u.VID = rte_be_to_cpu_16(vlan_member->vlan_id); if (ioctl(sockfd_v4, SIOCSIFVLAN, &vlan_args) < 0) { MR_ERROR("Failed to create the VLAN sub-interface %s. The error code is %d.", sub_symbol, errno); close(sockfd_v4); return RT_ERR; } /* Get current dev MTU */ memset(&ifr, 0, sizeof(ifr)); snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", sub_symbol); if (ioctl(sockfd_v4, SIOCGIFMTU, &ifr) < 0) { MR_ERROR("Failed to get the VLAN sub-interface %s MTU. 
The error code is %d.", sub_symbol, errno); close(sockfd_v4); return RT_ERR; } /* Get current dev MTU */ memset(&ifr, 0, sizeof(ifr)); snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", sub_symbol); if (ioctl(sockfd_v4, SIOCGIFMTU, &ifr) < 0) { MR_ERROR("Failed to obtain the MTU for the VLAN sub-interface %s. The error code is %d.", sub_symbol, errno); close(sockfd_v4); return RT_ERR; } /* Set the MTU */ if ((dev_desc->dpdk_dev_desc->mtu != 0) && (dev_desc->dpdk_dev_desc->mtu != ifr.ifr_mtu)) { memset(&ifr, 0, sizeof(ifr)); snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", sub_symbol); ifr.ifr_mtu = dev_desc->dpdk_dev_desc->mtu; if (ioctl(sockfd_v4, SIOCSIFMTU, &ifr) < 0) { MR_ERROR("Failed to set the MTU %u for the VLAN sub-interface %s. The error code is %d.", dev_desc->dpdk_dev_desc->mtu, sub_symbol, errno); close(sockfd_v4); return RT_ERR; } } /* Set the device up and running */ memset(&ifr, 0, sizeof(ifr)); snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", sub_symbol); ifr.ifr_flags = IFF_UP | IFF_RUNNING; if (ioctl(sockfd_v4, SIOCSIFFLAGS, &ifr) < 0) { MR_ERROR("Failed to set the VLAN sub-interface %s up and running. The error code is %d.", sub_symbol, errno); close(sockfd_v4); return RT_ERR; } /* Set the IPv4 addr mask and gateway */ if (vlan_member->sa_family_v4 == AF_INET) { /* Set IPv4 addr */ memset(&ifr, 0, sizeof(ifr)); snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", sub_symbol); struct sockaddr_in * addr = (struct sockaddr_in *)&ifr.ifr_addr; addr->sin_family = AF_INET; addr->sin_addr = vlan_member->in_addr; addr->sin_port = 0; if (ioctl(sockfd_v4, SIOCSIFADDR, &ifr) < 0) { MR_ERROR("Failed to set the IPv4 address for the VLAN sub-interface %s. 
The error code is %d.", sub_symbol, errno); close(sockfd_v4); return RT_ERR; } /* Set IPv4 mask */ struct sockaddr_in * mask = (struct sockaddr_in *)&ifr.ifr_netmask; mask->sin_family = AF_INET; mask->sin_addr = vlan_member->in_mask; mask->sin_port = 0; if (ioctl(sockfd_v4, SIOCSIFNETMASK, &ifr) < 0) { MR_ERROR("Failed to set the IPv4 mask for the VLAN sub-interface %s. The error code is %d.", sub_symbol, errno); close(sockfd_v4); return RT_ERR; } } /* Set the IPv6 addr mask and gateway */ if (vlan_member->sa_family_v6 == AF_INET6) { int sockfd_v6 = socket(AF_INET6, SOCK_DGRAM, IPPROTO_IP); if (sockfd_v6 < 0) { MR_ERROR("Failed to create a IPv6 socket while setting the response device %s. The error code is %d.", dev_desc->symbol, errno); return RT_ERR; } /* Get sub-interface ifindex */ memset(&ifr, 0, sizeof(ifr)); snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", sub_symbol); if (ioctl(sockfd_v6, SIOGIFINDEX, &ifr) < 0) { MR_ERROR("Failed to get the ifindex of the VLAN sub-interface %s. The error code is %d.", sub_symbol, errno); close(sockfd_v6); return RT_ERR; } /* Set IPv6 addr and mask */ struct in6_ifreq ifr6; memset(&ifr6, 0, sizeof(ifr6)); ifr6.ifr6_ifindex = ifr.ifr_ifindex; ifr6.ifr6_addr = vlan_member->in6_addr; ifr6.ifr6_prefixlen = ipv6_mask_length(&vlan_member->in6_mask); int ret = ioctl(sockfd_v6, SIOCSIFADDR, &ifr6); if (ret < 0) { MR_ERROR("Failed to set the IPv6 address and mask for the VLAN sub-interface %s. 
The error code is %d.", sub_symbol, errno); close(sockfd_v6); return RT_ERR; } close(sockfd_v6); } } close(sockfd_v4); return RT_SUCCESS; } static int kernel_resp_crosslink(struct devmgr_main * devmgr_main) { unsigned int dev_iterator = 0; struct mr_dev_desc * dev_desc_iter = NULL; while ((dev_desc_iter = mr_dev_desc_iterate(devmgr_main, &dev_iterator)) != NULL) { /* this device has no representor */ if (dev_desc_iter->representor_config == NULL) { continue; } struct representor_config * resp_config = dev_desc_iter->representor_config; /* the device has a representor in app namespace, * this representor will create by the app library */ if (resp_config->ns_type != REPRESENTOR_NS_SERVICE) { continue; } struct mr_dev_desc * representor = mr_dev_desc_lookup(devmgr_main, resp_config->str_representor_symbol); if (representor == NULL) { MR_WARNING("The representor device %s for device %s is not existed.", resp_config->str_representor_symbol, dev_desc_iter->symbol); continue; } assert(representor->represented_device == NULL); assert(dev_desc_iter->device_representor == NULL); dev_desc_iter->device_representor = representor; representor->represented_device = dev_desc_iter; /* Set the MAC address of the device representor based on the represented device */ rte_ether_addr_copy(&dev_desc_iter->eth_addr, &representor->eth_addr); if (representor->dpdk_dev_desc != NULL) { rte_ether_addr_copy(&dev_desc_iter->eth_addr, &representor->dpdk_dev_desc->ether_addr); } kernel_resp_setup_sync(representor); } return 0; } static struct representor_config * kernel_resp_config_load(struct devmgr_main * devmgr_main, const char * devsym) { struct sc_main * sc = devmgr_main->sc; const char * cfg = sc->local_cfgfile; char str_section[MR_SYMBOL_MAX]; snprintf(str_section, sizeof(str_section), "device:%s", devsym); unsigned int en_representor = 0; MESA_load_profile_uint_def(cfg, str_section, "en_representor", &en_representor, 0); if (en_representor == 0) { return NULL; } struct representor_config * 
resp_cfg = ZMALLOC(sizeof(struct representor_config)); MR_VERIFY_MALLOC(resp_cfg); /* for default, redirect all kinds of local packets except tunnels */ MESA_load_profile_uint_def(cfg, str_section, "representor_ns", &resp_cfg->ns_type, 0); MESA_load_profile_string_def(cfg, str_section, "representor_dev", resp_cfg->str_representor_symbol, sizeof(resp_cfg->str_representor_symbol), ""); MESA_load_profile_uint_def(cfg, str_section, "redirect_local_arp", &resp_cfg->redirect_local_arp, 1); MESA_load_profile_uint_def(cfg, str_section, "redirect_local_rarp", &resp_cfg->redirect_local_ipv4, 1); MESA_load_profile_uint_def(cfg, str_section, "redirect_local_lldp", &resp_cfg->redirect_local_lldp, 1); MESA_load_profile_uint_def(cfg, str_section, "redirect_local_ipv4", &resp_cfg->redirect_local_ipv4, 1); MESA_load_profile_uint_def(cfg, str_section, "redirect_local_ipv6", &resp_cfg->redirect_local_ipv6, 1); return resp_cfg; } int mr_dev_desc_config_load(struct devmgr_main * devmgr_main, struct mr_dev_desc * dev_desc) { const char * cfgfile = devmgr_main->sc->local_cfgfile; char str_section[MR_SYMBOL_MAX * 2] = {}; snprintf(str_section, sizeof(str_section) - 1, "device:%s", dev_desc->symbol); MESA_load_profile_uint_def(cfgfile, str_section, "driver", &dev_desc->drv_type, MR_DEV_DRV_TYPE_DPDK_PCI); MESA_load_profile_uint_def(cfgfile, str_section, "role", &dev_desc->role_type, MR_DEV_ROLE_NONE); MESA_load_profile_uint_def(cfgfile, str_section, "encode", &dev_desc->encode_type, MR_DEV_ENCODE_TYPE_NONE); /* for the kernel resp */ dev_desc->representor_config = kernel_resp_config_load(devmgr_main, dev_desc->symbol); /* rx cores */ cpu_set_t serv_io_cpu_set = devmgr_main->sc->cpu_set_io; unsigned int rx_cores[RTE_MAX_LCORE] = {0}; int nr_rx_cores = MESA_load_profile_uint_range(cfgfile, str_section, "rx_cores", RTE_DIM(rx_cores), rx_cores); if (nr_rx_cores > 0) { for (unsigned int i = 0; i < nr_rx_cores; i++) { CPU_SET(rx_cores[i], &dev_desc->rx_cpu_set); } } else { dev_desc->rx_cpu_set = 
serv_io_cpu_set; } /* the rx_cpu_set must be a subset of i/o cores, and the tx_cpu_set should same as i/o cores * because the tx may happen in any i/o core */ CPU_AND(&dev_desc->rx_cpu_set, &serv_io_cpu_set, &dev_desc->rx_cpu_set); dev_desc->tx_cpu_set = serv_io_cpu_set; /* generate the qid map */ dev_desc->rx_qid_map = qid_map_create(&dev_desc->rx_cpu_set); dev_desc->tx_qid_map = qid_map_create(&dev_desc->tx_cpu_set); MR_VERIFY(dev_desc->rx_qid_map != NULL); MR_VERIFY(dev_desc->tx_qid_map != NULL);
/* MAC address: parse the "ether" key if present, else generate a random one. */
/* Get ether */ char str_ether[MR_STRING_MAX] = {}; if (MESA_load_profile_string_nodef(cfgfile, str_section, "ether", str_ether, sizeof(str_ether)) >= 0) { if (rte_ether_unformat_addr(str_ether, &dev_desc->eth_addr) < 0) { MR_CFGERR_INVALID_FORMAT(cfgfile, str_section, "ether"); return RT_ERR; } } else { rte_eth_random_addr(dev_desc->eth_addr.addr_bytes); }
/* Trunk mode: a non-empty "allow_vlan_ids" list switches the device to trunk
 * and loads the optional per-VLAN IPv4/IPv6 addressing from [<sec>:vlan:<id>]. */
/* Get allow vlan ids */ uint32_t vlan_ids[16]; int nr_vlan_members = MESA_load_profile_uint_range(cfgfile, str_section, "allow_vlan_ids", RTE_DIM(vlan_ids), vlan_ids); if (nr_vlan_members > 0) { if (nr_vlan_members > RTE_DIM(dev_desc->vlan_members)) { MR_CFGERR_INVALID_VALUE(cfgfile, str_section, "allow_vlan_ids", "entry count must be less than 16."); return RT_ERR; } if (dev_desc->drv_type == MR_DEV_DRV_TYPE_SHMDEV) { MR_CFGERR_INVALID_VALUE(cfgfile, str_section, "allow_vlan_ids", "cannot be configured for shmdev device."); return RT_ERR; } for (int i = 0; i < nr_vlan_members; i++) { char str_vlan_member_cfg[MR_STRING_MAX] = {}; snprintf(str_vlan_member_cfg, sizeof(str_vlan_member_cfg) - 1, "%s:vlan:%d", str_section, vlan_ids[i]); int sa_family_v4 = AF_UNSPEC; struct in_addr in_addr = {}; struct in_addr in_mask = {}; char str_in_addr_v4[INET_ADDRSTRLEN]; if (MESA_load_profile_string_nodef(cfgfile, str_vlan_member_cfg, "in_addr_v4", str_in_addr_v4, sizeof(str_in_addr_v4)) >= 0) { sa_family_v4 = AF_INET; if (inet_pton(AF_INET, str_in_addr_v4, &in_addr) <= 0) { MR_CFGERR_INVALID_FORMAT(cfgfile, str_vlan_member_cfg, 
"in_addr_v4"); return RT_ERR; } char str_in_mask_v4[INET_ADDRSTRLEN]; if (MESA_load_profile_string_nodef(cfgfile, str_vlan_member_cfg, "in_mask_v4", str_in_mask_v4, sizeof(str_in_mask_v4)) < 0) { MR_CFGERR_INVALID_VALUE(cfgfile, str_vlan_member_cfg, "in_mask", "must be configured."); return RT_ERR; } if (inet_pton(AF_INET, str_in_mask_v4, &in_mask) <= 0) { MR_CFGERR_INVALID_FORMAT(cfgfile, str_vlan_member_cfg, "in_mask_v4"); return RT_ERR; } } int sa_family_v6 = AF_UNSPEC; struct in6_addr in6_addr = {}; struct in6_addr in6_mask = {}; char str_in_addr_v6[INET6_ADDRSTRLEN]; if (MESA_load_profile_string_nodef(cfgfile, str_vlan_member_cfg, "in_addr_v6", str_in_addr_v6, sizeof(str_in_addr_v6)) >= 0) { sa_family_v6 = AF_INET6; if (inet_pton(AF_INET6, str_in_addr_v6, &in6_addr) <= 0) { MR_CFGERR_INVALID_FORMAT(cfgfile, str_vlan_member_cfg, "in_addr_v6"); return RT_ERR; } char str_in_mask_v6[INET6_ADDRSTRLEN]; if (MESA_load_profile_string_nodef(cfgfile, str_vlan_member_cfg, "in_mask_v6", str_in_mask_v6, sizeof(str_in_mask_v6)) < 0) { MR_CFGERR_INVALID_VALUE(cfgfile, str_vlan_member_cfg, "in_mask_v6", "must be configured."); return RT_ERR; } if (inet_pton(AF_INET6, str_in_mask_v6, &in6_mask) <= 0) { MR_CFGERR_INVALID_FORMAT(cfgfile, str_vlan_member_cfg, "in_mask_v6"); return RT_ERR; } } struct vlan_member * vlan_member = &dev_desc->vlan_members[i]; vlan_member->vlan_id = rte_cpu_to_be_16(vlan_ids[i]); vlan_member->sa_family_v4 = sa_family_v4; vlan_member->in_addr = in_addr; vlan_member->in_mask = in_mask; vlan_member->sa_family_v6 = sa_family_v6; rte_memcpy(&vlan_member->in6_addr, &in6_addr, sizeof(in6_addr)); rte_memcpy(&vlan_member->in6_mask, &in6_mask, sizeof(in6_mask)); } dev_desc->dev_mode = MR_DEV_MODE_TRUNK; dev_desc->nr_vlan_members = nr_vlan_members; return RT_SUCCESS; } /* for endpoint and route, try to load ip addr settings */ char str_in_addr[INET_ADDRSTRLEN]; if (MESA_load_profile_string_nodef(cfgfile, str_section, "in_addr", str_in_addr, sizeof(str_in_addr)) < 
0) { return RT_SUCCESS; } int ret = inet_pton(AF_INET, str_in_addr, &dev_desc->in_addr); if (ret <= 0) { MR_CFGERR_INVALID_FORMAT(cfgfile, str_section, "in_addr"); return RT_ERR; } char str_in_mask[INET_ADDRSTRLEN]; if (MESA_load_profile_string_nodef(cfgfile, str_section, "in_mask", str_in_mask, sizeof(str_in_mask)) < 0) { return RT_ERR; } ret = inet_pton(AF_INET, str_in_mask, &dev_desc->in_mask); if (ret <= 0) { MR_CFGERR_INVALID_FORMAT(cfgfile, str_section, "in_mask"); return RT_ERR; } char str_gateway[INET_ADDRSTRLEN]; if (MESA_load_profile_string_nodef(cfgfile, str_section, "gateway", str_gateway, sizeof(str_gateway)) >= 0) { ret = inet_pton(AF_INET, str_gateway, &dev_desc->in_gateway); if (ret <= 0) { MR_CFGERR_INVALID_FORMAT(cfgfile, str_section, "gateway"); return RT_ERR; } } dev_desc->dev_mode = MR_DEV_MODE_ACCESS; return RT_SUCCESS; }
/* Case-insensitive lookup of a descriptor by its symbol; NULL when absent. */
struct mr_dev_desc * mr_dev_desc_lookup(struct devmgr_main * devmgr_main, const char * devsym) { for (unsigned int i = 0; i < RTE_DIM(devmgr_main->dev_descs); i++) { struct mr_dev_desc * dev_desc = devmgr_main->dev_descs[i]; if (dev_desc == NULL || strcasecmp(dev_desc->symbol, devsym) != 0) { continue; } return dev_desc; } return NULL; }
/* Resumable iteration over the descriptor table: *iterator holds the cursor
 * (left one past the returned slot); returns NULL when exhausted. */
struct mr_dev_desc * mr_dev_desc_iterate(struct devmgr_main * devmgr_main, unsigned int * iterator) { for (; *iterator < RTE_DIM(devmgr_main->dev_descs); (*iterator)++) { struct mr_dev_desc * dev_desc = devmgr_main->dev_descs[*iterator]; if (dev_desc == NULL) continue; (*iterator)++; return dev_desc; } return NULL; }
/* O(1) lookup by port id -- the dev_descs table is indexed by port id. */
struct mr_dev_desc * mr_dev_desc_lookup_by_port_id(struct devmgr_main * devmgr_main, port_id_t port_id) { if (unlikely(port_id >= RTE_DIM(devmgr_main->dev_descs))) return NULL; return devmgr_main->dev_descs[port_id]; }
/* Collect the symbols of all configured devices whose "type" matches dev_type.
 * Returns a malloc'd array of strdup'd names (count via *nr_drvs); caller
 * frees both the strings and the array. NULL on a malformed device list. */
static char ** gcfg_device_syms_get_by_type(struct sc_main * sc, enum mr_dev_type dev_type, unsigned int * nr_drvs) { char str_dev_list[MR_STRING_MAX] = {}; MESA_load_profile_string_nodef(sc->local_cfgfile, "device", "device", str_dev_list, sizeof(str_dev_list)); 
/* Extract each NIC name from the comma-separated list. */ char * str_dev_tokens[MR_TOKENS_MAX] = {}; int nr_str_tokens = rte_strsplit(str_dev_list, sizeof(str_dev_list), str_dev_tokens, MR_TOKENS_MAX, ','); if (nr_str_tokens < 0) { return NULL; } char ** out_dev_symbols = malloc(MR_TOKENS_MAX * sizeof(char *));
/* NOTE(review): malloc/strdup results are not checked before use here --
 * confirm the project's OOM policy (other call sites use MR_VERIFY_MALLOC). */
unsigned int nr_out_dev_symbols = 0; /* Iterate over every configured device. */ for (int i = 0; i < nr_str_tokens; i++) { /* device name */ char * str_dev_symbol = str_dev_tokens[i]; assert(str_dev_symbol != NULL); /* read the driver setting */ char str_section[MR_SYMBOL_MAX]; snprintf(str_section, sizeof(str_section) - 1, "device:%s", str_dev_symbol); unsigned int cfg_type = 0; MESA_load_profile_uint_def(sc->local_cfgfile, str_section, "type", &cfg_type, MR_DEV_TYPE_ETH); if (cfg_type != dev_type) continue; out_dev_symbols[nr_out_dev_symbols] = strdup(str_dev_symbol); nr_out_dev_symbols++; } *nr_drvs = nr_out_dev_symbols; return out_dev_symbols; }
/* Same as gcfg_device_syms_get_by_type but filters on the "driver" key; same
 * ownership contract (caller frees strings and array). */
static char ** gcfg_device_syms_get_by_drv(struct sc_main * sc, enum mr_dev_driver drv_type, unsigned int * nr_drvs) { char str_dev_list[MR_STRING_MAX] = {}; MESA_load_profile_string_nodef(sc->local_cfgfile, "device", "device", str_dev_list, sizeof(str_dev_list)); /* Extract each NIC name from the comma-separated list. */ char * str_dev_tokens[MR_TOKENS_MAX] = {}; int nr_str_tokens = rte_strsplit(str_dev_list, sizeof(str_dev_list), str_dev_tokens, MR_TOKENS_MAX, ','); if (nr_str_tokens < 0) { return NULL; } char ** out_dev_symbols = malloc(MR_TOKENS_MAX * sizeof(char *)); unsigned int nr_out_dev_symbols = 0; /* Iterate over every configured device. */ for (int i = 0; i < nr_str_tokens; i++) { /* device name */ char * str_dev_symbol = str_dev_tokens[i]; assert(str_dev_symbol != NULL); /* read the driver setting */ char str_section[MR_SYMBOL_MAX]; snprintf(str_section, sizeof(str_section) - 1, "device:%s", str_dev_symbol); unsigned int cfg_driver = 0; MESA_load_profile_uint_def(sc->local_cfgfile, str_section, "driver", &cfg_driver, MR_DEV_DRV_TYPE_DPDK_PCI); if (cfg_driver != drv_type) continue; out_dev_symbols[nr_out_dev_symbols] = 
strdup(str_dev_symbol); nr_out_dev_symbols++; } *nr_drvs = nr_out_dev_symbols; return out_dev_symbols; }
/* Shared-memory device (shmdev) tunnel sizing parameters. */
struct shmdev_config { unsigned int sz_tun_rx; unsigned int sz_tun_tx; unsigned int sz_max_inflight; unsigned int batch_interval_in_us; };
/* Load shmdev sizing from the global [device] section. The legacy "sz_tunnel"
 * key seeds both directions; per-direction keys override it.
 * NOTE(review): the devsym parameter is currently unused -- all keys are read
 * from the global section; confirm whether per-device overrides were intended. */
void shmdev_config_load(struct devmgr_main * devmgr_main, const char * devsym, struct shmdev_config * cfg_out) { const char * cfgfile = devmgr_main->sc->local_cfgfile; /* load the old tunnel settings */ unsigned int default_sz_tunnel; MESA_load_profile_uint_def(cfgfile, "device", "sz_tunnel", &default_sz_tunnel, 4096); unsigned int default_sz_tun_rx = default_sz_tunnel; unsigned int default_sz_tun_tx = default_sz_tunnel; unsigned int default_sz_max_inflight = 0; /* override configuration */ MESA_load_profile_uint_def(cfgfile, "device", "sz_rx_tunnel", &default_sz_tun_rx, default_sz_tun_rx); MESA_load_profile_uint_def(cfgfile, "device", "sz_tx_tunnel", &default_sz_tun_tx, default_sz_tun_tx); MESA_load_profile_uint_def(cfgfile, "device", "sz_max_inflight", &default_sz_max_inflight, default_sz_max_inflight); unsigned int default_batch_interval_in_us; MESA_load_profile_uint_def(cfgfile, "device", "batch_interval_tsc", &default_batch_interval_in_us, 50); cfg_out->sz_tun_rx = default_sz_tun_rx; cfg_out->sz_tun_tx = default_sz_tun_tx; cfg_out->sz_max_inflight = default_sz_max_inflight; cfg_out->batch_interval_in_us = default_batch_interval_in_us; }
/* Create one shmdev device `devsym`: load its descriptor config, create the
 * vdev data channels, allocate a port id and register the descriptor in the
 * devmgr table. Returns RT_SUCCESS or RT_ERR. */
int shmdev_setup_one_device(struct devmgr_main * devmgr_main, const char * devsym) { /* prepare the dev_desc */ struct mr_dev_desc * dev_desc = mr_dev_desc_create(devmgr_main, devsym); int ret = mr_dev_desc_config_load(devmgr_main, dev_desc); if (unlikely(ret < 0)) { MR_ERROR("Failed at loading config for the shmdev device %s", devsym); return RT_ERR; } char sym_direct_mempool[MR_SYMBOL_MAX] = {0}; struct shmdev_config shmdev_config = {}; shmdev_config_load(devmgr_main, devsym, &shmdev_config); struct sc_main * sc = devmgr_main->sc; unsigned int nr_rxstream = 
CPU_COUNT(&dev_desc->rx_cpu_set); unsigned int nr_txstream = CPU_COUNT(&dev_desc->tx_cpu_set); assert(nr_rxstream > 0 && nr_txstream > 0);
/* NOTE(review): sym_direct_mempool is still the empty string at this point
 * (never filled after its {0} initialisation) -- confirm that an empty name
 * selects the default pool in mrb_direct_mempool_locate(). */
/* Get indirect pool */ struct rte_mempool * direct_pool = mrb_direct_mempool_locate(sc->mrb_pool_main, sym_direct_mempool, 0, 0); if (direct_pool == NULL) { MR_ERROR("Direct mempool %s for virtual device %s is not existed. ", sym_direct_mempool, devsym); return RT_ERR; } ret = vdev_data_create(sc->vdev_main, devsym, nr_rxstream, nr_txstream, shmdev_config.sz_tun_rx, shmdev_config.sz_tun_tx, shmdev_config.sz_max_inflight, shmdev_config.batch_interval_in_us, direct_pool); if (unlikely(ret < 0)) { MR_ERROR("Failed at creating shmdev resources for device %s", devsym); return RT_ERR; } struct vdev * vdev_desc = vdev_lookup(sc->vdev_main, devsym); MR_VERIFY_2(vdev_desc != NULL, "vdev_lookup() returns NULL");
/* Allocate the next shmdev port id and bind the vdev to the descriptor.
 * NOTE(review): the counter is incremented before the range check below, so a
 * failed setup still consumes a port id -- confirm this is acceptable. */
dev_desc->port_id = devmgr_main->shmdev_port_id_counter++; dev_desc->drv_type = MR_DEV_DRV_TYPE_SHMDEV; dev_desc->shm_dev_desc = vdev_desc; if (dev_desc->port_id >= RTE_DIM(devmgr_main->dev_descs)) { MR_ERROR("Too many shmdev devices (max=%lu).", RTE_DIM(devmgr_main->dev_descs)); return RT_ERR; } /* inherit configuration from dev_desc */ struct representor_config * resp_config = dev_desc->representor_config; if (resp_config != NULL && resp_config->ns_type == REPRESENTOR_NS_APP) { vdev_desc->representor_config.enable = 1; vdev_desc->representor_config.redirect_local_arp = resp_config->redirect_local_arp; vdev_desc->representor_config.redirect_local_lldp = resp_config->redirect_local_lldp; vdev_desc->representor_config.redirect_local_rarp = resp_config->redirect_local_rarp; vdev_desc->representor_config.redirect_local_ipv4 = resp_config->redirect_local_ipv4; vdev_desc->representor_config.redirect_local_ipv6 = resp_config->redirect_local_ipv6; } /* copy the ether addr, inet addr to vdev desc */ vdev_desc->ether_addr = dev_desc->eth_addr; vdev_desc->in_addr = dev_desc->in_addr; vdev_desc->in_mask = dev_desc->in_mask; 
vdev_desc->in_gateway = dev_desc->in_gateway; /* add to index array */ devmgr_main->dev_descs[dev_desc->port_id] = dev_desc; return RT_SUCCESS; } int shmdev_init(struct devmgr_main * devmgr_main) { /* query the shmdev list */ unsigned int nr_devsyms = 0; char ** devsyms = gcfg_device_syms_get_by_drv(devmgr_main->sc, MR_DEV_DRV_TYPE_SHMDEV, &nr_devsyms); if (nr_devsyms == 0) { MR_INFO("No shmdev configuration found, ignore it."); return RT_SUCCESS; } for (unsigned int i = 0; i < nr_devsyms; i++) { int ret = shmdev_setup_one_device(devmgr_main, devsyms[i]); if (unlikely(ret < 0)) { MR_ERROR("Failed at init shmdev %s.", devsyms[i]); return RT_ERR; } } return RT_SUCCESS; } /* Set Virtio Tap Up */ int vhost_dev_setup(const char * virtio_sym) { int fd; struct ifreq ifr; fd = socket(AF_INET, SOCK_DGRAM, 0); strncpy(ifr.ifr_name, virtio_sym, IFNAMSIZ - 1); if (ioctl(fd, SIOCGIFFLAGS, &ifr) != 0) { return RT_ERR; } ifr.ifr_flags |= IFF_UP | IFF_RUNNING; if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0) { return RT_ERR; } close(fd); return RT_SUCCESS; } static uint64_t rss_hf_all_field = RTE_ETH_RSS_ETH | RTE_ETH_RSS_VLAN | RTE_ETH_RSS_IP | RTE_ETH_RSS_TCP | RTE_ETH_RSS_UDP | RTE_ETH_RSS_SCTP | RTE_ETH_RSS_L2_PAYLOAD | RTE_ETH_RSS_L2TPV3 | RTE_ETH_RSS_ESP | RTE_ETH_RSS_AH | RTE_ETH_RSS_PFCP | RTE_ETH_RSS_GTPU | RTE_ETH_RSS_ECPRI | RTE_ETH_RSS_MPLS; /* 用户参数解析:网卡参数设置 */ static int gen_dpdk_dev_ethconf(struct dpdk_dev * dev, unsigned nr_rxq_use, struct rte_eth_conf * out_eth_conf) { struct mr_dev_desc * dev_desc = dev->ref_dev_desc; struct rte_eth_conf eth_conf = eth_conf_default; /* PCI devices */ if ((dev_desc->drv_type == MR_DEV_DRV_TYPE_DPDK_PCI) && (nr_rxq_use > 1)) { /* only PCI devices can run at RSS mode. 
*/ eth_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_RSS; /* setup how NICs distributes packets */ if (dev->rssmode == MR_DEV_RSSMODE_2TUPLE_SYM) { eth_conf.rx_adv_conf.rss_conf.rss_hf = RTE_ETH_RSS_IP; eth_conf.rx_adv_conf.rss_conf.rss_key = default_sym_rss_key; eth_conf.rx_adv_conf.rss_conf.rss_key_len = sizeof(default_sym_rss_key); } else if (dev->rssmode == MR_DEV_RSSMODE_4TUPLE_SYM) { eth_conf.rx_adv_conf.rss_conf.rss_hf = rss_hf_all_field; eth_conf.rx_adv_conf.rss_conf.rss_key = default_sym_rss_key; eth_conf.rx_adv_conf.rss_conf.rss_key_len = sizeof(default_sym_rss_key); } else if (dev->rssmode == MR_DEV_RSSMODE_4TUPLE_ASYM) { eth_conf.rx_adv_conf.rss_conf.rss_hf = rss_hf_all_field; eth_conf.rx_adv_conf.rss_conf.rss_key = NULL; } /* According to dev info reset rss conf */ struct rte_eth_conf request_eth_conf = eth_conf; struct rte_eth_dev_info dev_info = {}; /* Get dev info */ rte_eth_dev_info_get(dev->port_id, &dev_info); if (dev_info.flow_type_rss_offloads == 0) { memcpy(ð_conf, ð_conf_default, sizeof(eth_conf)); MR_WARNING("The port '%s' no support rss.", dev->symbol); } else { /* Check request rss_hf the dev supported or not */ eth_conf.rx_adv_conf.rss_conf.rss_hf &= dev_info.flow_type_rss_offloads; if (eth_conf.rx_adv_conf.rss_conf.rss_hf != request_eth_conf.rx_adv_conf.rss_conf.rss_hf) { MR_WARNING("The port %s modified RSS hash function based on hardware support," "requested:%#" PRIx64 " configured:%#" PRIx64 "\n", dev->symbol, request_eth_conf.rx_adv_conf.rss_conf.rss_hf, eth_conf.rx_adv_conf.rss_conf.rss_hf); } } } else if (dev_desc->drv_type == MR_DEV_DRV_TYPE_DPDK_VIRTIO_USER || dev_desc->drv_type == MR_DEV_DRV_TYPE_DPDK_AF_PACKET) { /* the virtio and af_packet is not support rss */ eth_conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE; } if (dev->en_vlan_strip) { eth_conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_VLAN_INSERT; } *out_eth_conf = eth_conf; return 0; } static int dpdk_dev_tx_meter_setup(struct dpdk_dev * dev, unsigned int nr_txq, uint64_t cir, uint64_t cbs, 
uint64_t ebs) { for (unsigned int txq = 0; txq < nr_txq; txq++) { dev->tx_meter_profile[txq] = ZMALLOC(sizeof(struct rte_meter_srtcm_profile)); MR_VERIFY_MALLOC(dev->tx_meter_profile[txq]); dev->tx_meter[txq] = ZMALLOC(sizeof(struct rte_meter_srtcm)); MR_VERIFY_MALLOC(dev->tx_meter[txq]); struct rte_meter_srtcm_profile * profile = dev->tx_meter_profile[txq]; struct rte_meter_srtcm_params params = { .cir = cir / nr_txq, .cbs = cbs / nr_txq, .ebs = ebs / nr_txq, }; int ret = rte_meter_srtcm_profile_config(profile, ¶ms); if (ret < 0) { MR_ERROR("Failed at setting up tx meter config for device %s, errno = %d", dev->symbol, ret); return ret; } ret = rte_meter_srtcm_config(dev->tx_meter[txq], profile); if (ret < 0) { MR_ERROR("Failed at setting up tx meter for device %s, errno = %d", dev->symbol, ret); return ret; } } MR_INFO("dpdk device %s: tx_meter enable, cir=%lu, cbs=%lu, ebs=%lu", dev->symbol, cir, cbs, ebs); return 0; } static int dpdk_dev_queue_setup_rss(struct dpdk_dev * dev, cpu_set_t * rx_cpu_set, cpu_set_t * tx_cpu_set, unsigned int rxq_index_begin, unsigned int txq_index_begin) { int ret = 0; struct sc_main * sc = sc_main_get(); /* setup the rxconf and txconf for queue */ struct rte_eth_dev_info dev_info; rte_eth_dev_info_get(dev->port_id, &dev_info); /* default rxconf and txconf */ struct rte_eth_rxconf rxconf = dev_info.default_rxconf; struct rte_eth_txconf txconf = dev_info.default_txconf; /* suggestion from dpdk's pmd */ struct rte_eth_dev_portconf * default_rxportconf = &dev_info.default_rxportconf; struct rte_eth_dev_portconf * default_txportconf = &dev_info.default_txportconf; if (dev->nr_rx_descs > 0) { dev->nr_rx_descs = RTE_MIN(dev->nr_rx_descs, dev_info.rx_desc_lim.nb_max); dev->nr_rx_descs = RTE_MAX(dev->nr_rx_descs, dev_info.rx_desc_lim.nb_min); } else { dev->nr_rx_descs = default_rxportconf->ring_size; } if (dev->nr_tx_descs > 0) { dev->nr_tx_descs = RTE_MIN(dev->nr_tx_descs, dev_info.tx_desc_lim.nb_max); dev->nr_tx_descs = 
RTE_MAX(dev->nr_tx_descs, dev_info.tx_desc_lim.nb_min); } else { dev->nr_tx_descs = default_txportconf->ring_size; } MR_INFO("dpdk device %s: rx_desc=%d, tx_desc=%d", dev->symbol, dev->nr_rx_descs, dev->nr_tx_descs); socket_id_t dev_socket_id = rte_eth_dev_socket_id(dev->port_id); unsigned int nr_rxq_use = CPU_COUNT(rx_cpu_set); unsigned int nr_txq_use = CPU_COUNT(tx_cpu_set); for (unsigned int rxq = 0; rxq < nr_rxq_use; rxq++) { /* get the direct pool by the core and socket */ cpu_id_t cpu_id = cpu_set_location(rx_cpu_set, rxq); socket_id_t socket_id = (socket_id_t)rte_lcore_to_socket_id(cpu_id); struct rte_mempool * pool = mrb_direct_mempool_locate(sc->mrb_pool_main, NULL, socket_id, cpu_id); assert(pool != NULL); ret = rte_eth_rx_queue_setup(dev->port_id, rxq + rxq_index_begin, dev->nr_rx_descs, dev_socket_id, &rxconf, pool); if (ret < 0) { MR_ERROR("dpdk device %s RXQ %d setup failed, errno = %d", dev->symbol, rxq + rxq_index_begin, ret); goto err; } } for (unsigned int txq = 0; txq < nr_txq_use; txq++) { ret = rte_eth_tx_queue_setup(dev->port_id, txq + txq_index_begin, dev->nr_tx_descs, dev_socket_id, &txconf); if (ret < 0) { MR_ERROR("dpdk device %s TXQ %d setup failed, errno = %d", dev->symbol, txq, ret); goto err; } } return 0; err: return ret; } __rte_unused static int dpdk_dev_queue_setup_hairpin(struct dpdk_dev * dev, unsigned int nr_hairpin_q, unsigned int rxq_index_begin, unsigned int txq_index_begin) { /* create hairpin queues on both ports*/ unsigned int q_index_hairpin = 0; unsigned int q_index_hairpin_peer = 0; struct rte_eth_hairpin_conf hairpin_conf = { .peer_count = 1, .manual_bind = 0, .tx_explicit = 0, }; int ret = 0; for (q_index_hairpin = rxq_index_begin, q_index_hairpin_peer = txq_index_begin; q_index_hairpin < rxq_index_begin + nr_hairpin_q; q_index_hairpin++, q_index_hairpin_peer++) { hairpin_conf.peers[0].port = dev->port_id; hairpin_conf.peers[0].queue = q_index_hairpin_peer; MR_DEBUG("Prepare to setup rx hairpin for device %s, 
hairpin_queue_id = %d, peer_hairpin_queue_id = %d", dev->symbol, q_index_hairpin, q_index_hairpin_peer);
        ret = rte_eth_rx_hairpin_queue_setup(dev->port_id, q_index_hairpin, dev->nr_rx_descs, &hairpin_conf);
        if (unlikely(ret != 0)) {
            MR_ERROR("Failed at setup rx hairpin queue at port = %d, queue = %d, peer_queue = %d, ret = %d: %s", dev->port_id, q_index_hairpin, q_index_hairpin_peer, ret, rte_strerror(rte_errno));
            return ret;
        }
    }
    /* Second pass: TX hairpin queues, each peered with the matching RX queue. */
    for (q_index_hairpin = txq_index_begin, q_index_hairpin_peer = rxq_index_begin;
         q_index_hairpin < txq_index_begin + nr_hairpin_q; q_index_hairpin++, q_index_hairpin_peer++) {
        hairpin_conf.peers[0].port = dev->port_id;
        hairpin_conf.peers[0].queue = q_index_hairpin_peer;
        MR_DEBUG("Prepare to setup tx hairpin for device %s, hairpin_queue_id = %d, peer_hairpin_queue_id = %d", dev->symbol, q_index_hairpin, q_index_hairpin_peer);
        ret = rte_eth_tx_hairpin_queue_setup(dev->port_id, q_index_hairpin, dev->nr_tx_descs, &hairpin_conf);
        if (unlikely(ret != 0)) {
            MR_ERROR("Failed at setup tx hairpin queue at port = %d, queue = %d, peer_queue = %d, ret = %d: %s", dev->port_id, q_index_hairpin, q_index_hairpin_peer, ret, rte_strerror(rte_errno));
            return ret;
        }
        /* remembers the last TX hairpin queue index that was set up */
        dev->hairpin_q = q_index_hairpin;
    }
    MR_INFO("device %s hairpin setup successfully.", dev->symbol);
    return 0;
}

/* Install the device's default rte_flow rules:
 *   group 0: match-all ETH rule that jumps to group 1;
 *   group 1: RSS rule spreading packets over all nr_rxq queues according to
 *            dev->rssmode (symmetric modes use default_sym_rss_key).
 * Every created flow handle is saved in dev->default_flow_handles so it can
 * be destroyed when the device is stopped. */
__rte_unused static int dpdk_dev_setup_default_flows(struct devmgr_main * devmgr_main, struct dpdk_dev * dev)
{
    /* ----------------- GROUP 0 -------------------- */
    struct rte_flow * flow = NULL;
    struct rte_flow_attr attr = {
        .group = 0, /* set the rule on the main group. */
        .ingress = 1, /* Rx flow. */
        .priority = 3,
    };
    struct rte_flow_error flow_error = {};
    /* Define the pattern to match the packet */
    struct rte_flow_item pattern[] = {
        [0] = {.type = RTE_FLOW_ITEM_TYPE_ETH},
        [1] = {.type = RTE_FLOW_ITEM_TYPE_END},
    };
    /* Jump parameters */
    struct rte_flow_action_jump jump_to_group_1 = {.group = 1};
    /* Jump actions */
    struct rte_flow_action actions[] = {
        [0] = {
            .type = RTE_FLOW_ACTION_TYPE_JUMP,
            .conf = &jump_to_group_1,
        },
        [1] = {
            .type = RTE_FLOW_ACTION_TYPE_END,
            .conf = NULL,
        },
    };
    /* FLOW 0 */
    flow = rte_flow_create(dev->port_id, &attr, pattern, actions, &flow_error);
    if (unlikely(flow == NULL)) {
        MR_ERROR("Failed at install default rule for device %s: %s", dev->symbol, flow_error.message);
        goto err;
    }
    dev->default_flow_handles[dev->nr_default_flow_handles++] = flow;
    /* ------------------ GROUP 1 ----------------------- */
    attr.group = 1;
    attr.ingress = 1;
    attr.priority = 3;
    memset(&flow_error, 0, sizeof(flow_error));
    /* pattern[] is intentionally reused unchanged (match-all ETH). */
    memset(&actions, 0, sizeof(actions));
    uint16_t target_queue_id[RTE_MAX_QUEUES_PER_PORT] = {};
    for (unsigned int i = 0; i < dev->nr_rxq; i++) {
        target_queue_id[i] = i;
    }
    uint64_t rss_type = 0;
    const uint8_t * rss_key = NULL;
    size_t rss_key_len = 0;
    /* setup how nics distributes packets */
    if (dev->rssmode == MR_DEV_RSSMODE_2TUPLE_SYM) {
        rss_type = RTE_ETH_RSS_IP;
        rss_key = default_sym_rss_key;
        rss_key_len = sizeof(default_sym_rss_key);
    } else if (dev->rssmode == MR_DEV_RSSMODE_4TUPLE_SYM) {
        rss_type = RTE_ETH_RSS_NONFRAG_IPV4_TCP | RTE_ETH_RSS_NONFRAG_IPV6_TCP | RTE_ETH_RSS_NONFRAG_IPV4_UDP |
                   RTE_ETH_RSS_NONFRAG_IPV6_UDP;
        rss_key = default_sym_rss_key;
        rss_key_len = sizeof(default_sym_rss_key);
    } else if (dev->rssmode == MR_DEV_RSSMODE_4TUPLE_ASYM) {
        rss_type = RTE_ETH_RSS_NONFRAG_IPV4_TCP | RTE_ETH_RSS_NONFRAG_IPV6_TCP | RTE_ETH_RSS_NONFRAG_IPV4_UDP |
                   RTE_ETH_RSS_NONFRAG_IPV6_UDP;
        rss_key = NULL;
    }
    /* TODO: maybe we have much more better way to do the symmetric hash */
    struct rte_flow_action_rss rss = {
        .level = 0,
        .queue = target_queue_id,
        .queue_num = dev->nr_rxq,
        .types = rss_type,
        .key = rss_key,
        .key_len = rss_key_len,
    };
    actions[0].type = RTE_FLOW_ACTION_TYPE_RSS;
    actions[0].conf = &rss;
    actions[1].type = RTE_FLOW_ACTION_TYPE_END;
    actions[1].conf = NULL;
    flow = rte_flow_create(dev->port_id, &attr, pattern, actions, &flow_error);
    if (unlikely(flow == NULL)) {
        MR_ERROR("Failed at install RSS flow at device %s: %s", dev->symbol, flow_error.message);
        return RT_ERR;
    }
    /* Save the default flows, these rules should be destroy when the device is stop */
    dev->default_flow_handles[dev->nr_default_flow_handles++] = flow;
    return RT_SUCCESS;
err:
    return RT_ERR;
}

/* ---- head of dpdk_dev_setup_common(); the ternary expression continues in
 * the next chunk. ---- */
static int dpdk_dev_setup_common(struct devmgr_main * devmgr_main, struct dpdk_dev * dev)
{
    int ret = 0;
    unsigned nr_rxq_use = 0;
    unsigned nr_txq_use = 0;
    /* the number of rx queues equals the total of rx cores, while the number
     * of tx queues equals the total of io cores */
    struct mr_dev_desc * ref_dev_desc = dev->ref_dev_desc;
    assert(ref_dev_desc != NULL);
    cpu_set_t * rx_cpu_set = &ref_dev_desc->rx_cpu_set;
    cpu_set_t * tx_cpu_set = &ref_dev_desc->tx_cpu_set;
    dev->nr_rxq = CPU_COUNT(rx_cpu_set);
    dev->nr_txq = CPU_COUNT(tx_cpu_set);
    dev->nr_hairpin_q = dev->en_smartoffload ? 
1 : 0; nr_rxq_use = dev->nr_rxq + dev->nr_ctx_rxq + dev->nr_hairpin_q; nr_txq_use = dev->nr_txq + dev->nr_ctx_txq + dev->nr_hairpin_q; // 配置端口信息 struct rte_eth_conf local_eth_conf; gen_dpdk_dev_ethconf(dev, nr_rxq_use, &local_eth_conf); ret = rte_eth_dev_configure(dev->port_id, nr_rxq_use, nr_txq_use, &local_eth_conf); if (ret != 0) { MR_ERROR("dpdk device %s configure error: %s, errno = %d", dev->symbol, strerror(ret), ret); return ret; } unsigned int nr_rxq_index = 0; unsigned int nr_txq_index = 0; /* Configure the RX, TX queues */ ret = dpdk_dev_queue_setup_rss(dev, rx_cpu_set, tx_cpu_set, nr_rxq_index, nr_txq_index); if (ret < 0) { return RT_ERR; } if (dev->en_tx_meter) { ret = dpdk_dev_tx_meter_setup(dev, nr_txq_use, dev->tx_meter_cir, dev->tx_meter_cbs, dev->tx_meter_ebs); if (ret < 0) { return RT_ERR; } } nr_rxq_index += dev->nr_rxq; nr_txq_index += dev->nr_txq; /* MTU */ if (dev->mtu != 0 && (ret = rte_eth_dev_set_mtu(dev->port_id, dev->mtu)) < 0) { MR_WARNING("dpdk device %s MTU setup failed : %s", dev->symbol, strerror(-ret)); } /* Multicast */ if (dev->allmulticast) { rte_eth_allmulticast_enable(dev->port_id); } else { rte_eth_allmulticast_disable(dev->port_id); } /* 混杂模式设置 */ if (dev->promisc) { rte_eth_promiscuous_enable(dev->port_id); } else { rte_eth_promiscuous_disable(dev->port_id); } ret = rte_eth_dev_start(dev->port_id); if (ret < 0) { MR_ERROR("dpdk device %s start failed, Errno = %d(%s)", dev->symbol, ret, strerror(-ret)); return ret; } dev->nr_rxq = nr_rxq_use; dev->nr_txq = nr_txq_use; rte_eth_dev_default_mac_addr_set(dev->port_id, &dev->ether_addr); rte_eth_dev_get_mtu(dev->port_id, &dev->mtu); rte_eth_macaddr_get(dev->port_id, &dev->ether_addr); dev->promisc = rte_eth_promiscuous_get(dev->port_id); int vlan_offload_mask = 0; if (dev->en_vlan_strip) { vlan_offload_mask |= RTE_ETH_VLAN_STRIP_OFFLOAD; } if (dev->en_vlan_filter) { vlan_offload_mask |= RTE_ETH_VLAN_FILTER_OFFLOAD; } if (vlan_offload_mask != 0) { if ((ret = 
rte_eth_dev_set_vlan_offload(dev->port_id, vlan_offload_mask)) < 0) { MR_WARNING("dpdk device %s VLAN offload mask setup failed: mask=%x, %s", dev->symbol, vlan_offload_mask, strerror(-ret)); } } /* VLAN Filter设置 */ if (dev->en_vlan_filter) { for (unsigned int i = 0; i < dev->nr_vlan_id_allow; i++) { uint16_t vlan_id = (uint16_t)(dev->vlan_id_allow[i]); if ((ret = rte_eth_dev_vlan_filter(dev->port_id, vlan_id, 1)) < 0) { MR_WARNING("dpdk device %s VLAN filter allow vlan-id %d setup failed: %s", dev->symbol, vlan_id, strerror(-ret)); } } for (unsigned int i = 0; i < dev->nr_vlan_id_deny; i++) { uint16_t vlan_id = (uint16_t)(dev->vlan_id_deny[i]); if ((ret = rte_eth_dev_vlan_filter(dev->port_id, vlan_id, 0)) < 0) { MR_WARNING("dpdk device %s VLAN filter deny vlan-id %d setup failed: %s", dev->symbol, vlan_id, strerror(-ret)); } } } dpdk_dev_status_print(dev); dev->inited = 1; dev->enable = 1; return 0; } /* 从全局配置文件读硬件配置信息 */ void dpdk_dev_config_load(struct dpdk_dev * dev_dpdk, const char * cfg) { char str_section[MR_SYMBOL_MAX * 2]; snprintf(str_section, sizeof(str_section), "device:%s", dev_dpdk->symbol); MESA_load_profile_uint_def(cfg, str_section, "vlan-strip", &dev_dpdk->en_vlan_strip, 0); MESA_load_profile_uint_def(cfg, str_section, "vlan-filter", &dev_dpdk->en_vlan_filter, 0); // 允许通过的VLAN ID列表,仅当vlan-filter开启后有效 int ret = 0; ret = MESA_load_profile_uint_range(cfg, str_section, "vlan-id-allow", RTE_DIM(dev_dpdk->vlan_id_allow), dev_dpdk->vlan_id_allow); if (ret >= 0) { dev_dpdk->nr_vlan_id_allow = ret; } // 禁止通过的VLAN ID列表,仅当vlan-filter开启后有效 ret = MESA_load_profile_uint_range(cfg, str_section, "vlan-id-deny", RTE_DIM(dev_dpdk->vlan_id_deny), dev_dpdk->vlan_id_deny); if (ret >= 0) { dev_dpdk->nr_vlan_id_deny = ret; } // 丢包选项 MESA_load_profile_uint_def(cfg, str_section, "drop_en", &dev_dpdk->en_drop, 0); // 分流模式 MESA_load_profile_uint_def(cfg, str_section, "rssmode", &dev_dpdk->rssmode, 0); // RX描述符数量 MESA_load_profile_uint_def(cfg, str_section, "nr_rxdesc", 
&dev_dpdk->nr_rx_descs, 0); // TX描述符数量 MESA_load_profile_uint_def(cfg, str_section, "nr_txdesc", &dev_dpdk->nr_tx_descs, 0); // 读MTU,网卡自适应。 MESA_load_profile_short_def(cfg, str_section, "mtu", (short *)&dev_dpdk->mtu, 0); // 读混杂模式 MESA_load_profile_uint_def(cfg, str_section, "promisc", &dev_dpdk->promisc, 0); // MULTICAST MESA_load_profile_uint_def(cfg, str_section, "allmulticast", &dev_dpdk->allmulticast, 0); /* tx meter */ MESA_load_profile_uint_def(cfg, str_section, "en_tx_meter", &dev_dpdk->en_tx_meter, 0); if (dev_dpdk->en_tx_meter) { unsigned int tx_meter_cir_in_Kbps = 0; unsigned int tx_meter_cbs_in_KB = 0; unsigned int tx_meter_ebs_in_KB = 0; MESA_load_profile_uint_def(cfg, str_section, "tx_meter_cir_in_Kbps", &tx_meter_cir_in_Kbps, 0); MESA_load_profile_uint_def(cfg, str_section, "tx_meter_cbs_in_KB", &tx_meter_cbs_in_KB, 0); MESA_load_profile_uint_def(cfg, str_section, "tx_meter_ebs_in_KB", &tx_meter_ebs_in_KB, 0); MESA_load_profile_uint_def(cfg, str_section, "tx_meter_yellow_pkt_delay_in_us", &dev_dpdk->tx_meter_yellow_pkt_delay_us, 1); dev_dpdk->tx_meter_cir = (uint64_t)tx_meter_cir_in_Kbps * 1000 / 8; dev_dpdk->tx_meter_cbs = (uint64_t)tx_meter_cbs_in_KB * 1024; dev_dpdk->tx_meter_ebs = (uint64_t)tx_meter_ebs_in_KB * 1024; } // which mempool used memset(dev_dpdk->str_direct_pool, 0, sizeof(dev_dpdk->str_direct_pool)); memset(dev_dpdk->str_indirect_pool, 0, sizeof(dev_dpdk->str_indirect_pool)); MESA_load_profile_string_def(cfg, str_section, "direct-pool", dev_dpdk->str_direct_pool, sizeof(dev_dpdk->str_direct_pool), ""); MESA_load_profile_string_def(cfg, str_section, "indirect-pool", dev_dpdk->str_indirect_pool, sizeof(dev_dpdk->str_indirect_pool), ""); /* TODO: allow the user to set different io cores for each devices */ } #if 0 static struct dpdk_dev_candidate * dpdk_dev_candidate_lookup_by_pci_addr(struct devmgr_main * devmgr_main, struct rte_pci_addr pci_addr) { struct dpdk_dev_candidate * dev_iter = NULL; TAILQ_FOREACH(dev_iter, 
&devmgr_main->dpdk_dev_candidate_list, next) { if (rte_pci_addr_cmp(&dev_iter->pci_addr, &pci_addr) == 0) return dev_iter; } return NULL; } static struct dpdk_dev_candidate * dpdk_dev_candidate_lookup_by_vdev_name(struct devmgr_main * devmgr_main, const char * vdev_name) { struct dpdk_dev_candidate * dev_iter = NULL; TAILQ_FOREACH(dev_iter, &devmgr_main->dpdk_dev_candidate_list, next) { if (strcmp(dev_iter->dpdk_vdev_name, vdev_name) == 0) return dev_iter; } return NULL; } #endif static struct dpdk_dev_candidate * dpdk_dev_candidate_lookup_by_devname(struct devmgr_main * devmgr_main, const char * devname) { struct dpdk_dev_candidate * dev_iter = NULL; TAILQ_FOREACH(dev_iter, &devmgr_main->dpdk_dev_candidate_list, next) { if (strcmp(dev_iter->devname, devname) == 0) return dev_iter; } return NULL; } static int dpdk_dev_candidate_vdev_register(struct devmgr_main * devmgr_main, const char * kernel_name, const char * vdev_name) { struct dpdk_dev_candidate * dev = malloc(sizeof(struct dpdk_dev_candidate)); MR_VERIFY_MALLOC(dev); snprintf(dev->kernel_name, sizeof(dev->kernel_name), "%s", kernel_name); snprintf(dev->dpdk_vdev_name, sizeof(dev->dpdk_vdev_name), "%s", vdev_name); snprintf(dev->devname, sizeof(dev->devname), "%s", vdev_name); TAILQ_INSERT_TAIL(&devmgr_main->dpdk_dev_candidate_list, dev, next); return RT_SUCCESS; } static int dpdk_dev_candidate_pci_register(struct devmgr_main * devmgr_main, const char * devsym) { /* 在JSON文件中查找设备,根据网卡名称查找 */ cJSON * j_hwfile = devmgr_main->j_hwfile; cJSON * j_dev = NULL; if (j_hwfile == NULL) { return RT_SUCCESS; } cJSON_ArrayForEach(j_dev, j_hwfile) { /* 先获取设备名称 */ cJSON * j_interface = cJSON_GetObjectItem(j_dev, "Interface"); MR_VERIFY(j_interface != NULL); if (strcasecmp(devsym, j_interface->valuestring) != 0) continue; /* 获取PCI号 */ cJSON * j_slot = cJSON_GetObjectItem(j_dev, "Slot"); MR_VERIFY(j_slot != NULL); /* Driver */ cJSON * j_driver = cJSON_GetObjectItem(j_dev, "Driver_str"); MR_VERIFY(j_driver != NULL); struct 
dpdk_dev_candidate * dev = malloc(sizeof(struct dpdk_dev_candidate)); MR_VERIFY_MALLOC(dev); snprintf(dev->kernel_name, sizeof(dev->kernel_name) - 1, "%s", j_interface->valuestring); snprintf(dev->driver, sizeof(dev->driver) - 1, "%s", j_driver->valuestring); snprintf(dev->str_pci_addr, sizeof(dev->str_pci_addr) - 1, "%s", j_slot->valuestring); /* parse the pci addr */ int ret = rte_pci_addr_parse(j_slot->valuestring, &dev->pci_addr); if (unlikely(ret < 0)) { MR_ERROR("Failed at parsing device %s's pci_addr from hwfile, ignore.", dev->kernel_name); free(dev); continue; } rte_pci_device_name(&dev->pci_addr, dev->devname, sizeof(dev->devname)); MR_DEBUG("DPDK device candidate: kernel_name=%s, pci_addr=%s, driver=%s", dev->kernel_name, dev->str_pci_addr, dev->driver); TAILQ_INSERT_TAIL(&devmgr_main->dpdk_dev_candidate_list, dev, next); return RT_SUCCESS; } return RT_NOEXIST; } /* 早期设备扫描,从HWFILE中获取设备定义的信息 */ static int dpdk_dev_early_scan(struct devmgr_main * devmgr_main) { unsigned int nr_dev_symbols = 0; char ** str_dev_symbols = gcfg_device_syms_get_by_drv(devmgr_main->sc, MR_DEV_DRV_TYPE_DPDK_PCI, &nr_dev_symbols); /* 遍历所有dev设备 */ for (int i = 0; i < nr_dev_symbols; i++) { int ret = dpdk_dev_candidate_pci_register(devmgr_main, str_dev_symbols[i]); if (unlikely(ret < 0)) { MR_WARNING("device %s cannot be used as dpdk_pci device: not existed in the hwfile.", str_dev_symbols[i]); continue; } } for (int i = 0; i < nr_dev_symbols; i++) { free(str_dev_symbols[i]); } free(str_dev_symbols); return RT_SUCCESS; } static int dpdk_dev_af_packet_attach(struct devmgr_main * devmgr_main) { /* Get all virtio_user devices */ char ** af_packet_devsyms = NULL; unsigned int nr_af_packet_dev = 0; /* main handle */ struct sc_main * sc = devmgr_main->sc; assert(sc != NULL); int ret = RT_SUCCESS; /* query the virtio device symbols */ af_packet_devsyms = gcfg_device_syms_get_by_drv(sc, MR_DEV_DRV_TYPE_DPDK_AF_PACKET, &nr_af_packet_dev); if (unlikely(nr_af_packet_dev < 0)) { return 
RT_SUCCESS; } for (unsigned int i = 0; i < nr_af_packet_dev; i++) { uint16_t port_id; char vdev_name[MR_SYMBOL_MAX * 2]; /* dpdk会根据vdev_name获取对应的驱动,所以针对virtio类型的设备,此处命名必须为virtio_user开头 */ snprintf(vdev_name, sizeof(vdev_name) - 1, "eth_af_packet%d", i); char vdev_args[MR_SYMBOL_MAX * 2]; snprintf(vdev_args, sizeof(vdev_args) - 1, "qpairs=%d,iface=%s", sc->nr_io_thread, af_packet_devsyms[i]); ret = rte_eal_hotplug_add("vdev", vdev_name, vdev_args); if (ret < 0) { MR_ERROR("Attaching af_packet device %s failed, errno = %d", af_packet_devsyms[i], ret); goto errout; } ret = rte_eth_dev_get_port_by_name(vdev_name, &port_id); if (ret < 0) { rte_eal_hotplug_remove("vdev", vdev_name); MR_ERROR("Cannot find added vdev %s failed, errno = %d", af_packet_devsyms[i], ret); goto errout; } dpdk_dev_candidate_vdev_register(devmgr_main, af_packet_devsyms[i], vdev_name); MR_INFO("af_packet device: %s attach successful, port_id=%d", af_packet_devsyms[i], port_id); } ret = RT_SUCCESS; goto out; errout: ret = RT_ERR; goto out; out: for (unsigned int i = 0; i < nr_af_packet_dev; i++) { free(af_packet_devsyms[i]); } free(af_packet_devsyms); return ret; } struct bond_dev_config { unsigned int mode; unsigned int xmit_policy; char str_dev_slaves[MR_SYMBOL_MAX][MR_DEVICE_MAX]; unsigned int nr_dev_slaves; unsigned int is_bond_ether_addr_set; struct rte_ether_addr bond_ether_addr; }; static struct bond_dev_config * bond_dev_config_load(struct devmgr_main * devmgr_main, const char * devsym) { struct sc_main * sc = devmgr_main->sc; const char * cfg = sc->local_cfgfile; assert(sc != NULL); struct bond_dev_config * bond_cfg = ZMALLOC(sizeof(struct bond_dev_config)); MR_VERIFY_MALLOC(bond_cfg); char str_section[MR_SYMBOL_MAX]; snprintf(str_section, sizeof(str_section), "device:%s", devsym); char str_slaves[MR_STRING_MAX] = {}; MESA_load_profile_string_nodef(sc->local_cfgfile, str_section, "bond_slaves", str_slaves, sizeof(str_slaves)); char * str_slave_tokens[MR_TOKENS_MAX] = {}; int nr_str_tokens 
= rte_strsplit(str_slaves, sizeof(str_slaves), str_slave_tokens, MR_TOKENS_MAX, ','); if (unlikely(nr_str_tokens < 0)) { goto errout; } for (unsigned int i = 0; i < nr_str_tokens; i++) { strncpy(bond_cfg->str_dev_slaves[i], str_slave_tokens[i], sizeof(bond_cfg->str_dev_slaves[i]) - 1); } bond_cfg->nr_dev_slaves = nr_str_tokens; MESA_load_profile_uint_def(cfg, str_section, "bond_mode", &bond_cfg->mode, BONDING_MODE_BALANCE); MESA_load_profile_uint_def(cfg, str_section, "bond_xmit_policy", &bond_cfg->xmit_policy, BALANCE_XMIT_POLICY_LAYER34); return bond_cfg; errout: if (bond_cfg != NULL) rte_free(bond_cfg); return NULL; } int dpdk_dev_setup_from_candidate(struct devmgr_main * devmgr_main, struct dpdk_dev_candidate * dev_can, port_id_t port_id); static int bond_dev_init_one_device(struct devmgr_main * devmgr_main, const char * str_bond) { struct dpdk_dev_candidate * dev_can = NULL; struct bond_dev_config * bond_dev_cfg = NULL; bond_dev_cfg = bond_dev_config_load(devmgr_main, str_bond); if (unlikely(bond_dev_cfg == NULL)) { MR_ERROR("Failed at loading bond device %s's configuration, ignore it.", str_bond); goto errout; } /* create the bond device */ char str_bond_vdev[MR_SYMBOL_MAX * 2]; snprintf(str_bond_vdev, sizeof(str_bond_vdev) - 1, "net_bonding%s", str_bond); MR_INFO("bond_device %s: mode=%s, xmit_policy=%s", str_bond, str_bond_mode(bond_dev_cfg->mode), str_bond_xmit_policy(bond_dev_cfg->xmit_policy)); int bond_port_id = rte_eth_bond_create(str_bond_vdev, bond_dev_cfg->mode, 0); if (unlikely(bond_port_id < 0)) { MR_ERROR("Failed at creating bond device %s: rte_eth_bond_create() ret = %d", str_bond, bond_port_id); goto errout; } int ret = rte_eth_bond_xmit_policy_set(bond_port_id, bond_dev_cfg->xmit_policy); if (unlikely(ret < 0)) { MR_ERROR("Failed at setting xmit policy for bond %s: ret = %d", str_bond, ret); goto errout; } /* get the dev's desc and port_id */ for (unsigned int slave_iter = 0; slave_iter < bond_dev_cfg->nr_dev_slaves; slave_iter++) { const char 
* str_dev_slave = bond_dev_cfg->str_dev_slaves[slave_iter]; assert(str_dev_slave != NULL); struct mr_dev_desc * slave_dev_desc = mr_dev_desc_lookup(devmgr_main, str_dev_slave); if (unlikely(slave_dev_desc == NULL)) { MR_ERROR("Failed at join %s to bond %s: slave is not existed.", str_dev_slave, str_bond); goto errout; } if (unlikely(slave_dev_desc->dpdk_dev_desc == NULL)) { MR_ERROR("Failed at join %s to bond %s: slave must be a dpdk device.", str_dev_slave, str_bond); goto errout; } port_id_t slave_port_id = slave_dev_desc->port_id; #if RTE_VERSION >= RTE_VERSION_NUM(23, 11, 0, 0) ret = rte_eth_bond_member_add(bond_port_id, slave_port_id); #else ret = rte_eth_bond_slave_add(bond_port_id, slave_port_id); #endif if (unlikely(ret < 0)) { MR_ERROR("Failed at join %s to bond %s: ret = %d", str_dev_slave, str_bond, ret); goto errout; } /* for the slave device */ slave_dev_desc->is_bond_slave = 1; } /* set the mac address */ if (bond_dev_cfg->is_bond_ether_addr_set) { ret = rte_eth_bond_mac_address_set(bond_port_id, &bond_dev_cfg->bond_ether_addr); if (unlikely(ret < 0)) { MR_ERROR("Failed at set mac address for bond %s: ret = %d", str_bond, ret); goto errout; } } /* register an dpdk_dev candidate, the kernel name and dpdk vdev name are same */ ret = dpdk_dev_candidate_vdev_register(devmgr_main, str_bond, str_bond_vdev); if (unlikely(ret < 0)) { MR_ERROR("Failed at register dpdk candidate for bond %s.", str_bond); goto errout; } /* get the candidate desc */ dev_can = dpdk_dev_candidate_lookup_by_devname(devmgr_main, str_bond_vdev); assert(dev_can != NULL); ret = dpdk_dev_setup_from_candidate(devmgr_main, dev_can, bond_port_id); if (unlikely(ret < 0)) { MR_ERROR("Failed at dpdk device setup for bond %s.", str_bond); goto errout; } /* remove the dev candidate */ TAILQ_REMOVE(&devmgr_main->dpdk_dev_candidate_list, dev_can, next); /* get the master desc */ struct mr_dev_desc * bond_dev_desc = mr_dev_desc_lookup(devmgr_main, str_bond); bond_dev_desc->type = MR_DEV_TYPE_BOND; 
bond_dev_desc->is_bond_master = 1; if (bond_dev_cfg->mode == BONDING_MODE_8023AD) { bond_dev_desc->en_periodic_rx_tx = 1; } rte_free(bond_dev_cfg); /* success */ MR_INFO("Bond device %s created successfully: port_id = %d", str_bond, bond_port_id); return RT_SUCCESS; errout: if (bond_dev_cfg != NULL) { rte_free(bond_dev_cfg); } if (dev_can != NULL) { rte_free(dev_can); } return RT_ERR; } int bond_dev_init(struct devmgr_main * devmgr_main) { char ** bond_devsyms = NULL; unsigned int nr_bond_devsyms = 0; struct sc_main * sc = devmgr_main->sc; assert(sc != NULL); bond_devsyms = gcfg_device_syms_get_by_type(sc, MR_DEV_TYPE_BOND, &nr_bond_devsyms); if (unlikely(nr_bond_devsyms < 0)) { return RT_SUCCESS; } for (unsigned int i = 0; i < nr_bond_devsyms; i++) { int ret = bond_dev_init_one_device(devmgr_main, bond_devsyms[i]); if (unlikely(ret < 0)) { rte_free(bond_devsyms); return RT_ERR; } } return RT_SUCCESS; } /* VIRTIO Devices */ static int dpdk_dev_virtio_user_attach(struct devmgr_main * devmgr_main) { /* Get all virtio_user devices */ char ** virtio_devsyms = NULL; unsigned int nr_virtio_devsyms = 0; /* main handle */ struct sc_main * sc = devmgr_main->sc; assert(sc != NULL); /* query the virtio device symbols */ virtio_devsyms = gcfg_device_syms_get_by_drv(sc, MR_DEV_DRV_TYPE_DPDK_VIRTIO_USER, &nr_virtio_devsyms); if (unlikely(nr_virtio_devsyms < 0)) { return RT_SUCCESS; } for (unsigned int i = 0; i < nr_virtio_devsyms; i++) { uint16_t port_id; char vdev_name[MR_STRING_MAX]; /* dpdk会根据vdev_name获取对应的驱动,所以针对virtio类型的设备,此处命名必须为virtio_user开头 */ snprintf(vdev_name, sizeof(vdev_name) - 1, "virtio_user%d", i); char vdev_args[MR_STRING_MAX]; snprintf(vdev_args, sizeof(vdev_args) - 1, "queues=%d,queue_size=1024,path=/dev/vhost-net,iface=%s", sc->nr_io_thread, virtio_devsyms[i]); int ret = rte_eal_hotplug_add("vdev", vdev_name, vdev_args); if (ret < 0) { MR_ERROR("Attaching virtio-user device %s failed, errno = %d", virtio_devsyms[i], ret); return RT_ERR; } ret = 
vhost_dev_setup(virtio_devsyms[i]); if (ret < 0) { MR_ERROR("Cannot set the tap to up for device: %s\n", virtio_devsyms[i]); return RT_ERR; } ret = rte_eth_dev_get_port_by_name(vdev_name, &port_id); if (ret < 0) { rte_eal_hotplug_remove("vdev", vdev_name); MR_ERROR("Cannot find added vdev %s failed, errno = %d", virtio_devsyms[i], ret); return RT_ERR; } dpdk_dev_candidate_vdev_register(devmgr_main, virtio_devsyms[i], vdev_name); MR_INFO("virtio_user device: %s attach successful, port_id=%d", virtio_devsyms[i], port_id); } for (unsigned int i = 0; i < nr_virtio_devsyms; i++) { free(virtio_devsyms[i]); } free(virtio_devsyms); return RT_SUCCESS; } void * dpdk_dev_link_state_update_thread(void * arg); int dpdk_dev_setup_from_candidate(struct devmgr_main * devmgr_main, struct dpdk_dev_candidate * dev_can, port_id_t port_id) { struct dpdk_dev * dev_dpdk = ZMALLOC(sizeof(struct dpdk_dev)); MR_VERIFY_MALLOC(dev_dpdk); /* copy name and pci address from the candidate */ dev_dpdk->port_id = port_id; snprintf(dev_dpdk->symbol, sizeof(dev_dpdk->symbol) - 1, "%s", dev_can->kernel_name); memcpy(&dev_dpdk->pci_addr, &dev_can->pci_addr, sizeof(dev_dpdk->pci_addr)); /* next, create the dev desc */ struct mr_dev_desc * dev_desc = mr_dev_desc_create(devmgr_main, dev_dpdk->symbol); if (unlikely(dev_desc == NULL)) { MR_ERROR("Failed at create device descriptor for device %s", dev_dpdk->symbol); return RT_ERR; } /* load config */ int ret = mr_dev_desc_config_load(devmgr_main, dev_desc); if (unlikely(ret < 0)) { MR_ERROR("Failed at loading config for the dpdk device %s", dev_dpdk->symbol); return RT_ERR; } /* attach dpdk_dev structure to dev_desc */ assert(dev_desc->drv_type == MR_DEV_DRV_TYPE_DPDK_PCI || dev_desc->drv_type == MR_DEV_DRV_TYPE_DPDK_VIRTIO_USER || dev_desc->drv_type == MR_DEV_DRV_TYPE_DPDK_AF_PACKET); /* create dual-link between dev_desc and dev_dpdk */ dev_desc->port_id = port_id; dev_desc->dpdk_dev_desc = dev_dpdk; dev_dpdk->ref_dev_desc = dev_desc; /* read device's 
capability */ struct rte_eth_dev_info dev_info; rte_eth_dev_info_get(port_id, &dev_info); /* fill the mac address from conf */ rte_ether_addr_copy(&dev_desc->eth_addr, &dev_dpdk->ether_addr); rte_eth_dev_get_mtu(dev_dpdk->port_id, &dev_dpdk->mtu); /* load user settings */ const char * cfgfile = devmgr_main->sc->local_cfgfile; dpdk_dev_config_load(dev_dpdk, cfgfile); ret = dpdk_dev_setup_common(devmgr_main, dev_dpdk); if (unlikely(ret < 0)) { MR_ERROR("Failed at setup the dpdk device %s", dev_dpdk->symbol); return RT_ERR; } /* join the device list */ assert(devmgr_main->dev_descs[port_id] == NULL); devmgr_main->dev_descs[port_id] = dev_desc; return RT_SUCCESS; } int dpdk_dev_init(struct devmgr_main * devmgr_main) { /* setup all dpdk devices, at first, iterate the pci bus */ port_id_t port_id; int ret = 0; /* all ethernet devices connect to pci bus */ RTE_ETH_FOREACH_DEV(port_id) { char devname[RTE_ETH_NAME_MAX_LEN]; ret = rte_eth_dev_get_name_by_port(port_id, devname); if (unlikely(ret < 0)) { MR_ERROR("Failed to get devname, ignore it: port_id=%d", port_id); continue; } struct dpdk_dev_candidate * dev_can = NULL; dev_can = dpdk_dev_candidate_lookup_by_devname(devmgr_main, devname); if (unlikely(dev_can == NULL)) { MR_ERROR("Failed at lookup dpdk_dev_candidate, ignore it: port_id=%d", port_id); continue; } ret = dpdk_dev_setup_from_candidate(devmgr_main, dev_can, port_id); if (ret < 0) { MR_ERROR("Failed at dpdk device setup from candidate: port_id = %d", port_id); continue; } /* success, remove the candidate from list */ TAILQ_REMOVE(&devmgr_main->dpdk_dev_candidate_list, dev_can, next); } return RT_SUCCESS; } void dpdk_dev_stat_get(struct dpdk_dev * dev, struct dpdk_dev_stats * dpdk_dev_stat) { rte_eth_stats_get(dev->port_id, &dev->stat.rte); *dpdk_dev_stat = dev->stat; } void dpdk_dev_stat_last_save(struct dpdk_dev * dev, struct dpdk_dev_stats * dpdk_dev_stat_last) { dev->stat_last = *dpdk_dev_stat_last; } void dpdk_dev_stat_last_get(struct dpdk_dev * dev, struct 
dpdk_dev_stats * dpdk_dev_stat_last) { *dpdk_dev_stat_last = dev->stat_last; } void * dpdk_dev_link_state_update_thread(void * arg) { struct devmgr_main * devmgr_main = (struct devmgr_main *)arg; pthread_detach(pthread_self()); mr_thread_setname(pthread_self(), "MRZCPD_DPDK_DEV_LINK_UPDATE"); while (g_keep_running) { for (unsigned int i = 0; i < RTE_DIM(devmgr_main->dev_descs); i++) { struct mr_dev_desc * dev_desc = devmgr_main->dev_descs[i]; if (dev_desc == NULL || dev_desc->dpdk_dev_desc == NULL) continue; struct rte_eth_link now_eth_link; struct dpdk_dev * dpdk_dev = dev_desc->dpdk_dev_desc; rte_eth_link_get_nowait(dpdk_dev->port_id, &now_eth_link); struct rte_eth_link old_eth_link = dpdk_dev->link_status; /* print log when the link status changed */ if (memcmp(&now_eth_link, &old_eth_link, sizeof(struct rte_eth_link)) != 0) { char str_eth_link_status[MR_STRING_MAX]; rte_eth_link_to_str(str_eth_link_status, sizeof(str_eth_link_status) - 1, &now_eth_link); MR_INFO("device %s link status changed: %s", dev_desc->symbol, str_eth_link_status); } /* remember the new status */ dpdk_dev->link_status = now_eth_link; } sleep(1); } return (void *)0; } static int devmgr_hwfile_load(struct devmgr_main * devmgr_main, const char * hwfile_path) { FILE * f = fopen(hwfile_path, "rb"); if (f == NULL) { MR_ERROR("Cannot open hardware file %s: %s", hwfile_path, strerror(errno)); return RT_ERR; } fseek(f, 0, SEEK_END); long fsize = ftell(f); fseek(f, 0, SEEK_SET); // same as rewind(f); char * string = malloc(fsize + 1); MR_VERIFY_MALLOC(string); /* 读文件 */ fread(string, fsize, 1, f); fclose(f); string[fsize] = 0; cJSON * j_hwfile = cJSON_Parse(string); if (j_hwfile == NULL) { MR_ERROR("Hardware file %s parse failed. 
", hwfile_path); return RT_ERR; } devmgr_main->j_hwfile = j_hwfile; return RT_SUCCESS; } // 物理网卡统计计数 static cJSON * dpdk_dev_monit_stats(struct dpdk_dev * dev) { struct cJSON * j_device_stats = cJSON_CreateObject(); struct cJSON * j_device_value = cJSON_CreateObject(); struct cJSON * j_device_speed = cJSON_CreateObject(); struct dpdk_dev_stats _dpdk_dev_stat = {}; struct dpdk_dev_stats _dpdk_dev_stat_last = {}; dpdk_dev_stat_get(dev, &_dpdk_dev_stat); dpdk_dev_stat_last_get(dev, &_dpdk_dev_stat_last); dpdk_dev_stat_last_save(dev, &_dpdk_dev_stat); struct rte_eth_stats _eth_stat = _dpdk_dev_stat.rte; struct rte_eth_stats _eth_stat_last = _dpdk_dev_stat_last.rte; uint64_t user_rx_drop_total = 0; uint64_t user_rx_drop_last = 0; uint64_t user_tx_drop_total = 0; uint64_t user_tx_drop_last = 0; for (int i = 0; i < RTE_DIM(_dpdk_dev_stat.tx_drop_counter); i++) { user_tx_drop_total += _dpdk_dev_stat.tx_drop_counter[i]; } for (int i = 0; i < RTE_DIM(_dpdk_dev_stat_last.tx_drop_counter); i++) { user_tx_drop_last += _dpdk_dev_stat_last.tx_drop_counter[i]; } cJSON_AddNumberToObject(j_device_value, "ipackets", _eth_stat.ipackets); cJSON_AddNumberToObject(j_device_value, "opackets", _eth_stat.opackets); cJSON_AddNumberToObject(j_device_value, "ibytes", _eth_stat.ibytes * 8); cJSON_AddNumberToObject(j_device_value, "obytes", _eth_stat.obytes * 8); cJSON_AddNumberToObject(j_device_value, "imissed", _eth_stat.imissed); cJSON_AddNumberToObject(j_device_value, "ierrors", _eth_stat.ierrors); cJSON_AddNumberToObject(j_device_value, "oerrors", _eth_stat.oerrors); cJSON_AddNumberToObject(j_device_value, "rxnombuf", _eth_stat.rx_nombuf); cJSON_AddNumberToObject(j_device_value, "userrxdrop", user_rx_drop_total); cJSON_AddNumberToObject(j_device_value, "usertxdrop", user_tx_drop_total); cJSON_AddNumberToObject(j_device_speed, "ipackets", _eth_stat.ipackets - _eth_stat_last.ipackets); cJSON_AddNumberToObject(j_device_speed, "opackets", _eth_stat.opackets - _eth_stat_last.opackets); 
cJSON_AddNumberToObject(j_device_speed, "ibytes", (_eth_stat.ibytes - _eth_stat_last.ibytes) * 8); cJSON_AddNumberToObject(j_device_speed, "obytes", (_eth_stat.obytes - _eth_stat_last.obytes) * 8); cJSON_AddNumberToObject(j_device_speed, "imissed", _eth_stat.imissed - _eth_stat_last.imissed); cJSON_AddNumberToObject(j_device_speed, "ierrors", _eth_stat.ierrors - _eth_stat_last.ierrors); cJSON_AddNumberToObject(j_device_speed, "oerrors", _eth_stat.oerrors - _eth_stat_last.oerrors); cJSON_AddNumberToObject(j_device_speed, "rxnombuf", _eth_stat.rx_nombuf - _eth_stat_last.rx_nombuf); cJSON_AddNumberToObject(j_device_speed, "userrxdrop", user_rx_drop_total - user_rx_drop_last); cJSON_AddNumberToObject(j_device_speed, "usertxdrop", user_tx_drop_total - user_tx_drop_last); cJSON_AddItemToObject(j_device_stats, "accumulative", j_device_value); cJSON_AddItemToObject(j_device_stats, "speed", j_device_speed); return j_device_stats; } // 物理网卡链路信息输出 static cJSON * dpdk_dev_monit_link_status(struct mr_dev_desc * dev) { struct dpdk_dev * dpdk_dev_desc = dev->dpdk_dev_desc; assert(dpdk_dev_desc != NULL); struct cJSON * j_device_link = cJSON_CreateObject(); cJSON_AddNumberToObject(j_device_link, "LinkSpeed", dpdk_dev_desc->link_status.link_speed); cJSON_AddBoolToObject(j_device_link, "LinkDuplex", dpdk_dev_desc->link_status.link_duplex); cJSON_AddBoolToObject(j_device_link, "LinkAutoNeg", dpdk_dev_desc->link_status.link_autoneg); cJSON_AddBoolToObject(j_device_link, "LinkStatus", dpdk_dev_desc->link_status.link_status); return j_device_link; } static cJSON * dpdk_dev_monit_bonds(struct devmgr_main * devmgr_main, struct mr_dev_desc * dev) { if (!dev->is_bond_master) { return NULL; } struct dpdk_dev * dpdk_dev_desc = dev->dpdk_dev_desc; assert(dpdk_dev_desc != NULL); struct cJSON * j_device_bond_status = cJSON_CreateObject(); int bonding_mode = rte_eth_bond_mode_get(dpdk_dev_desc->port_id); if (unlikely(bonding_mode < 0)) { goto errout; } cJSON_AddStringToObject(j_device_bond_status, 
"BondMode", str_bond_mode(bonding_mode)); if (bonding_mode == BONDING_MODE_BALANCE || bonding_mode == BONDING_MODE_8023AD) { int balance_xmit_policy = rte_eth_bond_xmit_policy_get(dpdk_dev_desc->port_id); if (unlikely(balance_xmit_policy < 0)) { goto errout; } cJSON_AddStringToObject(j_device_bond_status, "XmitPolicy", str_bond_xmit_policy(balance_xmit_policy)); } if (bonding_mode == BONDING_MODE_8023AD) { int agg_selection = rte_eth_bond_8023ad_agg_selection_get(dpdk_dev_desc->port_id); if (unlikely(agg_selection < 0)) { goto errout; } cJSON_AddStringToObject(j_device_bond_status, "AggSel", str_bond_agg_selection(agg_selection)); } /* ------------------------------ SLAVES ------------------------------- */ uint16_t slaves[RTE_MAX_ETHPORTS] = {}; #if RTE_VERSION >= RTE_VERSION_NUM(23, 11, 0, 0) int num_slaves = rte_eth_bond_members_get(dpdk_dev_desc->port_id, slaves, RTE_MAX_ETHPORTS); #else int num_slaves = rte_eth_bond_slaves_get(dpdk_dev_desc->port_id, slaves, RTE_MAX_ETHPORTS); #endif if (unlikely(num_slaves < 0)) { goto errout; } cJSON * j_slaves = cJSON_CreateArray(); for (int i = 0; i < num_slaves; i++) { struct mr_dev_desc * slave_dev_desc = mr_dev_desc_lookup_by_port_id(devmgr_main, slaves[i]); if (unlikely(slave_dev_desc == NULL)) { goto errout; } cJSON_AddItemToArray(j_slaves, cJSON_CreateString(slave_dev_desc->symbol)); } cJSON_AddItemToObject(j_device_bond_status, "Slaves", j_slaves); /* ------------------------------ ACTIVE SLAVES ------------------------------- */ uint16_t active_slaves[RTE_MAX_ETHPORTS] = {}; #if RTE_VERSION >= RTE_VERSION_NUM(23, 11, 0, 0) int num_active_slaves = rte_eth_bond_active_members_get(dpdk_dev_desc->port_id, active_slaves, RTE_MAX_ETHPORTS); #else int num_active_slaves = rte_eth_bond_active_slaves_get(dpdk_dev_desc->port_id, active_slaves, RTE_MAX_ETHPORTS); #endif if (unlikely(num_active_slaves < 0)) { goto errout; } cJSON * j_active_slaves = cJSON_CreateArray(); for (int i = 0; i < num_active_slaves; i++) { struct 
mr_dev_desc * active_slave_desc = mr_dev_desc_lookup_by_port_id(devmgr_main, slaves[i]); if (unlikely(active_slave_desc == NULL)) { goto errout; } cJSON_AddItemToArray(j_active_slaves, cJSON_CreateString(active_slave_desc->symbol)); } cJSON_AddItemToObject(j_device_bond_status, "ActiveSlaves", j_active_slaves); /* -------------------------------- PRIMARY --------------------------------- */ int primary_id = rte_eth_bond_primary_get(dpdk_dev_desc->port_id); if (unlikely(primary_id < 0)) { goto errout; } struct mr_dev_desc * primary_dev_desc = mr_dev_desc_lookup_by_port_id(devmgr_main, primary_id); if (unlikely(primary_dev_desc == NULL)) { goto errout; } cJSON_AddStringToObject(j_device_bond_status, "Primary", primary_dev_desc->symbol); return j_device_bond_status; errout: if (j_device_bond_status != NULL) { cJSON_Delete(j_device_bond_status); j_device_bond_status = NULL; } return NULL; } // 物理网卡基本信息输出 static cJSON * dpdk_dev_monit_info(struct mr_dev_desc * dev) { struct dpdk_dev * dpdk_dev_desc = dev->dpdk_dev_desc; assert(dpdk_dev_desc != NULL); char str_ether_addr[MR_STRING_MAX]; rte_ether_format_addr(str_ether_addr, sizeof(str_ether_addr), &dpdk_dev_desc->ether_addr); char str_pci_addr[PCI_PRI_STR_SIZE] = {0}; struct rte_pci_addr * pci_addr = &dpdk_dev_desc->pci_addr; snprintf(str_pci_addr, sizeof(str_pci_addr), PCI_PRI_FMT, pci_addr->domain, pci_addr->bus, pci_addr->devid, pci_addr->function); struct cJSON * j_device_info = cJSON_CreateObject(); cJSON_AddStringToObject(j_device_info, "EtherAddr", str_ether_addr); cJSON_AddStringToObject(j_device_info, "PCIAddr", str_pci_addr); cJSON_AddNumberToObject(j_device_info, "PortID", dpdk_dev_desc->port_id); cJSON_AddNumberToObject(j_device_info, "RxQueueCount", dpdk_dev_desc->nr_rxq); cJSON_AddNumberToObject(j_device_info, "TxQueueCount", dpdk_dev_desc->nr_txq); cJSON_AddNumberToObject(j_device_info, "MTU", dpdk_dev_desc->mtu); cJSON_AddBoolToObject(j_device_info, "Promisc", dpdk_dev_desc->promisc); /* status for 
dev_desc */ cJSON_AddStringToObject(j_device_info, "Role", str_dev_role(dev->role_type)); cJSON_AddStringToObject(j_device_info, "Type", str_dev_type(dev->type)); cJSON_AddStringToObject(j_device_info, "Driver", str_dev_driver(dev->drv_type)); cJSON_AddStringToObject(j_device_info, "Mode", str_dev_mode(dev->dev_mode)); return j_device_info; } cJSON * devmgr_monit_loop(struct sc_main * sc) { struct devmgr_main * devmgr_main = sc->devmgr_main; struct cJSON * j_device_array = cJSON_CreateArray(); unsigned int dev_iterator = 0; struct mr_dev_desc * dev_desc_iter = NULL; while ((dev_desc_iter = mr_dev_desc_iterate(devmgr_main, &dev_iterator)) != NULL) { if (dev_desc_iter->dpdk_dev_desc == NULL) continue; struct cJSON * j_device = cJSON_CreateObject(); cJSON_AddStringToObject(j_device, "symbol", dev_desc_iter->symbol); cJSON_AddItemToObject(j_device, "information", dpdk_dev_monit_info(dev_desc_iter)); cJSON_AddItemToObject(j_device, "link", dpdk_dev_monit_link_status(dev_desc_iter)); cJSON_AddItemToObject(j_device, "stats", dpdk_dev_monit_stats(dev_desc_iter->dpdk_dev_desc)); cJSON_AddItemToObject(j_device, "bond", dpdk_dev_monit_bonds(devmgr_main, dev_desc_iter)); cJSON_AddItemToArray(j_device_array, j_device); } return j_device_array; } /* 在EAL启动前调用,生成ARGC, ARGV等参数 */ int devmgr_early_init(struct sc_main * sc) { assert(sc->devmgr_main == NULL); /* EAL环境还没有初始化,用malloc申请内存 */ sc->devmgr_main = malloc(sizeof(struct devmgr_main)); MR_VERIFY_MALLOC(sc->devmgr_main); struct devmgr_main * devmgr_main = sc->devmgr_main; memset(devmgr_main, 0, sizeof(struct devmgr_main)); TAILQ_INIT(&devmgr_main->dpdk_dev_candidate_list); devmgr_hwfile_load(devmgr_main, sc->local_hwfile); /* the shmdev's port_id is after the dpdk_dev's ports */ devmgr_main->shmdev_port_id_counter = RTE_MAX_ETHPORTS; devmgr_main->sc = sc; /* dpdk pci devices need to be pre-parsed before eal_init */ return dpdk_dev_early_scan(devmgr_main); } void devmgr_eal_args_generate(struct devmgr_main * devmgr_main, char * 
eal_argv[], unsigned int * eal_argc, unsigned int max_argc) { struct dpdk_dev_candidate * dev_can = NULL; char * local_cfgfile = devmgr_main->sc->local_cfgfile; /* generate the mlx5 parameters */ unsigned int mlx5_en_delay_drop = 0; MESA_load_profile_uint_def(local_cfgfile, "eal", "mlx5_en_delay_drop", &mlx5_en_delay_drop, 0); unsigned int mlx5_en_rxq_pkt_pad = 0; MESA_load_profile_uint_def(local_cfgfile, "eal", "mlx5_en_rxq_pkt_pad", &mlx5_en_rxq_pkt_pad, 1); unsigned int mlx5_txq_inline_max = 0; MESA_load_profile_uint_def(local_cfgfile, "eal", "mlx5_txq_inline_max", &mlx5_txq_inline_max, 128); unsigned int mlx5_txq_inline_mpw = 0; MESA_load_profile_uint_def(local_cfgfile, "eal", "mlx5_txq_inline_mpw", &mlx5_txq_inline_mpw, 128); unsigned int mlx5_tx_pp = 0; MESA_load_profile_uint_def(local_cfgfile, "eal", "mlx5_tx_pp", &mlx5_tx_pp, 0); TAILQ_FOREACH(dev_can, &devmgr_main->dpdk_dev_candidate_list, next) { /* for now, all the candidate devices are PCI devices, * all the PCi devices need to be in the whitelist */ char * str_eal_lead = NULL; asprintf(&str_eal_lead, "%s", "-a"); eal_argv[(*eal_argc)++] = str_eal_lead; char * str_dev_option = NULL; asprintf(&str_dev_option, "%s", dev_can->str_pci_addr); /* for the mlx5/6, disable the duplicated flow feature */ if (strcasecmp(dev_can->driver, "mlx5_core") == 0) { asprintf(&str_dev_option, "%s,%s", str_dev_option, "rxq_cqe_comp_en=4,allow_duplicate_pattern=0"); if (mlx5_en_delay_drop > 0) { asprintf(&str_dev_option, "%s,%s", str_dev_option, "delay_drop=0x1"); } if (mlx5_en_rxq_pkt_pad > 0) { asprintf(&str_dev_option, "%s,rxq_pkt_pad_en=0x1", str_dev_option); } if (mlx5_txq_inline_max > 0) { asprintf(&str_dev_option, "%s,txq_inline_max=%u", str_dev_option, mlx5_txq_inline_max); } if (mlx5_txq_inline_mpw > 0) { asprintf(&str_dev_option, "%s,txq_inline_mpw=%u", str_dev_option, mlx5_txq_inline_mpw); } } eal_argv[(*eal_argc)++] = str_dev_option; } } void devmgr_deinit(struct devmgr_main * devmgr_main) { for (unsigned int i = 
0; i < RTE_DIM(devmgr_main->dev_descs); i++) {
    struct mr_dev_desc * dev_desc = devmgr_main->dev_descs[i];
    if (dev_desc == NULL)
        continue;
    /* for dpdk based devices */
    if (dev_desc->dpdk_dev_desc != NULL) {
        struct dpdk_dev * dev_dpdk = dev_desc->dpdk_dev_desc;
        rte_eth_dev_stop(dev_dpdk->port_id);
        rte_eth_dev_close(dev_dpdk->port_id);
    }
    /* for other devices, do nothing */
    MR_INFO("device %s (port_id=%d) closed.", dev_desc->symbol, dev_desc->port_id);
}
}

/**
 * Main device-manager initialization, run after the EAL is up:
 * attach virtio_user and af_packet vdevs, set up all DPDK ports,
 * shared-memory devices and bonds, wire the kernel-resp cross links,
 * then start the link-state polling thread.
 *
 * Returns RT_SUCCESS, or RT_ERR as soon as any stage fails.
 */
int devmgr_init(struct devmgr_main * devmgr_main)
{
    int ret = 0;
    /* attach the virtio_user */
    ret = dpdk_dev_virtio_user_attach(devmgr_main);
    if (unlikely(ret < 0)) {
        MR_ERROR("Failed at attach virtio_user devices.");
        return RT_ERR;
    }
    ret = dpdk_dev_af_packet_attach(devmgr_main);
    if (unlikely(ret < 0)) {
        MR_ERROR("Failed at attach af_packet devices.");
        return RT_ERR;
    }
    ret = dpdk_dev_init(devmgr_main);
    if (unlikely(ret < 0)) {
        MR_ERROR("Failed at setup dpdk devices.");
        return RT_ERR;
    }
    /* shared-memory devices */
    ret = shmdev_init(devmgr_main);
    if (unlikely(ret < 0)) {
        MR_ERROR("Failed at setup shared-memory devices.");
        return RT_ERR;
    }
    /* bond devices */
    ret = bond_dev_init(devmgr_main);
    if (unlikely(ret < 0)) {
        MR_ERROR("Failed at setup bond devices.");
        return RT_ERR;
    }
    /* kernel resp cross link */
    kernel_resp_crosslink(devmgr_main);
    all_dpdk_dev_status_print(devmgr_main);
    /* start the physical-device link-state update thread (detaches itself) */
    pthread_t _pid_link_update;
    ret = pthread_create(&_pid_link_update, NULL, dpdk_dev_link_state_update_thread, (void *)devmgr_main);
    if (ret != 0) {
        MR_ERROR("PHYDEV link state update thread create failed: %s", strerror(ret));
        return RT_ERR;
    }
    return RT_SUCCESS;
}