#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int tap_ioctl(int fd, unsigned long request, struct ifreq * ifr, int set) { short req_flags = ifr->ifr_flags; switch (request) { case SIOCSIFFLAGS: if (ioctl(fd, SIOCGIFFLAGS, ifr) < 0) goto error; if (set) ifr->ifr_flags |= req_flags; else ifr->ifr_flags &= ~req_flags; break; case SIOCGIFFLAGS: case SIOCGIFHWADDR: case SIOCSIFHWADDR: case SIOCSIFMTU: break; default: return -EINVAL; } if (ioctl(fd, request, ifr) < 0) goto error; return 0; error: MR_ERROR("%s: ioctl(%lu) failed with error: %s", ifr->ifr_name, request, strerror(errno)); return -errno; } struct tap_device { struct mr_instance * ref_mr_instance; struct mr_vdev * ref_vdev; int tap_fd; struct rte_epoll_event epoll_event; rte_atomic64_t stat_write_pkts; rte_atomic64_t stat_write_pktlen; rte_atomic64_t stat_write_drops; rte_atomic64_t stat_read_pkts; rte_atomic64_t stat_read_pktlen; rte_atomic64_t stat_read_drops; }; static int tap_resp_dev_filter(struct vdev * vdev_desc, struct rte_mbuf * mbuf) { struct mrb_metadata * mrb_meta = (struct mrb_metadata *)mrbuf_cz_data(mbuf, MR_NODE_CTRLZONE_ID); struct pkt_parser_result * parser_result = &mrb_meta->pkt_parser_result; const struct rte_ether_hdr * ether_hdr = rte_pktmbuf_mtod(mbuf, const struct rte_ether_hdr *); /* not local's mac addr or broadcast packet, ignore it */ if (rte_is_broadcast_ether_addr(ðer_hdr->dst_addr) == 0 && rte_is_same_ether_addr(ðer_hdr->dst_addr, &vdev_desc->ether_addr) == 0) { return 0; } /* for arp, rarp and lldp, only check the dest's mac address */ if (ether_hdr->ether_type == htons(RTE_ETHER_TYPE_ARP) && vdev_desc->representor_config.redirect_local_arp > 0) { return 1; } if (ether_hdr->ether_type == htons(RTE_ETHER_TYPE_RARP) && vdev_desc->representor_config.redirect_local_rarp > 0) { return 1; } if (ether_hdr->ether_type == htons(RTE_ETHER_TYPE_LLDP) && vdev_desc->representor_config.redirect_local_lldp > 0) { return 1; } /* allow layers are ETHER->IPv4->TCP, ETHER->IPv4->UDP, the ipv6 is not supported for now. */ static const uint16_t exp_ipv4_tcp[] = { LAYER_TYPE_ID_ETHER, LAYER_TYPE_ID_IPV4, LAYER_TYPE_ID_TCP, }; static const uint16_t exp_ipv4_udp[] = { LAYER_TYPE_ID_ETHER, LAYER_TYPE_ID_IPV4, LAYER_TYPE_ID_UDP, }; static const uint16_t exp_ipv4_others[] = { LAYER_TYPE_ID_ETHER, LAYER_TYPE_ID_IPV4, }; if(parser_result->nr_layers == 3 || parser_result->nr_layers == 2) { goto compare_layers; } else { return 0; } compare_layers: if (complex_layer_type_expect(parser_result, exp_ipv4_tcp, RTE_DIM(exp_ipv4_tcp)) == 0 || complex_layer_type_expect(parser_result, exp_ipv4_udp, RTE_DIM(exp_ipv4_udp)) == 0 || complex_layer_type_expect(parser_result, exp_ipv4_others, RTE_DIM(exp_ipv4_others)) == 0) { return 1; } else { return 0; } assert(false); } static int tap_device_tx(struct tap_device * tap_dev, struct rte_mbuf * mbuf) { const char * pkt_ptr = rte_pktmbuf_mtod(mbuf, const char *); unsigned int pkt_len = rte_pktmbuf_data_len(mbuf); if (pkt_ptr == NULL || pkt_len == 0) { rte_atomic64_add(&tap_dev->stat_write_drops, 1); goto err; } ssize_t len = write(tap_dev->tap_fd, pkt_ptr, pkt_len); if (unlikely(len < 0)) { rte_atomic64_add(&tap_dev->stat_write_drops, 1); goto err; } rte_atomic64_add(&tap_dev->stat_write_pkts, 1); rte_atomic64_add(&tap_dev->stat_write_pktlen, pkt_len); return 0; err: return -1; } static int tap_device_rx(struct tap_device * tap_dev, unsigned int queue_id, marsio_buff_t * buffs[], unsigned int nr_buffs) { char buff[ETH_MAX_MTU]; ssize_t sz_buff = read(tap_dev->tap_fd, buff, sizeof(buff)); if (sz_buff == -1 && (errno == EWOULDBLOCK || errno == EAGAIN)) { return 0; } else if (unlikely(sz_buff < 0)) { return -1; } int ret = marsio_buff_malloc_global(tap_dev->ref_mr_instance, buffs, 1, MARSIO_SOCKET_ID_ANY, MARSIO_LCORE_ID_ANY); if (unlikely(ret < 0)) { return -2; } struct rte_mbuf * mbuf = (struct rte_mbuf *)buffs[0]; assert(mbuf != NULL); char * mbuf_data_ptr = rte_pktmbuf_append(mbuf, sz_buff); if (unlikely(mbuf_data_ptr == NULL)) { marsio_buff_free(tap_dev->ref_mr_instance, buffs, 1, MARSIO_SOCKET_ID_ANY, MARSIO_LCORE_ID_ANY); return -3; } rte_memcpy(mbuf_data_ptr, buff, sz_buff); return 1; } static struct tap_device * tap_device_create(struct mr_instance * mr_instance, const char * name, struct rte_ether_addr * hwaddr) { struct tap_device * tap_dev_inst = rte_zmalloc(NULL, sizeof(struct tap_device), 0); MR_VERIFY_MALLOC(tap_dev_inst); tap_dev_inst->ref_mr_instance = mr_instance; int ioctl_sock = -1; int tap_fd = -1; tap_fd = open("/dev/net/tun", O_RDWR); if (tap_fd < 0) { MR_ERROR("open /dev/net/tun failed: %s", strerror(errno)); goto errout; } ioctl_sock = socket(AF_INET, SOCK_DGRAM, 0); if (ioctl_sock < 0) { MR_ERROR("unable to get a socket for management: %s", strerror(errno)); goto errout; } struct ifreq ifr = {.ifr_flags = IFF_TAP | IFF_NO_PI}; snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name); int ret = ioctl(tap_fd, TUNSETIFF, (void *)&ifr); if (ret < 0) { MR_ERROR("tap %s tap_fd(tap_fd = %d) ioctl failed: %s", name, tap_fd, strerror(errno)); goto errout; } /* set the tap device in up and running status */ struct ifreq link_up_ifr = {.ifr_flags = IFF_UP | IFF_RUNNING}; snprintf(link_up_ifr.ifr_name, IFNAMSIZ, "%s", name); ret = tap_ioctl(ioctl_sock, SIOCSIFFLAGS, &link_up_ifr, 1); if (ret < 0) { MR_ERROR("tap device %s link up failed.", name); goto errout; } /* clear the ifr, and use it as mac addr setup */ ret = tap_ioctl(ioctl_sock, SIOCGIFHWADDR, &ifr, 0); if (ret < 0) { MR_ERROR("tap device %s mac address get failed.", name); goto errout; } ifr.ifr_hwaddr.sa_family = AF_LOCAL; rte_memcpy(ifr.ifr_hwaddr.sa_data, hwaddr, RTE_ETHER_ADDR_LEN); ret = tap_ioctl(ioctl_sock, SIOCSIFHWADDR, &ifr, 1); if (ret < 0) { MR_ERROR("tap device %s mac address set failed.", name); goto errout; } MR_INFO("tap device %s for created.", name); tap_dev_inst->tap_fd = tap_fd; close(ioctl_sock); ioctl_sock = -1; /* first time called, need to create evfd */ if (mr_instance->tap_resp_epfd == 0) { mr_instance->tap_resp_epfd = epoll_create1(EPOLL_CLOEXEC); } /* prepare add the device handle to instance, and join to epoll fd */ struct rte_epoll_event epoll_event = { .epdata.event = EPOLLIN, .epdata.data = (void *)tap_dev_inst, }; tap_dev_inst->epoll_event = epoll_event; ret = rte_epoll_ctl(mr_instance->tap_resp_epfd, EPOLL_CTL_ADD, tap_fd, &tap_dev_inst->epoll_event); if (ret < 0) { MR_ERROR("failed at add tap_fd %d to epoll_fd for tap representor %s.", tap_fd, name); goto errout; } /* add to the tap resp sets */ mr_instance->tap_resp_devices[mr_instance->nr_tap_resp_devices] = tap_dev_inst; mr_instance->nr_tap_resp_devices++; return tap_dev_inst; errout: if (tap_dev_inst != NULL) { rte_free(tap_dev_inst); } if (ioctl_sock >= 0) { close(ioctl_sock); } if (tap_fd >= 0) { close(tap_fd); } return NULL; } static void * tap_representor_poll_thread_entry(void * arg) { #define TAP_RESP_EVENTS_MAX 8 struct mr_instance * mr_instance = (struct mr_instance *)arg; struct rte_epoll_event epoll_events[TAP_RESP_EVENTS_MAX] = {}; int ret = 0; while (1) { int n = rte_epoll_wait(mr_instance->tap_resp_epfd, epoll_events, TAP_RESP_EVENTS_MAX, -1); if (unlikely(n < 0)) { MR_ERROR("rte_epoll_wait returned error %d, tap_resp poll thread terminated.", errno); goto errout; } /* handle the read event, read the packet, then redirect to shmdev queues */ for (int i = 0; i < n; i++) { struct tap_device * tap_dev = (struct tap_device *)(epoll_events[i].epdata.data); struct mr_vdev * vdev = tap_dev->ref_vdev; marsio_buff_t * buff[1]; ret = tap_device_rx(tap_dev, 0, buff, RTE_DIM(buff)); if (ret <= 0) { continue; } ret = mrapp_packet_fast_send_burst(mr_instance, vdev->vdi, 0, (struct rte_mbuf **)buff, RTE_DIM(buff)); if (unlikely(ret < 0)) { marsio_buff_free_v2(mr_instance, buff, RTE_DIM(buff)); } } } errout: return (void *)NULL; } int tap_representor_entry(struct mr_vdev * vdev, unsigned int qid, struct rte_mbuf * buffs[], unsigned int nr_buffs) { static rte_spinlock_t tap_dev_write_lock = {0}; for (unsigned int i = 0; i < nr_buffs; i++) { if (tap_resp_dev_filter(vdev->vdi->vdev, buffs[i]) <= 0) { continue; } /* spinlock at here */ rte_spinlock_lock(&tap_dev_write_lock); tap_device_tx(vdev->tap_representor, buffs[i]); rte_spinlock_unlock(&tap_dev_write_lock); } return 0; } int tap_representor_init(struct mr_instance * mr_instance, struct mr_vdev * vdev) { struct vdev * _vdev = vdev->vdi->vdev; struct tap_device * tap_dev = tap_device_create(mr_instance, vdev->devsym, &_vdev->ether_addr); if (unlikely(tap_dev == NULL)) { MR_ERROR("failed at create tap representor for %s, ignore it.", vdev->devsym); return -1; } tap_dev->ref_mr_instance = mr_instance; tap_dev->ref_vdev = vdev; vdev->tap_representor = tap_dev; /* create the tap representor poll thread at first time. * this thread is use for recv the packet from the tap, and forward to the rings */ if (mr_instance->pid_tap_resp_poll == 0) { int ret = pthread_create(&mr_instance->pid_tap_resp_poll, NULL, tap_representor_poll_thread_entry, (void *)mr_instance); if (unlikely(ret < 0)) { MR_ERROR("failed at creating thread for tap representor poll routine: %s", strerror(errno)); return -2; } } MR_INFO("tap representor for %s created. ", vdev->devsym); return 0; }