/* SmartOffload Flow Management */

/*
 * Standard and DPDK headers used by this file.  Project-local headers
 * (sc_main, phydev/vdev, pkt_parser, cJSON, and common macros such as
 * ZMALLOC / MR_ERROR / RT_SUCCESS) are assumed to be pulled in here as well;
 * their exact file names are project-specific.
 */
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <sched.h>
#include <arpa/inet.h>

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_mbuf.h>
#include <rte_memcpy.h>
#include <rte_spinlock.h>
#include <rte_lcore.h>
#include <rte_cycles.h>
#include <rte_version.h>

#define SMARTOFFLOAD_DEV_MAX 512

struct smartoffload_dev_instance
{
    struct dpdk_dev * phydev;
    struct vdev * vdev;

    unsigned int map_lcore_to_rxq[RTE_MAX_LCORE];
    unsigned int map_lcore_to_txq[RTE_MAX_LCORE];

    rte_spinlock_t timeout_lock;
};

struct smartoffload_stat_per_lcore
{
    /* request handling */
    volatile uint64_t offload_request_recv;
    volatile uint64_t offload_request_accept_success;
    volatile uint64_t offload_request_accept_err_flow_construct;
    volatile uint64_t offload_request_accept_err_flow_create;

    /* flows */
    volatile uint64_t flow_create;
    volatile uint64_t flow_delete;

    /* hits */
    volatile uint64_t flow_hits;
    volatile uint64_t flow_hits_bytes;
} __rte_cache_aligned;

#define SMARTOFFLOAD_STAT_ADD(st, counter, value)      \
    do                                                 \
    {                                                  \
        uint32_t lcore_id = rte_lcore_id();            \
        st->stat_per_lcore[lcore_id].counter += value; \
    } while (0)

struct smartoffload_main
{
    struct smartoffload_dev_instance * dev_instances[SMARTOFFLOAD_DEV_MAX];
    unsigned int nr_dev_instances;

    /* stats */
    struct smartoffload_stat_per_lcore stat_per_lcore[RTE_MAX_LCORE];
};

int smartoffload_config(struct sc_main * sc)
{
    return 0;
}

int smartoffload_init(struct sc_main * sc)
{
    sc->smartoffload_main = ZMALLOC(sizeof(struct smartoffload_main));
    MR_VERIFY_MALLOC(sc->smartoffload_main);

    struct smartoffload_main * smartoffload_main = sc->smartoffload_main;
    struct dpdk_dev * phydev_iter = NULL;

    /* Walk all physical devices on which smartoffload is enabled. */
    while (phydev_iterate(sc->phydev_main, &phydev_iter) >= 0)
    {
        if (!phydev_iter->en_smartoffload)
        {
            continue;
        }

        /* A physical device with smartoffload enabled must have a virtual device. */
        struct vdev * vdev_handle = vdev_lookup(sc->vdev_main, phydev_iter->symbol);
        if (unlikely(vdev_handle == NULL))
        {
            MR_ERROR("The device %s with smartoffload enabled must have a virtual device.", phydev_iter->symbol);
            goto errout;
        }

        struct smartoffload_dev_instance * dev_instance = ZMALLOC(sizeof(struct smartoffload_dev_instance));
        MR_VERIFY_MALLOC(dev_instance);

        dev_instance->phydev = phydev_iter;
        dev_instance->vdev = vdev_handle;
        rte_spinlock_init(&dev_instance->timeout_lock);

        /* Map each offload lcore to its own rx/tx queue pair. */
        unsigned int qid = 0;
        for (unsigned int lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
        {
            if (!CPU_ISSET(lcore_id, &sc->cpu_set_offload))
                continue;

            dev_instance->map_lcore_to_rxq[lcore_id] = qid;
            dev_instance->map_lcore_to_txq[lcore_id] = qid;
            qid++;
        }

        assert(dev_instance->phydev != NULL);
        assert(dev_instance->vdev != NULL);

        /* Add the dev_instance to main */
        smartoffload_main->dev_instances[smartoffload_main->nr_dev_instances++] = dev_instance;
    }

    return RT_SUCCESS;

errout:
    return RT_ERR;
}

union mr_flow_item_union
{
    struct rte_flow_item_eth __flow_item_eth;
    struct rte_flow_item_ipv4 __flow_item_ipv4;
    struct rte_flow_item_ipv6 __flow_item_ipv6;
    struct rte_flow_item_vxlan __flow_item_vxlan;
    struct rte_flow_item_tcp __flow_item_tcp;
    struct rte_flow_item_udp __flow_item_udp;
};

union mr_flow_action_conf_union
{
    struct rte_flow_action_rss __action_conf_rss;
    struct rte_flow_action_age __action_conf_age;
    struct rte_flow_action_queue __action_conf_queue;
};

/* An rte_flow_item plus the backing storage its spec/last/mask pointers refer to. */
struct mr_flow_item_storage
{
    struct rte_flow_item ptr;
    union mr_flow_item_union storage_spec;
    union mr_flow_item_union storage_last;
    union mr_flow_item_union storage_mask;
};

/* An rte_flow_action plus the backing storage its conf pointer refers to. */
struct mr_flow_action_storage
{
    struct rte_flow_action ptr;
    union mr_flow_action_conf_union storage_conf;
};
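/*
 * Illustration only (not referenced elsewhere in this file): how a single
 * pattern entry can be populated by hand.  The point of the storage structs
 * above is that spec/last/mask live next to the rte_flow_item, so the
 * pointers stay valid for as long as the owning flow does.  The helper name
 * and the TCP destination-port match are assumptions made for this sketch.
 */
static inline void mr_flow_item_storage_example_tcp(struct mr_flow_item_storage * st, rte_be16_t be_dst_port)
{
    memset(st, 0, sizeof(*st));

    st->ptr.type = RTE_FLOW_ITEM_TYPE_TCP;

    /* spec: match this destination port; mask: compare the full 16 bits */
    st->storage_spec.__flow_item_tcp.hdr.dst_port = be_dst_port;
    st->storage_mask.__flow_item_tcp.hdr.dst_port = RTE_BE16(0xffff);

    st->ptr.spec = &st->storage_spec;
    st->ptr.mask = &st->storage_mask;
}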
static inline struct rte_flow_item * to_rte_flow_item(struct mr_flow_item_storage * st_flow)
{
    return &st_flow->ptr;
}

static inline struct rte_flow_action * to_rte_flow_action(struct mr_flow_action_storage * st_action)
{
    return &st_action->ptr;
}

static inline void to_rte_flow_item_array(const struct mr_flow_item_storage * st_flow, unsigned int nr_st_flow,
                                          struct rte_flow_item flow_item_array[], unsigned int nr_flow_item_array)
{
    for (unsigned int i = 0; i < RTE_MIN(nr_st_flow, nr_flow_item_array); i++)
    {
        flow_item_array[i] = st_flow[i].ptr;
    }
}

static inline void to_rte_flow_action_array(const struct mr_flow_action_storage * st_action, unsigned int nr_st_action,
                                            struct rte_flow_action flow_action_array[], unsigned int nr_flow_action_array)
{
    for (unsigned int i = 0; i < RTE_MIN(nr_st_action, nr_flow_action_array); i++)
    {
        flow_action_array[i] = st_action[i].ptr;
    }
}

static void __flow_item_to_str_type_item_eth(const struct rte_flow_item_eth * item, char * outbuf, unsigned int sz_outbuf)
{
    char buf_ether_src[RTE_ETHER_ADDR_FMT_SIZE];
    char buf_ether_dst[RTE_ETHER_ADDR_FMT_SIZE];

    rte_ether_format_addr(buf_ether_src, sizeof(buf_ether_src), &item->src);
    rte_ether_format_addr(buf_ether_dst, sizeof(buf_ether_dst), &item->dst);

    snprintf(outbuf, sz_outbuf - 1, "src:%s, dst:%s, type=%x, has_vlan=%u",
             buf_ether_src, buf_ether_dst, item->type, item->has_vlan);
    return;
}

static void __flow_item_to_str_type_item_ipv4(const struct rte_flow_item_ipv4 * item, char * outbuf, unsigned int sz_outbuf)
{
    char buf_addr_src[INET_ADDRSTRLEN];
    char buf_addr_dst[INET_ADDRSTRLEN];

    if (inet_ntop(AF_INET, &item->hdr.src_addr, buf_addr_src, sizeof(buf_addr_src)) == NULL)
    {
        strncpy(buf_addr_src, "error", sizeof(buf_addr_src) - 1);
    }
    if (inet_ntop(AF_INET, &item->hdr.dst_addr, buf_addr_dst, sizeof(buf_addr_dst)) == NULL)
    {
        strncpy(buf_addr_dst, "error", sizeof(buf_addr_dst) - 1);
    }

    snprintf(outbuf, sz_outbuf - 1, "src:%s, dst:%s", buf_addr_src, buf_addr_dst);
    return;
}

static void __flow_item_to_str_type_item_vxlan(const struct rte_flow_item_vxlan * item, char * outbuf, unsigned int sz_outbuf)
{
    snprintf(outbuf, sz_outbuf - 1, "flags:%x, vni:%x,%x,%x",
             item->flags, item->vni[0], item->vni[1], item->vni[2]);
}

static void __flow_item_to_str_type_item_udp(const struct rte_flow_item_udp * item, char * outbuf, unsigned int sz_outbuf)
{
    snprintf(outbuf, sz_outbuf - 1, "sport:%u, dport:%u",
             ntohs(item->hdr.src_port), ntohs(item->hdr.dst_port));
}

static void __flow_item_to_str_type_item_tcp(const struct rte_flow_item_tcp * item, char * outbuf, unsigned int sz_outbuf)
{
    snprintf(outbuf, sz_outbuf - 1, "sport:%u, dport:%u",
             ntohs(item->hdr.src_port), ntohs(item->hdr.dst_port));
}

static void __flow_item_to_str_helper(const struct rte_flow_item * item, char * outbuf, unsigned int sz_outbuf)
{
    char str_type[MR_STRING_MAX] = "unknown";
    char str_spec[MR_STRING_MAX] = "";
    char str_mask[MR_STRING_MAX] = "";

    switch (item->type)
    {
#define _GEN_CODE(_type, _short_type, _str_type)                                                                    \
    case _type: {                                                                                                   \
        const struct rte_flow_item_##_short_type * __spec = (const struct rte_flow_item_##_short_type *)item->spec; \
        const struct rte_flow_item_##_short_type * __mask = (const struct rte_flow_item_##_short_type *)item->mask; \
                                                                                                                    \
        snprintf(str_type, sizeof(str_type) - 1, "%s", _str_type);                                                  \
        __flow_item_to_str_type_item_##_short_type(__spec, str_spec, sizeof(str_spec));                             \
        __flow_item_to_str_type_item_##_short_type(__mask, str_mask, sizeof(str_mask));                             \
        break;                                                                                                      \
    }

        _GEN_CODE(RTE_FLOW_ITEM_TYPE_ETH, eth, "ETH")
        _GEN_CODE(RTE_FLOW_ITEM_TYPE_IPV4, ipv4, "IPV4")
        _GEN_CODE(RTE_FLOW_ITEM_TYPE_VXLAN, vxlan, "VXLAN")
        _GEN_CODE(RTE_FLOW_ITEM_TYPE_UDP, udp, "UDP")
        _GEN_CODE(RTE_FLOW_ITEM_TYPE_TCP, tcp, "TCP")
#undef _GEN_CODE
    default:
        break;
    }

    snprintf(outbuf, sz_outbuf - 1, "TYPE: %s, SPEC: %s, MASK: %s", str_type, str_spec, str_mask);
}

static inline void mr_flow_item_init(struct mr_flow_item_storage * st_flow)
{
    memset(st_flow, 0, sizeof(*st_flow));
}

static inline void mr_flow_action_init(struct mr_flow_action_storage * st_action)
{
    memset(st_action, 0, sizeof(*st_action));
}

/* Copy a spec/last/mask value into the item's backing storage and point the rte_flow_item at it. */
#define FLOW_ITEM_VAL_SET(_st_flow, _val_type, _val)                        \
    do                                                                      \
    {                                                                       \
        _st_flow->ptr._val_type = &_st_flow->storage_##_val_type;           \
        rte_memcpy((void *)_st_flow->ptr._val_type, _val, sizeof(*(_val))); \
    } while (0)

/* Apply a byte-wise mask to a spec in place (both arguments must share the same layout). */
#define FLOW_ITEM_VAL_DO_MASK(_val, _mask)                                            \
    do                                                                                \
    {                                                                                 \
        for (unsigned int i = 0; i < sizeof(*(_val)); i++)                            \
            ((char *)(_val))[i] = ((char *)(_val))[i] & ((const char *)(_mask))[i];   \
    } while (0)

#define FLOW_ITEM_TYPE_SET(_st_flow, _val) \
    do                                     \
    {                                      \
        _st_flow->ptr.type = _val;         \
    } while (0)

#define ACTION_ITEM_TYPE_SET(_st_action, _val) \
    do                                         \
    {                                          \
        _st_action->ptr.type = _val;           \
    } while (0)

#define ACTION_ITEM_VAL_SET(_st_action, _val)                                 \
    do                                                                        \
    {                                                                         \
        _st_action->ptr.conf = &_st_action->storage_conf;                     \
        rte_memcpy((void *)&_st_action->storage_conf, _val, sizeof(*(_val))); \
    } while (0)

struct smartoffload_flow
{
    /* offload request */
    char * pkt_header;
    unsigned int pkt_header_len;
    unsigned int age;

    /* apply to */
    struct dpdk_dev * phydev;

    /* matchers and actions */
#define __MAX_PATTERN_LAYERS 32
#define __MAX_ACTION_LAYERS 16
    struct mr_flow_item_storage patterns[__MAX_PATTERN_LAYERS];
    unsigned int nr_patterns;
    struct mr_flow_action_storage actions[__MAX_ACTION_LAYERS];
    unsigned int nr_actions;

    /* handlers */
    struct rte_flow * flow;
};

static struct mr_flow_item_storage * __push_pattern_helper(struct smartoffload_flow * flow)
{
    if (flow->nr_patterns >= __MAX_PATTERN_LAYERS)
        return NULL;

    struct mr_flow_item_storage * st_item = &flow->patterns[flow->nr_patterns++];
    memset(st_item, 0, sizeof(struct mr_flow_item_storage));
    return st_item;
}

static struct mr_flow_action_storage * __push_action_helper(struct smartoffload_flow * flow)
{
    if (flow->nr_actions >= __MAX_ACTION_LAYERS)
        return NULL;

    struct mr_flow_action_storage * st_action = &flow->actions[flow->nr_actions++];
    memset(st_action, 0, sizeof(struct mr_flow_action_storage));
    return st_action;
}

/*
   The smartoffload request is serialized in a TLV-like format, which can be
   transferred in shared memory or in Ethernet packets.  The request is
   serialized in libmarsio.so and deserialized in the mrzcpd application.

   The structure of the serialized request is as follows:

   ---------------------------------------------------------
   TYPE: 2 bytes, LENGTH: 2 bytes, VALUE: [variable length]

   MAGIC NUMBER: 0x4d5b

   1. TYPE(Raw Packet Header), LENGTH: [VAR],
      VALUE: the packet headers (up to and including the TCP/UDP header) of
      the flow to be offloaded.
   2. TYPE(AGE), LENGTH: [4 bytes],
      VALUE: unsigned int, lifetime in seconds of the offload request.
*/

struct tlv_header_define
{
    uint16_t be_type;
    uint16_t be_length;
    char value[0];
};

enum
{
    SMARTOFFLOAD_COMM_TYPE_PKT_HEADER = 0,
    SMARTOFFLOAD_COMM_TYPE_AGE = 1,
    SMARTOFFLOAD_COMM_TYPE_COUNTER_PKTS = 2,
    SMARTOFFLOAD_COMM_TYPE_COUNTER_BYTES = 3
};
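/*
 * Illustration only: a minimal sketch of how the requesting side (per the
 * comment above, libmarsio.so) could append one TLV record to a request
 * buffer.  The helper name, the caller-supplied buffer handling and the
 * magic-number handling are assumptions for this sketch, not part of the
 * existing API.
 */
static inline unsigned int smartoffload_tlv_put_example(char * buf, uint16_t type, const void * value, uint16_t length)
{
    struct tlv_header_define * tlv = (struct tlv_header_define *)buf;

    /* be_type / be_length travel in network byte order */
    tlv->be_type = htons(type);
    tlv->be_length = htons(length);
    memcpy(tlv->value, value, length);

    return sizeof(struct tlv_header_define) + length;
}

/*
 * Example usage (assumed caller code):
 *
 *     char buf[256];
 *     uint32_t be_age = htonl(30);
 *     unsigned int off = 0;
 *
 *     off += smartoffload_tlv_put_example(buf + off, SMARTOFFLOAD_COMM_TYPE_PKT_HEADER, pkt_hdr, pkt_hdr_len);
 *     off += smartoffload_tlv_put_example(buf + off, SMARTOFFLOAD_COMM_TYPE_AGE, &be_age, sizeof(be_age));
 */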
static int flow_action_create_swap_vxlan_encap_header(struct smartoffload_flow * flow)
{
    if (flow->nr_patterns < 2)
    {
        goto errout;
    }

    /* extract the target loopback IP and MAC addresses from the pattern */
    struct mr_flow_item_storage * st_outer_ether_addr = &flow->patterns[0];
    struct rte_flow_item * rt_outer_ether_layer = to_rte_flow_item(st_outer_ether_addr);

    struct mr_flow_item_storage * st_outer_ipv4_addr = &flow->patterns[1];
    struct rte_flow_item * rt_outer_ipv4_layer = to_rte_flow_item(st_outer_ipv4_addr);

    if (rt_outer_ether_layer == NULL || rt_outer_ether_layer->type != RTE_FLOW_ITEM_TYPE_ETH)
    {
        goto errout;
    }
    if (rt_outer_ipv4_layer == NULL || rt_outer_ipv4_layer->type != RTE_FLOW_ITEM_TYPE_IPV4)
    {
        goto errout;
    }

    /* first actions: swap the outer MAC addresses */
    struct rte_flow_item_eth * ether_spec = (struct rte_flow_item_eth *)rt_outer_ether_layer->spec;

    struct rte_flow_action_set_mac action_conf_set_src_mac;
    struct rte_flow_action_set_mac action_conf_set_dst_mac;
    rte_memcpy(action_conf_set_src_mac.mac_addr, ether_spec->dst.addr_bytes, RTE_ETHER_ADDR_LEN);
    rte_memcpy(action_conf_set_dst_mac.mac_addr, ether_spec->src.addr_bytes, RTE_ETHER_ADDR_LEN);

    /* then swap the outer IPv4 source and destination addresses */
    struct rte_flow_item_ipv4 * ipv4_spec = (struct rte_flow_item_ipv4 *)rt_outer_ipv4_layer->spec;

    struct rte_flow_action_set_ipv4 action_conf_set_src_ipv4 = {
        .ipv4_addr = ipv4_spec->hdr.dst_addr,
    };
    struct rte_flow_action_set_ipv4 action_conf_set_dst_ipv4 = {
        .ipv4_addr = ipv4_spec->hdr.src_addr,
    };

    struct mr_flow_action_storage * st_action;

    st_action = __push_action_helper(flow);
    ACTION_ITEM_TYPE_SET(st_action, RTE_FLOW_ACTION_TYPE_SET_MAC_SRC);
    ACTION_ITEM_VAL_SET(st_action, &action_conf_set_src_mac);

    st_action = __push_action_helper(flow);
    ACTION_ITEM_TYPE_SET(st_action, RTE_FLOW_ACTION_TYPE_SET_MAC_DST);
    ACTION_ITEM_VAL_SET(st_action, &action_conf_set_dst_mac);

    st_action = __push_action_helper(flow);
    ACTION_ITEM_TYPE_SET(st_action, RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC);
    ACTION_ITEM_VAL_SET(st_action, &action_conf_set_src_ipv4);

    st_action = __push_action_helper(flow);
    ACTION_ITEM_TYPE_SET(st_action, RTE_FLOW_ACTION_TYPE_SET_IPV4_DST);
    ACTION_ITEM_VAL_SET(st_action, &action_conf_set_dst_ipv4);

    /* decrement the TTL, for debugging */
    st_action = __push_action_helper(flow);
    ACTION_ITEM_TYPE_SET(st_action, RTE_FLOW_ACTION_TYPE_DEC_TTL);

    return 0;

errout:
    return RT_ERR;
}

static int flow_action_create_hairpin(struct smartoffload_flow * flow, unsigned int qid_hairpin, unsigned int timeout)
{
    struct rte_flow_action_count action_conf_dedicated_counter = {
#if RTE_VERSION_NUM(21, 11, 0, 0) <= RTE_VERSION
        .id = 0,
#else
        .shared = 0,
#endif
    };

    struct rte_flow_action_queue action_conf_hairpin_queue = {
        .index = qid_hairpin,
    };

    /* flow->flow is still NULL here; a NULL age context makes
       rte_flow_get_aged_flows() hand back the rte_flow handle itself,
       which is what the timeout path expects. */
    struct rte_flow_action_age action_conf_age = {
        .context = flow->flow,
        .timeout = timeout,
    };

    struct mr_flow_action_storage * st_action;

    st_action = __push_action_helper(flow);
    ACTION_ITEM_TYPE_SET(st_action, RTE_FLOW_ACTION_TYPE_COUNT);
    ACTION_ITEM_VAL_SET(st_action, &action_conf_dedicated_counter);

    st_action = __push_action_helper(flow);
    ACTION_ITEM_TYPE_SET(st_action, RTE_FLOW_ACTION_TYPE_QUEUE);
    ACTION_ITEM_VAL_SET(st_action, &action_conf_hairpin_queue);

    st_action = __push_action_helper(flow);
    ACTION_ITEM_TYPE_SET(st_action, RTE_FLOW_ACTION_TYPE_AGE);
    ACTION_ITEM_VAL_SET(st_action, &action_conf_age);

    return 0;
}

static int flow_action_create_drop(struct smartoffload_flow * flow, unsigned int timeout)
{
    struct rte_flow_action_count action_conf_dedicated_counter = {
#if RTE_VERSION_NUM(21, 11, 0, 0) <= RTE_VERSION
        .id = 0,
#else
        .shared = 0,
#endif
    };

    /* keep the age context NULL so that rte_flow_get_aged_flows() returns the
       rte_flow handle, as the timeout path assumes */
    struct rte_flow_action_age action_conf_age = {
        .context = NULL,
        .timeout = timeout,
    };

    struct mr_flow_action_storage * st_action;

    st_action = __push_action_helper(flow);
    ACTION_ITEM_TYPE_SET(st_action, RTE_FLOW_ACTION_TYPE_COUNT);
    ACTION_ITEM_VAL_SET(st_action, &action_conf_dedicated_counter);

    st_action = __push_action_helper(flow);
    ACTION_ITEM_TYPE_SET(st_action, RTE_FLOW_ACTION_TYPE_AGE);
    ACTION_ITEM_VAL_SET(st_action, &action_conf_age);

    st_action = __push_action_helper(flow);
    ACTION_ITEM_TYPE_SET(st_action, RTE_FLOW_ACTION_TYPE_DROP);

    st_action = __push_action_helper(flow);
    ACTION_ITEM_TYPE_SET(st_action, RTE_FLOW_ACTION_TYPE_END);

    return 0;
}

static const struct rte_flow_item_eth __flow_item_eth_mask = {
    .type = RTE_BE16(0xffff),
};

static int flow_pattern_create_by_pkt_header(struct smartoffload_flow * flow, const char * pkt, unsigned int pktlen)
{
    /* parse the packet headers */
    struct pkt_parser pkt_parser;
    struct pkt_parser * pkt_parser_ptr = &pkt_parser;

    pkt_parser_init(pkt_parser_ptr, LAYER_TYPE_ALL, MR_PKT_PARSE_RESULT_MAX);
    complex_parser_ether(pkt_parser_ptr, pkt);

    /* Set up the offload pattern from the parser's results. */
    for (unsigned int i = 0; i < pkt_parser_ptr->nr_results; i++)
    {
        struct pkt_parser_result * result = &pkt_parser_ptr->results[i];
        struct pkt_parser_result * next_layer_result =
            i + 1 < pkt_parser_ptr->nr_results ? &pkt_parser_ptr->results[i + 1] : NULL;

        struct mr_flow_item_storage * st_item = __push_pattern_helper(flow);

        if (result->this_layer_type == LAYER_TYPE_ETHER)
        {
            /* The Ethernet layer could arguably be ignored, because packets of
               the same flow may arrive from several routers (ECMP policy) or
               over LAG ports. */
            struct rte_ether_hdr * ether_hdr = (struct rte_ether_hdr *)(result->data);
            struct rte_flow_item_eth eth_spec = {};

#if RTE_VERSION_NUM(21, 11, 0, 0) <= RTE_VERSION
            rte_ether_addr_copy(&ether_hdr->src_addr, &eth_spec.hdr.src_addr);
            rte_ether_addr_copy(&ether_hdr->dst_addr, &eth_spec.hdr.dst_addr);
#else
            rte_ether_addr_copy(&ether_hdr->s_addr, &eth_spec.src);
            rte_ether_addr_copy(&ether_hdr->d_addr, &eth_spec.dst);
#endif

            FLOW_ITEM_TYPE_SET(st_item, RTE_FLOW_ITEM_TYPE_ETH);
            FLOW_ITEM_VAL_DO_MASK(&eth_spec, &rte_flow_item_eth_mask);
            FLOW_ITEM_VAL_SET(st_item, spec, &eth_spec);
            FLOW_ITEM_VAL_SET(st_item, mask, &rte_flow_item_eth_mask);
            continue;
        }
        else if (result->this_layer_type == LAYER_TYPE_IPV4)
        {
            struct rte_ipv4_hdr * ipv4_hdr = (struct rte_ipv4_hdr *)(result->data);

            FLOW_ITEM_TYPE_SET(st_item, RTE_FLOW_ITEM_TYPE_IPV4);
            FLOW_ITEM_VAL_DO_MASK(ipv4_hdr, &rte_flow_item_ipv4_mask);
            FLOW_ITEM_VAL_SET(st_item, spec, ipv4_hdr);
            FLOW_ITEM_VAL_SET(st_item, mask, &rte_flow_item_ipv4_mask);
            continue;
        }
        else if (result->this_layer_type == LAYER_TYPE_IPV6)
        {
            struct rte_ipv6_hdr * ipv6_hdr = (struct rte_ipv6_hdr *)(result->data);

            FLOW_ITEM_TYPE_SET(st_item, RTE_FLOW_ITEM_TYPE_IPV6);
            FLOW_ITEM_VAL_DO_MASK(ipv6_hdr, &rte_flow_item_ipv6_mask);
            FLOW_ITEM_VAL_SET(st_item, spec, ipv6_hdr);
            FLOW_ITEM_VAL_SET(st_item, mask, &rte_flow_item_ipv6_mask);
            continue;
        }
        else if (result->this_layer_type == LAYER_TYPE_TCP)
        {
            struct rte_tcp_hdr * tcp_hdr = (struct rte_tcp_hdr *)(result->data);

            FLOW_ITEM_TYPE_SET(st_item, RTE_FLOW_ITEM_TYPE_TCP);
            FLOW_ITEM_VAL_DO_MASK(tcp_hdr, &rte_flow_item_tcp_mask);
            FLOW_ITEM_VAL_SET(st_item, spec, tcp_hdr);
            FLOW_ITEM_VAL_SET(st_item, mask, &rte_flow_item_tcp_mask);
            continue;
        }
        else if (result->this_layer_type == LAYER_TYPE_UDP)
        {
            struct rte_udp_hdr * udp_hdr = (struct rte_udp_hdr *)(result->data);

            FLOW_ITEM_TYPE_SET(st_item, RTE_FLOW_ITEM_TYPE_UDP);
            FLOW_ITEM_VAL_SET(st_item, spec, udp_hdr);

            /* Some tunnel headers such as GTP and VXLAN are carried as UDP
               payload, so we need to know whether the next layer is a tunnel.
               For tunnels, the UDP source port is ignored. */
            static const struct rte_flow_item_udp flow_item_udp_mask_ignore_source = {
                .hdr = {
                    .src_port = RTE_BE16(0x0000),
                    .dst_port = RTE_BE16(0xffff),
                },
            };

            if (next_layer_result != NULL && next_layer_result->this_layer_type == LAYER_TYPE_G_VXLAN)
            {
                FLOW_ITEM_VAL_DO_MASK(udp_hdr, &flow_item_udp_mask_ignore_source);
                FLOW_ITEM_VAL_SET(st_item, mask, &flow_item_udp_mask_ignore_source);
            }
            else
            {
                FLOW_ITEM_VAL_DO_MASK(udp_hdr, &rte_flow_item_udp_mask);
                FLOW_ITEM_VAL_SET(st_item, mask, &rte_flow_item_udp_mask);
            }
            continue;
        }
        else if (result->this_layer_type == LAYER_TYPE_G_VXLAN)
        {
            struct g_vxlan_hdr * g_vxlan_hdr = (struct g_vxlan_hdr *)(result->data);

            static_assert(sizeof(struct g_vxlan_hdr) == sizeof(struct rte_flow_item_vxlan),
                          "g_vxlan_hdr must have the same size as the flow item");

            FLOW_ITEM_TYPE_SET(st_item, RTE_FLOW_ITEM_TYPE_VXLAN);
            /* mask the header in place, as the other layers do */
            FLOW_ITEM_VAL_DO_MASK(g_vxlan_hdr, &rte_flow_item_vxlan_mask);
            FLOW_ITEM_VAL_SET(st_item, spec, g_vxlan_hdr);
            FLOW_ITEM_VAL_SET(st_item, mask, &rte_flow_item_vxlan_mask);
            continue;
        }
    }

    /* add the END item */
    struct mr_flow_item_storage * st_item_end = __push_pattern_helper(flow);
    FLOW_ITEM_TYPE_SET(st_item_end, RTE_FLOW_ITEM_TYPE_END);

    return RT_SUCCESS;

not_supported: /* currently unreachable; kept for future layer types */
    /* TODO: release all heap memory */
    return RT_ERR;
}

void smartoffload_flow_destroy(struct smartoffload_flow * flow)
{
    if (flow->pkt_header != NULL)
    {
        FREE(flow->pkt_header);
    }
#if 0
    if (flow->patterns != NULL)
    {
        FREE(flow->patterns);
    }
    if (flow->actions != NULL)
    {
        FREE(flow->actions);
    }
    FREE(flow);
#endif
}

int smartoffload_flow_init_by_deserialized(struct smartoffload_flow * offload_flow, struct dpdk_dev * phydev,
                                           const char * offload_request, unsigned int offload_request_len)
{
#if 0
    struct smartoffload_flow * offload_flow = ZMALLOC(sizeof(struct smartoffload_flow));
    MR_VERIFY_MALLOC(offload_flow);

    offload_flow->patterns = ZMALLOC(sizeof(struct mr_flow_item_storage) * __MAX_PATTERN_LAYERS);
    MR_VERIFY_MALLOC(offload_flow->patterns);

    offload_flow->actions = ZMALLOC(sizeof(struct mr_flow_action_storage) * __MAX_ACTION_LAYERS);
    MR_VERIFY_MALLOC(offload_flow->actions);
#endif

    unsigned int cur_offset = 0;
    struct tlv_header_define * tlv_header;

    /* TODO: check the magic number */

    /* deserialize the TLV request */
    while (cur_offset < offload_request_len)
    {
        tlv_header = (struct tlv_header_define *)RTE_PTR_ADD(offload_request, cur_offset);

        /* be_type and be_length are in network order; convert to host order */
        uint16_t le_type = ntohs(tlv_header->be_type);
        uint16_t le_length = ntohs(tlv_header->be_length);

        /* TODO: boundary check */
        switch (le_type)
        {
        case SMARTOFFLOAD_COMM_TYPE_PKT_HEADER:
            offload_flow->pkt_header = ZMALLOC(le_length);
            offload_flow->pkt_header_len = le_length;
            MR_VERIFY_MALLOC(offload_flow->pkt_header);
            memcpy(offload_flow->pkt_header, tlv_header->value, offload_flow->pkt_header_len);
            break;
        case SMARTOFFLOAD_COMM_TYPE_AGE:
            offload_flow->age = ntohl(*(uint32_t *)tlv_header->value);
            break;
        default:
            /* unknown type */
            goto errout;
        }

        cur_offset += sizeof(struct tlv_header_define) + le_length;
    }

    /* convert the packet header to rte_flow patterns */
    if (flow_pattern_create_by_pkt_header(offload_flow, offload_flow->pkt_header, offload_flow->pkt_header_len) < 0)
    {
        goto errout;
    }

    if (flow_action_create_hairpin(offload_flow, phydev->hairpin_q, offload_flow->age) < 0)
    {
        goto errout;
    }

    if (flow_action_create_swap_vxlan_encap_header(offload_flow) < 0)
    {
        goto errout;
    }

    return 0;

errout:
#if 0
    if (offload_flow != NULL)
    {
        smartoffload_flow_destroy(offload_flow);
        offload_flow = NULL;
    }
#endif
    return -1;
}

static const struct rte_flow_attr __flow_offload_attr = {
    .group = 1,
    .ingress = 1,
    .priority = 0,
};

void smartoffload_rxtx_loop_per_dev(struct sc_main * sc, struct smartoffload_dev_instance * dev_instance)
{
    struct vdev * vdev = dev_instance->vdev;
    struct dpdk_dev * phydev = dev_instance->phydev;
    struct rte_flow_error flow_error = {};
    struct smartoffload_main * main = sc->smartoffload_main;

    uint32_t lcore_id = rte_lcore_id();
    unsigned int local_rxq_id = dev_instance->map_lcore_to_rxq[lcore_id];

    /* collect control messages from the vdev */
    struct rte_mbuf * mbufs[MR_BURST_MAX];
    unsigned int rx_nr_mbufs = vdev_collect(vdev, local_rxq_id, mbufs, RTE_DIM(mbufs), VDEV_COLLECT_CTRL);
    if (unlikely(rx_nr_mbufs == 0))
    {
        return;
    }

    SMARTOFFLOAD_STAT_ADD(main, offload_request_recv, rx_nr_mbufs);

    for (unsigned int i = 0; i < rx_nr_mbufs; i++)
    {
        const char * ctrl_pkt_ptr = rte_pktmbuf_mtod(mbufs[i], const char *);
        unsigned int ctrl_pkt_len = rte_pktmbuf_data_len(mbufs[i]);

        struct smartoffload_flow __flow_in_stack = {};
        struct smartoffload_flow * smart_offload_flow = &__flow_in_stack;

        int ret = smartoffload_flow_init_by_deserialized(smart_offload_flow, phydev, ctrl_pkt_ptr, ctrl_pkt_len);
        if (unlikely(ret < 0))
        {
            SMARTOFFLOAD_STAT_ADD(main, offload_request_accept_err_flow_construct, 1);
            goto _errout;
        }

        /* convert mr_flow_*_storage entries to plain rte_flow arrays */
        struct rte_flow_item rt_patterns[__MAX_PATTERN_LAYERS] = {};
        struct rte_flow_action rt_actions[__MAX_ACTION_LAYERS] = {};

        /* Note: the hairpin path never pushes an explicit END action; the
           zero-initialized tail of rt_actions serves as RTE_FLOW_ACTION_TYPE_END. */
        to_rte_flow_item_array(smart_offload_flow->patterns, smart_offload_flow->nr_patterns, rt_patterns, RTE_DIM(rt_patterns));
        to_rte_flow_action_array(smart_offload_flow->actions, smart_offload_flow->nr_actions, rt_actions, RTE_DIM(rt_actions));

        ret = rte_flow_validate(phydev->port_id, &__flow_offload_attr, rt_patterns, rt_actions, &flow_error);
        if (unlikely(ret < 0))
        {
            MR_ERROR("Failed at flow validate: ret = %d\n", ret);
            SMARTOFFLOAD_STAT_ADD(main, offload_request_accept_err_flow_create, 1);
            goto _errout;
        }

        smart_offload_flow->flow = rte_flow_create(phydev->port_id, &__flow_offload_attr, rt_patterns, rt_actions, &flow_error);

        /* dump the flow */
        for (unsigned int layer = 0; layer < __MAX_PATTERN_LAYERS; layer++)
        {
            const struct rte_flow_item * item = &rt_patterns[layer];
            if (item->type == RTE_FLOW_ITEM_TYPE_END)
                break;
#if 0
            char str_flow_buf[MR_STRING_MAX];
            __flow_item_to_str_helper(item, str_flow_buf, sizeof(str_flow_buf));
            fprintf(stderr, "flow: %p, layer: %d: %s\n", smart_offload_flow->flow, layer, str_flow_buf);
#endif
        }

        if (unlikely(smart_offload_flow->flow == NULL))
        {
            SMARTOFFLOAD_STAT_ADD(main, offload_request_accept_err_flow_create, 1);
            // fprintf(stderr, "%s\n", flow_error.message);
            goto _errout;
        }

        FREE(smart_offload_flow->pkt_header);
        smart_offload_flow->pkt_header = NULL;

#if 0
        FREE(smart_offload_flow->patterns);
        smart_offload_flow->patterns = NULL;
        FREE(smart_offload_flow->actions);
        smart_offload_flow->actions = NULL;
#endif

        SMARTOFFLOAD_STAT_ADD(main, offload_request_accept_success, 1);
        SMARTOFFLOAD_STAT_ADD(main, flow_create, 1);
        continue;

    _errout:
        if (smart_offload_flow != NULL)
        {
            smartoffload_flow_destroy(smart_offload_flow);
            smart_offload_flow = NULL;
        }
    }

    /* release the mbufs */
    for (unsigned int i = 0; i < rx_nr_mbufs; i++)
    {
        struct rte_mbuf * pkt_to_free = mbufs[i];
        hook_rte_pktmbuf_free(pkt_to_free);
    }

    return;
}

void smartoffload_timeout_flows_per_dev(struct sc_main * sc, struct smartoffload_dev_instance * dev_instance)
{
    struct vdev * vdev = dev_instance->vdev;
    struct dpdk_dev * phydev = dev_instance->phydev;
    struct rte_flow_error flow_error = {};

    int ret = rte_spinlock_trylock(&dev_instance->timeout_lock);
    if (unlikely(ret <= 0))
    {
        return;
    }

#define __MAX_FLOW_CTXS 65536
    /* Note: 64K pointers is 512 KiB of stack; the lcore stack must be large
       enough to hold this array. */
    struct rte_flow * flow_ctxs[__MAX_FLOW_CTXS];

    int nr_aged_flows = rte_flow_get_aged_flows(phydev->port_id, (void **)flow_ctxs, __MAX_FLOW_CTXS, &flow_error);
    if (nr_aged_flows <= 0)
    {
        goto out;
    }

    static const struct rte_flow_action_count query_counter = {
#if RTE_VERSION_NUM(21, 11, 0, 0) <= RTE_VERSION
        .id = 0,
#else
        .shared = 0,
#endif
    };
    static const struct rte_flow_action actions[] = {
        {
            .type = RTE_FLOW_ACTION_TYPE_COUNT,
            .conf = &query_counter,
        },
        {
            .type = RTE_FLOW_ACTION_TYPE_END,
            .conf = NULL,
        },
    };

    /* delete all aged flows */
    for (int i = 0; i < nr_aged_flows; i++)
    {
        struct rte_flow * flow_ctx = flow_ctxs[i];
        int ret = 0;

        /* query the counters before destroying the flow */
        struct rte_flow_query_count counter = {0};
        ret = rte_flow_query(phydev->port_id, flow_ctx, actions, &counter, &flow_error);
        if (unlikely(ret < 0))
        {
            MR_ERROR("Cannot query counter result of port %s, flow %p: %s", phydev->symbol, flow_ctx, flow_error.message);
        }

        SMARTOFFLOAD_STAT_ADD(sc->smartoffload_main, flow_hits, counter.hits);
        SMARTOFFLOAD_STAT_ADD(sc->smartoffload_main, flow_hits_bytes, counter.bytes);

        ret = rte_flow_destroy(phydev->port_id, flow_ctx, &flow_error);
        if (unlikely(ret < 0))
        {
            MR_ERROR("Cannot delete flow from port %s: %s", phydev->symbol, flow_error.message);
        }
    }
    SMARTOFFLOAD_STAT_ADD(sc->smartoffload_main, flow_delete, nr_aged_flows);

out:
    rte_spinlock_unlock(&dev_instance->timeout_lock);
    return;
}

#define TIMEOUT_CHECK_US 500000

void smartoffload_rxtx_loop(struct sc_main * sc, unsigned int lcore_id)
{
    struct smartoffload_main * smartoffload_main = sc->smartoffload_main;

    const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * TIMEOUT_CHECK_US;
    static uint64_t prev_tsc[RTE_MAX_LCORE] = {};
    uint64_t cur_tsc = rte_rdtsc();

    for (unsigned int i = 0; i < smartoffload_main->nr_dev_instances; i++)
    {
        smartoffload_rxtx_loop_per_dev(sc, smartoffload_main->dev_instances[i]);
    }

    /* periodically reap aged-out flows */
    uint64_t diff_tsc = cur_tsc - prev_tsc[lcore_id];
    if (unlikely(diff_tsc > drain_tsc))
    {
        for (unsigned int i = 0; i < smartoffload_main->nr_dev_instances; i++)
        {
            smartoffload_timeout_flows_per_dev(sc, smartoffload_main->dev_instances[i]);
        }
        prev_tsc[lcore_id] = cur_tsc;
    }

    return;
}

int32_t smartoffload_service_entry(void * args)
{
    struct sc_main * sc = (struct sc_main *)args;
    unsigned int lcore_id = rte_lcore_id();

    smartoffload_rxtx_loop(sc, lcore_id);
    return 0;
}

cJSON * smartoffload_monit_loop(struct sc_main * sc)
{
    struct smartoffload_main * sm_main = sc->smartoffload_main;
    cJSON * json_root = cJSON_CreateObject();

#define _JSON_STAT_GENERATE(_counter, _str_counter)                      \
    do                                                                   \
    {                                                                    \
        cJSON * json_array = cJSON_CreateArray();                        \
        for (unsigned int i = 0; i < RTE_MAX_LCORE; i++)                 \
        {                                                                \
            if (!CPU_ISSET(i, &sc->cpu_set_offload))                     \
                continue;                                                \
                                                                         \
            uint64_t value = sm_main->stat_per_lcore[i]._counter;        \
            cJSON_AddItemToArray(json_array, cJSON_CreateNumber(value)); \
        }                                                                \
        cJSON_AddItemToObject(json_root, _str_counter, json_array);      \
    } while (0)

    /* request handling */
    _JSON_STAT_GENERATE(offload_request_recv, "req_recv");
    _JSON_STAT_GENERATE(offload_request_accept_success, "req_success");
    _JSON_STAT_GENERATE(offload_request_accept_err_flow_construct, "req_err_flow_construct");
    _JSON_STAT_GENERATE(offload_request_accept_err_flow_create, "req_err_flow_create");

    _JSON_STAT_GENERATE(flow_create, "flow_create");
    _JSON_STAT_GENERATE(flow_delete, "flow_delete");

    _JSON_STAT_GENERATE(flow_hits, "flow_hits_pkts");
    _JSON_STAT_GENERATE(flow_hits_bytes, "flow_hits_bytes");

    return json_root;
}
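/*
 * Illustration only: one way a caller could dump the per-lcore statistics
 * JSON produced by smartoffload_monit_loop().  The helper name is an
 * assumption; cJSON_Print(), cJSON_free() and cJSON_Delete() are standard
 * cJSON calls.
 */
static inline void smartoffload_monit_dump_example(struct sc_main * sc)
{
    cJSON * json_root = smartoffload_monit_loop(sc);

    char * text = cJSON_Print(json_root); /* heap-allocated, pretty-printed string */
    if (text != NULL)
    {
        fprintf(stderr, "%s\n", text);
        cJSON_free(text);
    }

    cJSON_Delete(json_root);
}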