From 86a43b4d325ddc850fa9dc4711670880f35b11e8 Mon Sep 17 00:00:00 2001 From: lijia Date: Wed, 24 Oct 2018 09:36:45 +0800 Subject: create new project. --- src/access/flowood_access.c | 1656 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1656 insertions(+) create mode 100644 src/access/flowood_access.c (limited to 'src/access/flowood_access.c') diff --git a/src/access/flowood_access.c b/src/access/flowood_access.c new file mode 100644 index 0000000..6af4866 --- /dev/null +++ b/src/access/flowood_access.c @@ -0,0 +1,1656 @@ +#include "flowood.h" +#include "flowood_fun.h" +#include "flwd_net.h" +#include "MESA_htable.h" +#include "MESA_list_queue.h" +#include "MESA_handle_logger.h" +#include "MESA_list_count.h" +#include "ltsm.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + IP复用系统接入网关, 负责接收终端的流量, 根据策略选择特定IP后, 做NAT转换, + 发送给转发网关, + 以独立进程方式运行. +*/ + +static int flwd_access_fetch_usable_ipport_by_single_ip( + flwd_active_ip_t *flwd_active_ip, flwd_active_ip_port_args_t *usable_active_ipport_args) +{ + int ret; + int try_times; + MESA_list_count_t *usable_sport_queue_head, *sport_list_node; + unsigned short sport; + long buf_len; + flwd_tuple5_t seek_nat_key; + unsigned int compat_io_hash; + char ip_str[64]; + int usable_sport_num; + + if(IPPROTO_TCP == usable_active_ipport_args->protocol){ + usable_sport_queue_head = &flwd_active_ip->usable_tcp_sport_list_head; + }else{ + usable_sport_queue_head = &flwd_active_ip->usable_udp_sport_list_head; + } + + usable_sport_num = MESA_list_count_get_count(usable_sport_queue_head); + if(usable_sport_num <= 0){ + /* 当前IP可用端口已全部分配完 */ + flwd_log(20, "ip_pool: %s, all usable sport is in used!\n", + flwd_ipt_ntop_r(&flwd_active_ip->active_ip_net_order, ip_str, 64)); + return ITERATE_CB_RET_CONTINUE_FLAG; + } + + flwd_tuple5_dup_to_stack(usable_active_ipport_args->tid, &seek_nat_key, usable_active_ipport_args->nat_key); + + flwd_tuple5_adjust_dir(&seek_nat_key); + + if(FLWD_IP_ADDR_TYPE_V4 == flwd_active_ip->active_ip_net_order.addr_type){ + seek_nat_key.ippair_v4.sip_net_order = flwd_active_ip->active_ip_net_order.addr_ipv4; + }else{ + memcpy(&seek_nat_key.ippair_v6->sip_net_order, + &flwd_active_ip->active_ip_net_order.addr_ipv6, + sizeof(struct in6_addr)); + } + + /* + TODO, + 此处选择了源IP, 还要选择合适的源端口, 使回复包凑巧能分流到当前发送的线程, + 可以使用ring_queue, 需要不停的遍历尝试可用端口, 直到hash值符合分流策略. 
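   A minimal sketch of that constraint, assuming a hypothetical helper name and signature
   (the loop that follows is the real implementation; this only distills the
   probe-until-the-hash-matches idea):

       // Hypothetical helper, illustration only: probe candidate source ports until
       // the 4-tuple hash of the eventual reply maps back to this worker thread.
       static unsigned short pick_sport_for_thread(flwd_tuple5_t probe,
                                                   const unsigned short *candidates,
                                                   int ncand,
                                                   unsigned int tid,
                                                   unsigned int nthreads)
       {
           int i;
           for (i = 0; i < ncand; i++) {
               probe.sport_net_order = candidates[i];               // vary only the source port
               if ((compat_marsio_tuple4_hash(&probe) % nthreads) == tid)
                   return candidates[i];                            // reply traffic will hash to tid
           }
           return 0;                                                // no suitable port found
       }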
+ */ + sport_list_node = usable_sport_queue_head->nextele; + + for(try_times = 0; + (try_times < usable_sport_num) && (sport_list_node != usable_sport_queue_head); + try_times++, sport_list_node = sport_list_node->nextele){ + /* 遍历所有端口, 查看哪个生成的四元组HASH凑巧符合当前线程id */ + memcpy(&seek_nat_key.sport_net_order, &sport_list_node->quiddity, sizeof(short)); + compat_io_hash = compat_marsio_tuple4_hash(&seek_nat_key); + if((compat_io_hash % flwd_cfg_val.tot_thread_count) == usable_active_ipport_args->tid){ + MESA_list_count_del(usable_sport_queue_head, sport_list_node); /* 从可用端口链表中移除 */ + free(sport_list_node); /* 端口已经获取到, 清除链表管理结构 */ + goto bingo; + } + } + + /* 虽然有可用端口, 但是当前IP没找到适合当前线程, 以构造合适的HASH值的最佳端口, 继续尝试下一个IP */ + flwd_log(20, "ip_pool: %s,has %d usable sport, but unfortunately no match hash value sport for current thread!\n", + usable_sport_num, + flwd_ipt_ntop_r(&flwd_active_ip->active_ip_net_order, ip_str, 64), usable_sport_num); + return ITERATE_CB_RET_CONTINUE_FLAG ; + +bingo: + usable_active_ipport_args->act_sip_net_order = flwd_active_ip->active_ip_net_order; + usable_active_ipport_args->act_sport_net_order = seek_nat_key.sport_net_order; + + memcpy(&usable_active_ipport_args->gdev_args, &flwd_active_ip->gdev_args, sizeof(flwd_gdev_associated_args_t)); + + return ITERATE_CB_RET_BREAK_FLAG | ITERATE_CB_RET_REVERSE_FLAG; +} + +/* + NOTE: + 为了同时适应static_pool和dynamic_pool, 共用一个函数, + 不同的是static_pool为一个flwd_active_ip_t结构的链表, 需要在此函数内循环遍历. +*/ +static int flwd_access_fetch_usable_ipport_by_ip_list_cb( + const uchar * key, uint size, void * data, void *user) +{ + int ret; + flwd_active_ip_t *flwd_active_ip_list_head = (flwd_active_ip_t *)data; + flwd_active_ip_t *tmp_node; + flwd_active_ip_port_args_t *usable_active_ipport_args = (flwd_active_ip_port_args_t *)user; + + tmp_node = flwd_active_ip_list_head; + do{ + ret = flwd_access_fetch_usable_ipport_by_single_ip(tmp_node, usable_active_ipport_args); + if(ITERATE_CB_RET_BREAK_FLAG == ret){ + break; + } + + tmp_node = (flwd_active_ip_t *)tmp_node->active_ip_list_node.nextele->quiddity; + }while(flwd_active_ip_list_head != tmp_node); + + return ret|ITERATE_CB_RET_REVERSE_FLAG /* 修改item顺序, 每个连接更换地址池里不同的IP */; +} + +/* + 输入: 真实客户端的四元组; + 输出: 可用的源活跃IP; + + 返回值: + 1 : 命中策略配置, 并且找到符号当前策略的可用源ip; + 0 : 无策略; + -1 : 命中了策略, 但没有可用IP; + + service_defined格式: + "IR_STRATEGY:123" +*/ +static int flwd_access_snat_search_policy(int tid, + flwd_tuple5_t *tuple5, flwd_active_ip_port_args_t *usable_active_ipport_net_order) +{ + unsigned int usable_active_ip = 0; + int ret; + int found_usable_policy_ip = -1; + +#if FLWD_NO_ACTIVE_IP_DISCOVER + return 0; /* 临时测试, 无策略 */ +#else + unsigned int policy_id; + MESA_list_t *list_node; + flwd_active_ip_t *ip_static_pool_list_head, *list_node_item; + flwd_ip_region_type_t ip_pool_retion; + unsigned char static_ip_group_key[64]; + int static_ip_group_key_len = 64; + + policy_id = flwd_access_maat_scan_rule(tid, tuple5); + if(0 == policy_id){ + return 0; + } + + flwd_log(10, "tuple4 %s hit policy id:%u\n", flwd_tuple5_ntop(tid, tuple5), policy_id); + + /* 复用的IP为当前目标IP的反向 */ + if(FLWD_IP_REGION_INLAND == usable_active_ipport_net_order->dip_region_type){ + ip_pool_retion = FLWD_IP_REGION_OUTLAND; + }else{ + ip_pool_retion = FLWD_IP_REGION_INLAND; + } + + flwd_policy_group_id_key_gen(policy_id, static_ip_group_key, &static_ip_group_key_len); + + ip_static_pool_list_head = flwd_ip_pool_search(FLWD_ACT_IP_STATIC, + flwd_thread_val[tid].flwd_ip_pool_static_htable[tuple5->addr_type][ip_pool_retion], + static_ip_group_key, + 
static_ip_group_key_len); + if(NULL == ip_static_pool_list_head){ + flwd_log(30, "tuple4 %s hit policy id:%u, but no valid ip in static ip_pool!\n", + flwd_tuple5_ntop(tid, tuple5), policy_id); + return 0; + } + + list_node = &ip_static_pool_list_head->active_ip_list_node; + do{ + list_node_item = (flwd_active_ip_t *)list_node->quiddity; + /* 复用htable_cb函数, 此处的key, size可不填 */ + ret = flwd_access_fetch_usable_ipport_by_single_ip(list_node_item, usable_active_ipport_net_order); + if(ITERATE_CB_RET_BREAK_FLAG == ret){ + found_usable_policy_ip = 1; + break; + } + list_node = list_node->nextele; + }while(list_node != &ip_static_pool_list_head->active_ip_list_node); + +#endif + + return found_usable_policy_ip; +} + + +/* + 从地址池中, 随机选择一个可用活跃IP. + + TODO: + 多个不同客户端, 使用了不同策略, 把同一个源IP分在不同的静态地址池里, + 即都用1.2.3.4做为出口IP, 端口范围是按access_gateway数量区分的, + 对于同一个access_gateway, 复用了一样的端口, 冲突怎么办? + 怎么解决冲突问题??!! + + 或者让界面保证, 一个IP地址只能属于一个静态地址池. +*/ +static int flwd_access_snat_fetch_usable_ip_from_pool( + MESA_htable_handle table, pthread_rwlock_t *flwd_ip_pool_rwlock, int tid, + flwd_tuple5_t *nat_key, flwd_active_ip_port_args_t *usable_active_ipport_args) +{ + + pthread_rwlock_wrlock(flwd_ip_pool_rwlock); + + /* 依靠htable的按时间优先遍历特性, 挑选最新的IP, 根据dip的地域, 选择相反的源活跃ip */ + MESA_htable_iterate_bytime(table, + ITERATE_TYPE_NEWEST_FIRST, + flwd_access_fetch_usable_ipport_by_ip_list_cb, + usable_active_ipport_args); + + if((0 == usable_active_ipport_args->act_sip_net_order.addr_ipv4) + || (0 == memcmp(&usable_active_ipport_args->act_sip_net_order.addr_ipv6, + &flwd_global_val.zero_ipv6_addr, sizeof(struct in6_addr)))){ + pthread_rwlock_unlock(flwd_ip_pool_rwlock); + return -1; + } + + pthread_rwlock_unlock(flwd_ip_pool_rwlock); + return 0; +} + +#if FLWD_SUPPORT_DNAT +static void * flwd_search_access_dnat_lb_policy(int tid, + flwd_tuple4v4_t *nat_key, flwd_actual_ip_port_t *actual_ipport_net_order) +{ + /* 根据dip, dport找到真实服务器的dip, dport, 替换现有数据包, 然后存储到nat-htable */ + return 0; +} +#endif + + + +/* TODO: + 防止单向流, 非对称路由问题, 导致S2C方向的流量和C2S方向不在同一台转发网关, + + 要将连接表扩散至全网的转发网关, + + 三元组, dip+dport+sip, 四元组量太大了, + + sport采用一种特殊指纹算法, 扫中三元组后再判定sport. +*/ +static int flwd_link_table_broadcast(int tid, flwd_nat_info_t *nat_info) +{ + /* TODO: + 防止单向流, 非对称路由问题, 导致S2C方向的流量和C2S方向不在同一台转发网关, + + 要将连接表扩散至全网的转发网关. + */ + return 0; +} + + +/* 将指纹信息存储于源端口的特定字段, 用于区分IR的包和真实客户端的包 */ +static void flwd_pkt_signature_hide_to_sport( + unsigned short pkt_signature, flwd_nat_info_t *nat_info) +{ + unsigned short sport_host_order; + + pkt_signature &= (FLWD_UDP_SPORT_HASH_MASK >> 8); + + if(0 == nat_info->outer_nat_tuple5.dir_reverse){ + sport_host_order = ntohs(nat_info->outer_nat_tuple5.sport_net_order); + sport_host_order &= (~FLWD_UDP_SPORT_HASH_MASK); /* 先清空hash字段 */ + sport_host_order |= (pkt_signature << 8); /* hash字段隐藏于sport中 */ + nat_info->outer_nat_tuple5.sport_net_order = htons(sport_host_order); + }else{ + sport_host_order = ntohs(nat_info->outer_nat_tuple5.dport_net_order); + sport_host_order &= (~FLWD_UDP_SPORT_HASH_MASK); /* 先清空hash字段 */ + sport_host_order |= (pkt_signature << 8); /* hash字段隐藏于sport中 */ + nat_info->outer_nat_tuple5.dport_net_order = htons(sport_host_order); + } + + return; +} + + +/* + 选择源端口需要保证发出去的线程id, 在应答包回来之后, 还能被驱动分流到同一个线程, + 可能需要尝试多次, 找到一个合适的sport, 凑巧符合驱动的分流算法. + + 因为dip, dport是不能变的, sip也是随机选定后不能变, 只能在源端口上做一些trick, + 而且多个不同接入网关之间, 单个网关的不同线程之间已经预先分配了端口范围, + 对于一个活跃IP来说, 端口上能做的这个trick的范围更小, 区别准确度很低! 
+ + 采取一种新策略: + 1,TCP协议, 使用SYN-ISN转换技术, 通过32bit的ISN号做hash, 做为指纹; + 2,DNS协议, 使用DNS-TRANSID转换技术, 通过16bit的TRANSID做为指纹, 冲突率比TCP_ISN会大一些; + 3,其他UDP协议, 选择的四元组使用maat_redis全网扩散, 此类协议可能存在延时问题导致首包丢失! +*/ +static int flwd_acc_user_generate_pkt_signature( + int tid, flwd_nat_info_t *nat_info, flwd_raw_pkt_t *raw_pkt) +{ + const flwd_ipv4_hdr_t *flwd_iphdr = (flwd_ipv4_hdr_t *)raw_pkt->inner_ip_layer_hdr; + const flwd_udp_hdr_t *flwd_udphdr; + const flwd_tcp_hdr_t *flwd_tcphdr; + int canot_convert_pro; + unsigned int hash_val; + unsigned short pkt_signature; + + if(FLWD_NAT_TYPE_SNAT != nat_info->nat_type){ + return 0; + } + + if(IPPROTO_TCP == flwd_iphdr->ip_p){ + flwd_tcphdr = (flwd_tcp_hdr_t *)((char *)flwd_iphdr + flwd_iphdr->ip_hl * 4); + assert(TH_SYN == flwd_tcphdr->th_flags); + canot_convert_pro = 0; + /* 此处要使用nat之后的四元组, 指纹信息主要用于转发网关识别来自internet的数据包 */ + hash_val = flwd_tuple5_hash(&nat_info->outer_nat_tuple5, 1); + /* + TODO: + 对于TCP协议, 使用端口HASH这个方式冲突率较高, 改用修改ISN的方式, + */ +#if 0 + nat_info->signature_param = (long long)hash_val - (long long)ntohl(flwd_tcphdr->th_seq); +#else + pkt_signature = hash_val % (FLWD_UDP_SPORT_HASH_MASK >> 8); +#endif + + }else if(IPPROTO_UDP == flwd_iphdr->ip_p){ + /* + TODO, + 对于UDP协议, 仅用端口HASH区分, 冲突率也较高, 再增加redis全网扩散三元组功能, + IP复用和真实客户端访问同一个服务器, 而且端口还凑巧冲突的几率极小. + */ + hash_val = flwd_tuple5_hash(&nat_info->outer_nat_tuple5, 1); + pkt_signature = hash_val % (FLWD_UDP_SPORT_HASH_MASK >> 8); + canot_convert_pro = 0; + }else{ + canot_convert_pro = 1; + } + + if(1 == canot_convert_pro){ + /* TODO: 普通UDP协议靠redis实时扩散三元组, 识别应答包 */ + //flwd_link_table_broadcast(tid, nat_info); + }else{ + flwd_pkt_signature_hide_to_sport(pkt_signature, nat_info); + } + + return 0; +} + +static flwd_terminal_proto_t flwd_access_proto_identify(flwd_raw_pkt_t *raw_pkt) +{ + flwd_terminal_proto_t flwd_pro = FLWD_TERMINAL_IP_LAYER; + + /* + TODO: + 识别L2TP, PPTP接入方式的数据包. + + 或者靠其他方式终结VPN, 转为纯IP包, 等同于IP三层接入. + */ + + return flwd_pro; +} + + + +/* 接入网关从客户端方向收包, 网络底层原始数据包入口函数 */ +static int flwd_acc_user_pkt_input(flwd_device_handle_t *device_handle, + int tid, flwd_raw_pkt_t *raw_pkt) +{ + int ret; + + raw_pkt->terminal_proto = flwd_access_proto_identify(raw_pkt); + switch(raw_pkt->terminal_proto){ + case FLWD_TERMINAL_IP_LAYER: + flwd_access_kernal_pkt_input(device_handle, tid, raw_pkt); + break; + + case FLWD_TERMINAL_IP_L2TP: + /* TODO */ + assert(0); + break; + + case FLWD_TERMINAL_IP_PPTP: + /* TODO */ + assert(0); + break; + + default: + assert(0); + } + + return 0; +} + +/* + NAT地址转换, + 根据nat_info中, 内外网的四元组, 替换数据包中的IP和端口, + 并重新计算校验和. 
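   The rewrite below delegates to the project-internal flwd_sendpacket_do_checksum();
   for reference, a standalone sketch of the standard RFC 1071 computation it presumably
   performs for the IPv4 header (illustrative only, not the project's implementation):

       // RFC 1071 one's-complement sum over the IPv4 header (ip_hl * 4 bytes, always
       // even). The ip_sum field must be zeroed before summing; the caller stores the
       // result back in network byte order with htons().
       static unsigned short ip_header_checksum(const unsigned char *hdr, int len)
       {
           unsigned long sum = 0;
           int i;
           for (i = 0; i + 1 < len; i += 2)
               sum += (unsigned long)((hdr[i] << 8) | hdr[i + 1]);  // 16-bit big-endian words
           while (sum >> 16)
               sum = (sum & 0xffff) + (sum >> 16);                  // fold the carries back in
           return (unsigned short)~sum;                             // one's complement of the sum
       }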
+*/ +static int flwd_nat_set_new_addr_v4(flwd_topology_t topo_mode, + flwd_nat_type_t nat_type, const flwd_nat_info_t *nat_info, flwd_ipv4_hdr_t *flwd_ip4hdr) +{ + flwd_tcp_hdr_t *flwd_tcp_hdr; + flwd_udp_hdr_t *flwd_udp_hdr; + const flwd_tuple5_t *after_nat_addr; + struct ltsm_result ltsm_res; + unsigned int *tobe_modify_ip; + unsigned short *tobe_modify_port; + unsigned char tcp_flags = 0; + + if(TOPO_ACC_LINK_USER == topo_mode){ + /* C2I方向, 用outer的地址, 替换当前包的源ip和源端口 */ + after_nat_addr = &nat_info->outer_nat_tuple5; + tobe_modify_ip = &flwd_ip4hdr->ip_src.s_addr; + if(IPPROTO_TCP == flwd_ip4hdr->ip_p){ + flwd_tcp_hdr = (flwd_tcp_hdr_t *)((char *)flwd_ip4hdr + flwd_ip4hdr->ip_hl * 4); + tcp_flags = flwd_tcp_hdr->th_flags; + tobe_modify_port = &flwd_tcp_hdr->th_sport; + }else{ + flwd_udp_hdr = (flwd_udp_hdr_t *)((char *)flwd_ip4hdr + flwd_ip4hdr->ip_hl * 4); + tobe_modify_port = &flwd_udp_hdr->uh_sport; + } + }else{ + /* I2C方向, 用inner的地址, 替换当前包的目标ip和目标端口 */ + after_nat_addr = &nat_info->inner_nat_tuple5; + tobe_modify_ip = &flwd_ip4hdr->ip_dst.s_addr; + if(IPPROTO_TCP == flwd_ip4hdr->ip_p){ + flwd_tcp_hdr = (flwd_tcp_hdr_t *)((char *)flwd_ip4hdr + flwd_ip4hdr->ip_hl * 4); + tcp_flags = flwd_tcp_hdr->th_flags; + tobe_modify_port = &flwd_tcp_hdr->th_dport; + }else{ + flwd_udp_hdr = (flwd_udp_hdr_t *)((char *)flwd_ip4hdr + flwd_ip4hdr->ip_hl * 4); + tobe_modify_port = &flwd_udp_hdr->uh_dport; + } + } + + /* 为了hash只查找一次, key的地址存储都是根据sip>dip的结果而定, 要根据存储的key的地址方向, 选择真正的原始地址 */ + if(0 == after_nat_addr->dir_reverse){ + *tobe_modify_ip = after_nat_addr->ippair_v4.sip_net_order; + *tobe_modify_port = after_nat_addr->sport_net_order; + }else{ + *tobe_modify_ip = after_nat_addr->ippair_v4.dip_net_order; + *tobe_modify_port = after_nat_addr->dport_net_order; + } + + if(IPPROTO_TCP == flwd_ip4hdr->ip_p){ + flwd_sendpacket_do_checksum((char *)flwd_ip4hdr, + IPPROTO_TCP, + ntohs(flwd_ip4hdr->ip_len) - flwd_ip4hdr->ip_hl * 4); /* 从传输层头部开始到包末尾的长度 */ + }else{ + flwd_sendpacket_do_checksum((char *)flwd_ip4hdr, + IPPROTO_UDP, + ntohs(flwd_ip4hdr->ip_len) - flwd_ip4hdr->ip_hl * 4); /* 从传输层头部开始到包末尾的长度 */ + } + + flwd_sendpacket_do_checksum((char *)flwd_ip4hdr, + IPPROTO_IP, + sizeof(flwd_ipv4_hdr_t)); + +#if FLWD_USE_LTSM_FOR_QUICK_CLOSE + unsigned char dir; + if(tcp_flags != 0){ + if(FLWD_NAT_TYPE_SNAT == nat_info->nat_type){ + if(TOPO_ACC_LINK_USER == topo_mode){ + dir = LTSM_DIR_C2S; /* SNAT模式, 从user卡收到的肯定是client端的包 */ + }else{ + dir = LTSM_DIR_S2C; + } + }else{ + if(TOPO_ACC_LINK_USER == topo_mode){ + dir = LTSM_DIR_S2C; + }else{ + dir = LTSM_DIR_C2S; + } + } + + ltsm_res = ltsm_get_current_state(nat_info->ltsm_stat_handle, tcp_flags, dir); + } +#endif + + return (int)ltsm_res.fstate; +} + + +static int flwd_nat_set_new_addr_v6(flwd_topology_t topo_mode, + flwd_nat_type_t nat_type, const flwd_nat_info_t *nat_info,flwd_ipv6_hdr_t *flwd_ip6hdr) +{ + /* TODO, IPv6 NAT */ + return -1; +} + + +/* + 根据之前已经存储的转发表, 做nat转换 + 目前采用的策略是直接修改原始包的IP地址和端口, 提高性能, 避免每次都malloc/memcpy/free操作. 
+*/ +static int flwd_do_nat(flwd_device_handle_t *device_handle, + flwd_nat_info_t *nat_info, flwd_raw_pkt_t *raw_pkt) +{ + int ret; + + if(FLWD_IP_ADDR_TYPE_V4 == nat_info->outer_nat_tuple5.addr_type){ + ret = flwd_nat_set_new_addr_v4(device_handle->io_para.topo_mode, + nat_info->nat_type, nat_info, + (flwd_ipv4_hdr_t *)raw_pkt->inner_ip_layer_hdr); + }else{ + ret = flwd_nat_set_new_addr_v6(device_handle->io_para.topo_mode, + nat_info->nat_type, nat_info, + (flwd_ipv6_hdr_t *)raw_pkt->inner_ip_layer_hdr); + } + + return ret; +} + +static void *flwd_nat_table_search(void) +{ + /* + 转发网关命中后, 会将SNAT或DNAT类型存储到vxlan的某个字段, + + 对于SNAT, C2S方向的存储NAT后的四元组, 待S2C方向的回来后, 再次查找htable相同的key, 恢复之前真实客户端IP和PORT即可; + + 对于DNAT, 第一个SYN包扫描IP复用策略表, 找到一个可用的RIP和RPORT, 然后也存储到htable, + 后续的虚拟服务器的包, 直接和SNAT可复用一个htable, 查找一次即可。 + */ + return 0; +} + + +static void * flwd_acc_to_fwd_pre_output(flwd_device_handle_t *rcv_device_handle, + flwd_device_handle_t *send_device_handle, int tid, flwd_nat_info_t *nat_info, + flwd_raw_pkt_t *raw_pkt, unsigned int out_opt) +{ + flwd_vxlan_hdr_t *outer_vxlan_hdr; + unsigned char *local_acc_gateway_mac = NULL ; + int ret; + flwd_ip_t ip_union; + char ip_str[64]; + flwd_eth_hdr_t *inner_eth_hdr; + + char *unsend_raw_data_ptr = rcv_device_handle->low_level_mbuff_mtod(raw_pkt->low_level_mbuff); + int unsend_raw_data_len = rcv_device_handle->low_level_mbuff_get_pkt_len(raw_pkt->low_level_mbuff); + void *send_mbuff = send_device_handle->low_level_mbuff_malloc(send_device_handle, tid, unsend_raw_data_len+FLWD_VXLAN_OUTER_PACKET_LEN); + char *send_buf_data_ptr; + + /* 将接收句柄的缓冲区copy到待发送句柄的缓冲区, 因两边捕包模式可能不一样, + 比如access使用协议栈模式, 而forward使用marsio模式, + mbuff不能直接复用, 必须有一次malloc/memcpy/free的过程. + */ + send_device_handle->low_level_mbuff_data_append(send_mbuff, unsend_raw_data_ptr, unsend_raw_data_len); + + /* 指针向前移动, 预留vxlan包空间 */ + send_buf_data_ptr = (char *)send_device_handle->low_level_mbuff_data_forward(send_mbuff, FLWD_VXLAN_OUTER_PACKET_LEN); + + /* 构造底层vxlan信息, 不需考虑分流情况, 因为都靠内层四元组hash分流, vxlan外层地址仅有路由寻址意义 */ + +#if FLWD_NO_ACTIVE_ARP_QUERY + local_acc_gateway_mac = send_device_handle->io_para.local_mac_addr; + unsigned char remote_fwd_gateway_mac[6] = {0x0c, 0xc4,0x7a,0x14,0x6f,0x5a}; /* 10.0.6.203 */ + flwd_sendpacket_build_ethernet(ETH_P_IP, local_acc_gateway_mac, remote_fwd_gateway_mac, send_buf_data_ptr); +#else + if(send_device_handle->io_para.cap_mode != CAP_MODEL_SOCKET){ /* 非socket模式才需要构造vxlan外层的ethernet层 */ + local_acc_gateway_mac = send_device_handle->io_para.local_mac_addr; + + ip_union.addr_type = FLWD_IP_ADDR_TYPE_V4; + ip_union.addr_ipv4 = nat_info->flwd_route_info.next_gateway_ip_net; + + if(memcmp(nat_info->flwd_route_info.next_gateway_mac, flwd_global_val.zero_mac_addr, 6) == 0){ + /* 此处把网关MAC保存到nat_info, 不用每次都查arp表 */ + ret = flwd_arp_table_query(nat_info->tid, &ip_union, send_device_handle, nat_info->flwd_route_info.next_gateway_mac); + if(ret < 0){ + send_device_handle->low_level_mbuff_free(send_device_handle, tid, send_mbuff); + flwd_log(30, "arp query %s, but not found!\n", flwd_ipt_ntop_r(&ip_union, ip_str, 64)); + return NULL; + } + } + + /* 根据活跃IP的捕获时的gdevIP, 查询GDEV的MAC, 构造ethernet header */ + unsigned char *remote_fwd_gateway_mac = nat_info->flwd_route_info.next_gateway_mac; + flwd_sendpacket_build_ethernet(ETH_P_IP, local_acc_gateway_mac, remote_fwd_gateway_mac, send_buf_data_ptr); + } + +#endif + + /* NOTE, 即使是socket模式, 不需要构造底层vxlan原始包, 但DIP, PORT可能都不一样, 把这些值还是封装在数据包里, 让dl_io_send从包里取 */ +#if FLWD_NO_GDEV_ENV + flwd_sendpacket_build_ipv4(unsend_raw_data_len + 
sizeof(flwd_udp_hdr_t) + sizeof(flwd_vxlan_hdr_t), /* 内层原始包 + UDP包头 + vxlan头 */ + 0, + 0x1234, + 0, + 64, + IPPROTO_UDP, + send_device_handle->io_para.device_ip_net_order, + nat_info->flwd_route_info.next_gateway_ip_net, + NULL, + 0, + (char *)send_buf_data_ptr + sizeof(flwd_eth_hdr_t)); +#else + flwd_sendpacket_build_ipv4(unsend_raw_data_len + sizeof(flwd_udp_hdr_t) + sizeof(flwd_vxlan_hdr_t), /* 内层原始包 + UDP包头 + vxlan头 */ + 0, + 0x1234, + 0, + 64, + IPPROTO_UDP, + 0x0100007F, /* 本机socket */ + 0x0100007F, /* 本机socket */ + NULL, + 0, + (char *)send_buf_data_ptr + sizeof(flwd_eth_hdr_t)); +#endif + + flwd_sendpacket_build_udp(unsend_raw_data_len + sizeof(flwd_vxlan_hdr_t), + htons(50000 + tid), /* TODO, 此处的sport不像内层的sport, 内层sport是全局识别某个接入网关的, vxlan的sport可以随意 */ + htons(60000 + tid), + NULL, + 0, + send_buf_data_ptr + sizeof(flwd_eth_hdr_t) + sizeof(flwd_ipv4_hdr_t)); + + if(CAP_MODEL_SOCKET != send_device_handle->io_para.cap_mode){ /* socket模式底层由协议栈封装, 校验和由协议栈实现 */ + flwd_sendpacket_do_checksum((char *)send_buf_data_ptr + sizeof(flwd_eth_hdr_t), + IPPROTO_IP, + sizeof(flwd_ipv4_hdr_t)); + } + + outer_vxlan_hdr = (flwd_vxlan_hdr_t *)(send_buf_data_ptr + sizeof(flwd_eth_hdr_t) + sizeof(flwd_ipv4_hdr_t) + sizeof(flwd_udp_hdr_t)); + memset(outer_vxlan_hdr, 0, sizeof(flwd_vxlan_hdr_t)); + outer_vxlan_hdr->link_id = nat_info->flwd_route_info.gdev_args.link_id; + outer_vxlan_hdr->link_layer_type = 0; /* 0:ethernet; */ + outer_vxlan_hdr->dir = nat_info->flwd_route_info.gdev_args.this_ip_as_sip_route_dir; + if(FLWD_OUTOPT_FIRST_PKT == (out_opt & FLWD_OUTOPT_FIRST_PKT)){ + outer_vxlan_hdr->first_pkt_per_stream = 1; + } + + inner_eth_hdr = (flwd_eth_hdr_t *)(send_buf_data_ptr + sizeof(flwd_eth_hdr_t) + sizeof(flwd_ipv4_hdr_t) + sizeof(flwd_udp_hdr_t) + sizeof(flwd_vxlan_hdr_t)); + memcpy(inner_eth_hdr->h_source, nat_info->flwd_route_info.gdev_args.inner_raw_smac, 6); + memcpy(inner_eth_hdr->h_dest , nat_info->flwd_route_info.gdev_args.inner_raw_dmac, 6); + + if(FLWD_IP_ADDR_TYPE_V4 == nat_info->inner_nat_tuple5.addr_type){ + inner_eth_hdr->h_proto = htons(ETH_P_IP); + }else{ + inner_eth_hdr->h_proto = htons(ETH_P_IPV6); + } + + return send_mbuff; +} + +static void * flwd_acc_to_user_pre_output(flwd_device_handle_t *rcv_device_handle, + flwd_device_handle_t *send_device_handle, int tid, flwd_nat_info_t *nat_info, + flwd_raw_pkt_t *raw_pkt, unsigned int out_opt) +{ + flwd_vxlan_hdr_t *outer_vxlan_dr; + + const char *unsend_raw_data_ptr = raw_pkt->inner_pkt_data; /* 此指针在flwd_acc_fwd_pkt_input()函数, 已经指向了内层ethernet头部 */ + int unsend_raw_data_len = raw_pkt->inner_pkt_len; + void *send_mbuff = send_device_handle->low_level_mbuff_malloc(send_device_handle, tid, unsend_raw_data_len + FLWD_VXLAN_OUTER_PACKET_LEN); + char *send_buf_data_ptr; + + /* 将接收句柄的缓冲区copy到待发送句柄的缓冲区, 因两边捕包模式可能不一样, 不能直接复用, 必须有一次malloc/memcpy/free的过程 */ + send_buf_data_ptr = send_device_handle->low_level_mbuff_data_append(send_mbuff, unsend_raw_data_ptr, unsend_raw_data_len); + +#if FLWD_NO_ACTIVE_ARP_QUERY + /* 没有ARP动态查询的功能时, 手工更新测试终端的mac */ + unsigned char *local_acc_gateway_mac = send_device_handle->io_para.local_mac_addr; + unsigned char *remote_user_mac = nat_info->flwd_route_info.inner_terminal_mac; + + flwd_sendpacket_build_ethernet(ETH_P_IP, local_acc_gateway_mac, remote_user_mac, send_buf_data_ptr); +#else + /* 没有ARP动态查询的功能时, 手工更新测试终端的mac */ + unsigned char *local_acc_gateway_mac = send_device_handle->io_para.local_mac_addr; + unsigned char *remote_user_mac = nat_info->flwd_route_info.inner_terminal_mac; + + 
flwd_sendpacket_build_ethernet(ETH_P_IP, local_acc_gateway_mac, remote_user_mac, send_buf_data_ptr); +#endif + + return send_mbuff; +} + + +/* + TODO: + 靠系统或其他第三方软件实现L2TP, PPTP隧道接入, + 靠Iptables 将ppp接入的下一条网关指向(重定向)另一个tap设备, + 这样, tap设备的读取端(接入网关), 就能看到隧道内部的ip包, + 收到ip包后跟acc-user一样的处理流程. + + 问题: + acc-user只有一个io_handle, 目前是pcap, marsio, socket, 或tap, + 但操作pcap和tap的句柄明显不一样, + + 解决方式: + 1, 一个Flowood_IO句柄(四个方向, acc-user, acc-fwd, fwd-acc, fwd-gdev), 可对应多个虚拟IO句柄, + 即acc-user方向的IO句柄可以有三个虚拟IO句柄, 即三个不同的接入方式: pcap, l2tp_tap, pptp_tap, + 最终做到从哪进从哪出; + + 2, 如果是复用一个网卡, 物理句柄可以是多个, 但实际打开一块物理网卡; + 如acc-user, acc-fwd仅用一块网卡连接到交换机, + Flowood句柄是两个, 但实际网卡就一个, 此时应该靠不同的IP段来区别, + 即一块网卡多个IP地址, 靠数据包的dip区别哪个是acc-user的包, 哪个是acc-fwd的包. + +*/ + +static int flwd_access_output(flwd_device_handle_t *rcv_device_handle, int tid, + flwd_nat_info_t *nat_info, flwd_raw_pkt_t *raw_pkt, unsigned int out_opt) +{ + int ret; + flwd_device_handle_t *send_device_handle; + void *send_mbuff ; + const char *send_actual_ptr; + char debug_outer_tuple4[128]; + + /* + TODO, + + ACC向USER转发数据时, 如果是普通IP协议栈, 可以通过low_level_io_send()直接将包发出去, + 但有可能还是PPTP隧道, 这时的数据包仅是内层包的一部分, 外层PPP封装需要由VPN网关完成, + 需要注入隧道的入口, 而不是通过底层IO接口直接发送出网卡. + + 应继续划分底层物理接口, 如果pcap, marsio, socket等系统方式, + 逻辑接口, 如一个物理层两个IP, 当做两个逻辑网卡, + 或者是L2TP, PPTP等隧道, 从原始物理层收到PPP的包后, 最终会解析出内层IP包, + + 物理层和逻辑层是一对多的关系, 即使仅有一个接口, 也必须隐射出一个逻辑层, + flowood必须和逻辑层进行通信, 不可直接对物理层. + */ + + if(TOPO_ACC_LINK_USER == rcv_device_handle->io_para.topo_mode){ + send_device_handle = &flwd_global_val.global_io_handle[TOPO_ACC_LINK_FWD]; + send_mbuff = flwd_acc_to_fwd_pre_output(rcv_device_handle, send_device_handle, tid, nat_info, raw_pkt, out_opt); + if(NULL == send_mbuff){ + return -1; + } + ret = send_device_handle->low_level_send(send_device_handle, tid, send_mbuff); + if(ret < 0){ + send_actual_ptr = (char *)rcv_device_handle->low_level_mbuff_mtod(send_mbuff); + flwd_log(30, "ACC: send pkt to forward gateway error! inner_tuple4 is:%s, outer_tuple4 is:%s\n", + flwd_debug_print_tuple4_detail(send_actual_ptr+FLWD_VXLAN_OUTER_PACKET_LEN+sizeof(flwd_eth_hdr_t), tid), + flwd_debug_print_tuple4_detail_r(send_actual_ptr + sizeof(flwd_eth_hdr_t), debug_outer_tuple4, 128)); + }else{ + if(flwd_cfg_val.flwd_log_level <= 10){ + send_actual_ptr = (char *)rcv_device_handle->low_level_mbuff_mtod(send_mbuff); + flwd_log(10, "ACC: send pkt to forward gateway succ, inner_tuple4 is:%s, outer_tuple4 is:%s\n", + flwd_debug_print_tuple4_detail(send_actual_ptr+FLWD_VXLAN_OUTER_PACKET_LEN+sizeof(flwd_eth_hdr_t), tid), + flwd_debug_print_tuple4_detail_r(send_actual_ptr + sizeof(flwd_eth_hdr_t), debug_outer_tuple4, 128)); + } + } + + }else{ + send_device_handle = &flwd_global_val.global_io_handle[TOPO_ACC_LINK_USER]; + send_mbuff = flwd_acc_to_user_pre_output(rcv_device_handle, send_device_handle, tid, nat_info, raw_pkt, out_opt); + ret = send_device_handle->low_level_send(send_device_handle, tid, send_mbuff); + if(ret < 0){ + send_actual_ptr = (char *)rcv_device_handle->low_level_mbuff_mtod(send_mbuff); + flwd_log(30, "ACC: send pkt to user client error! tuple4 is:%s\n", + flwd_debug_print_tuple4_detail(send_actual_ptr+sizeof(flwd_eth_hdr_t), tid)); + }else{ + if(flwd_cfg_val.flwd_log_level <= 10){ + send_actual_ptr = (char *)rcv_device_handle->low_level_mbuff_mtod(send_mbuff); + flwd_log(10, "ACC: send pkt to user client succ! 
tuple4 is:%s\n", + flwd_debug_print_tuple4_detail(send_actual_ptr+sizeof(flwd_eth_hdr_t), tid)); + + } + } + } + + send_device_handle->low_level_mbuff_free_after_send(send_device_handle, tid, send_mbuff); + + return ret; +} + + +/* + 连接结束后, 释放占用活跃IP的源端口. +*/ +static void flwd_sport_recycle(flwd_nat_info_t *nat_info) +{ + /* 根据nat类型, snat回收outer->tuple5的sip, 即真实外网源IP; + dnat没有额外分配资源, 只是简单的将virtual_dip:virtual_dport改成了real_dip:real_dport. + */ + MESA_list_count_t *usable_sport_queue_head; + MESA_list_count_t *recycle_node; + unsigned char *ip_key; + unsigned int ip_key_size; + MESA_htable_handle act_ip_htable; + int tid = nat_info->tid; + const flwd_tuple5_t *recycle_tuple5; + unsigned short recycle_sport; + char ip_str[64]; + + if(FLWD_NAT_TYPE_DNAT == nat_info->nat_type){ + return; /* nothing to do */ + } + + if(FLWD_NAT_TYPE_SNAT == nat_info->nat_type){ + recycle_tuple5 = &nat_info->outer_nat_tuple5; + } + + if(flwd_likely(FLWD_IP_ADDR_TYPE_V4 == nat_info->inner_nat_tuple5.addr_type)){ + if(FLWD_ACT_IP_DYNAMIC == nat_info->act_ip_origin){ + act_ip_htable = flwd_thread_val[tid].flwd_ip_pool_dynamic_htable[FLWD_IP_ADDR_TYPE_V4][nat_info->act_ip_region]; + }else{ + act_ip_htable = flwd_thread_val[tid].flwd_ip_pool_static_htable[FLWD_IP_ADDR_TYPE_V4][nat_info->act_ip_region]; + } + + if(recycle_tuple5->dir_reverse){ + ip_key = (unsigned char *)&recycle_tuple5->ippair_v4.dip_net_order; + recycle_sport = recycle_tuple5->dport_net_order; + }else{ + ip_key = (unsigned char *)&recycle_tuple5->ippair_v4.sip_net_order; + recycle_sport = recycle_tuple5->sport_net_order; + } + ip_key_size = sizeof(int); + }else{ /* IPv6 */ + if(FLWD_ACT_IP_DYNAMIC == nat_info->act_ip_origin){ + act_ip_htable = flwd_thread_val[tid].flwd_ip_pool_dynamic_htable[FLWD_IP_ADDR_TYPE_V6][nat_info->act_ip_region]; + }else{ + act_ip_htable = flwd_thread_val[tid].flwd_ip_pool_static_htable[FLWD_IP_ADDR_TYPE_V6][nat_info->act_ip_region]; + } + + if(recycle_tuple5->dir_reverse){ + ip_key = (unsigned char *)&recycle_tuple5->ippair_v6->dip_net_order; + recycle_sport = recycle_tuple5->dport_net_order; + }else{ + ip_key = (unsigned char *)&recycle_tuple5->ippair_v6->sip_net_order; + recycle_sport = recycle_tuple5->sport_net_order; + } + ip_key_size = sizeof(struct in6_addr); + } + + if(sizeof(int) == ip_key_size){ + inet_ntop(AF_INET, ip_key, ip_str, 64); + }else{ + inet_ntop(AF_INET6, ip_key, ip_str, 64); + } + + pthread_rwlock_wrlock(&flwd_thread_val[tid].flwd_ip_pool_static_rwlock); + + flwd_active_ip_t *flwd_active_ip = (flwd_active_ip_t *)flwd_ip_pool_search(nat_info->act_ip_origin, act_ip_htable, ip_key, ip_key_size); + if(NULL == flwd_active_ip){ + /* NOTE: 可能这期间此活跃IP已经超时被maat_callback移除, 也就不必回收端口, 直接返回即可 */ + flwd_log(20, "ACC:sport %u recycle, but active ip:%s is not exist!\n", recycle_sport, ip_str); + goto done; + } + + if(IPPROTO_TCP == nat_info->inner_nat_tuple5.protocol){ /* TCP, UDP区别处理 */ + usable_sport_queue_head = &flwd_active_ip->usable_tcp_sport_list_head; + }else{ + usable_sport_queue_head = &flwd_active_ip->usable_udp_sport_list_head; + } + + /* NOTE: 取可用端口时现场free掉MESA_list_count_t管理结构, 此处也是现场malloc */ + recycle_node = (MESA_list_count_t *)malloc(sizeof(MESA_list_count_t)); + memcpy(&recycle_node->quiddity, &recycle_sport, sizeof(short)); + /* 此处使用MESA_list_count_add_tail, 放在head->pre, 最后使用 */ + MESA_list_count_add_tail(usable_sport_queue_head, recycle_node); + +done: + pthread_rwlock_unlock(&flwd_thread_val[tid].flwd_ip_pool_static_rwlock); + + flwd_log(10, "ACC:sport recycle, active_ip:%s, port:%u\n", ip_str, 
recycle_sport); + + return; +} + + +static void flwd_access_nat_htable_data_free(void *data) +{ + flwd_nat_info_t *nat_info = (flwd_nat_info_t *)data; + + if(--nat_info->reference > 0){ + /* 两个key索引同一个nat_info, 删除时避免double free, 增加reference计数 */ + return; + } + + flwd_sport_recycle(nat_info); + + if(FLWD_IP_ADDR_TYPE_V6 == nat_info->inner_nat_tuple5.addr_type ){ + flwd_free(nat_info->tid, nat_info->inner_nat_tuple5.ippair_v6); + } + + if(FLWD_IP_ADDR_TYPE_V6 == nat_info->outer_nat_tuple5.addr_type ){ + flwd_free(nat_info->tid, nat_info->outer_nat_tuple5.ippair_v6); + } + +#if FLWD_USE_LTSM_FOR_QUICK_CLOSE + ltsm_destroy_handle(nat_info->ltsm_stat_handle); +#endif + + flwd_free(nat_info->tid, data); +} + +static int flwd_access_nat_htable_data_expire_notify( + void *data, int eliminate_type) +{ + char str_tmp[256]; + flwd_nat_info_t *nat_info = (flwd_nat_info_t *)data; + + if(1 == nat_info->reference){ /* 因为同时被两个key索引, 只最后一次记录日志, 否则会写两条一样的日志 */ + flwd_log(10, "pre-nat-tuple4 %s expire!\n", + flwd_tuple5_ntop_r(&nat_info->inner_nat_tuple5, str_tmp, 256)); + } + + return 1; +} + + + +/* + nat转发表不存在, 第一次创建nat_info时被调用, + 存储路由转发相关信息, 不用每次转发时都查询一次. +*/ +static int flwd_save_session_route_info(flwd_nat_info_t *nat_info, + flwd_raw_pkt_t *raw_pkt, flwd_active_ip_port_args_t *usable_active_ipport_args) +{ + const flwd_eth_hdr_t *ehdr = (flwd_eth_hdr_t *)raw_pkt->outer_pkt_data; + int ret; + + if(FLWD_NAT_TYPE_SNAT == nat_info->nat_type){ + /* user终端的MAC不用查询, 直接使用当前来包的mac存储下来即可 */ + memcpy(nat_info->flwd_route_info.inner_terminal_mac, ehdr->h_source, ETH_ALEN); + /* + TODO: + 如何得到下一跳转发网关的IP, 即转发网关IP? + 理论上同一个局点的所有设备都是联通的, 任选一个就可以, + 但可能造成单向流, + 如果能知道GDEV的分流算法最好了. + */ +#if 0 + nat_info->flwd_route_info.next_gateway_ip_net = flwd_search_fwd_ip_by_gdev_ip(usable_active_ipport_args->gdev_args.gdev_ip_net_order); +#else + /* TODO, 单台主机, 先写死, 使用本地回环 */ + inet_pton(AF_INET, "127.0.0.1", &nat_info->flwd_route_info.next_gateway_ip_net); +#endif + + memcpy(&nat_info->flwd_route_info.gdev_args, + &usable_active_ipport_args->gdev_args, + sizeof(flwd_gdev_associated_args_t)); + }else{ + /* TODO, DNAT */ + abort(); + } + + return 0; +} + + +/* + NOTE: + + HASH查找时, + 分为SNAT->C2S方向, SNAT->S2C方向, DNAT->C2S方向, DNAT->S2C方向, + 几个方向的地址不同, + 所以, 对于一个连接的内部地址和外部地址来说, 要生成两个key, 指向同一个nat_info. + + hash插入时, 对于同一个nat_info, 使用两个不同的key索引. + + 例如: + 192.168.10.100:12345->8.8.8.8:53发送一个DNS查询, + 出NAT网关后变成了: + 202.43.148.189:34567->8.8.8.8:53, + + 要用上面两个不同的四元组分别做为key, 但能找到同一个htable里的data, 即nat_info. 
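   To make the dual-key idea concrete, a small usage sketch of the example above
   (nat_table, inner_tuple5 and outer_tuple5 are illustrative local names; the lookup
   calls mirror the ones used elsewhere in this file):

       // Looking the session up by either direction's canonical 4-tuple key returns
       // the same nat_info; reference stays at 2 until both keys are removed.
       flwd_nat_info_t *hit_c2s = (flwd_nat_info_t *)MESA_htable_search(nat_table,
               (const uchar *)&inner_tuple5, sizeof(flwd_tuple5_t));   // 192.168.10.100:12345 <-> 8.8.8.8:53
       flwd_nat_info_t *hit_s2c = (flwd_nat_info_t *)MESA_htable_search(nat_table,
               (const uchar *)&outer_tuple5, sizeof(flwd_tuple5_t));   // 202.43.148.189:34567 <-> 8.8.8.8:53
       // hit_c2s == hit_s2c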
+*/ +static int flwd_new_nat_session_sotre(int tid, flwd_nat_info_t *nat_info) +{ + int ret; + char inner_str[256], outer_str[256]; + + ret = MESA_htable_add(flwd_thread_val[tid].nat_info_table, + (unsigned char *)&nat_info->inner_nat_tuple5, + sizeof(flwd_tuple5_t), + nat_info); + if(ret < 0){ + flwd_log(30, "save nat session error, %s!\n", + flwd_tuple5_ntop_r(&nat_info->inner_nat_tuple5, inner_str, 256)); + nat_info->reference = 1; + flwd_access_nat_htable_data_free(nat_info); + return -1; + } + nat_info->reference++; + + ret = MESA_htable_add(flwd_thread_val[tid].nat_info_table, + (unsigned char *)&nat_info->outer_nat_tuple5, + sizeof(flwd_tuple5_t), + nat_info); + if(ret < 0){ + nat_info->reference = 1; + flwd_log(30, "save nat session error, %s!\n", + flwd_tuple5_ntop_r(&nat_info->outer_nat_tuple5, outer_str, 256)); + /* NOTE: 插入失败, 此处要使用inner的key, 因为outer插入失败 */ + MESA_htable_del(flwd_thread_val[tid].nat_info_table, + (unsigned char *)&nat_info->inner_nat_tuple5, + sizeof(flwd_tuple5_t), + NULL); + return -1; + } + nat_info->reference++; + + return 0; +} + + +static inline flwd_nat_info_t *flwd_create_new_nat_info_v4(int tid, + flwd_nat_type_t nat_type, flwd_tuple5_t *inner_nat_key, + flwd_active_ip_port_args_t *usable_active_ipport_args, flwd_raw_pkt_t *raw_pkt) +{ + flwd_nat_info_t *nat_info = (flwd_nat_info_t *)flwd_calloc(tid, 1, sizeof(flwd_nat_info_t)); + flwd_tuple5_t *opposite_tuple5; + int differ; + unsigned int outer_actual_dip_net_order; /* 客户端去往真实的目标IP, inner和outer是一样的, 但在key的存储位置, 因sip,dip的大小关系可能不一样 */ + unsigned int outer_actual_dport_net_order; /* 客户端去往真实的目标端口, inner和outer是一样的, 但在key的存储位置, 因sip,dip的大小关系可能不一样 */ + + nat_info->tid = tid; /* 主要用于htable回调函数内部的tid参数传递 */ + nat_info->nat_type = nat_type; + + if(FLWD_NAT_TYPE_SNAT == nat_type){ + /************************** 构建 inner tuple5 **************************/ + memcpy(&nat_info->inner_nat_tuple5, inner_nat_key, sizeof(flwd_tuple5_t)); /* snat的inner tuple5直接copy */ + + + /*************************** 构建outer tuple5 **************************/ + opposite_tuple5 = &nat_info->outer_nat_tuple5; + + if(inner_nat_key->dir_reverse){ + outer_actual_dip_net_order = inner_nat_key->ippair_v4.sip_net_order; + outer_actual_dport_net_order = inner_nat_key->sport_net_order; + }else{ + outer_actual_dip_net_order = inner_nat_key->ippair_v4.dip_net_order; + outer_actual_dport_net_order = inner_nat_key->dport_net_order; + } + + /* NOTE: 两个无符号整数比较大小, 一定要直接比较, 避免用 c = a - b; if(c < 0)的方式!!! 
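   A short illustration of the pitfall, for reference:

       unsigned int a = 1, b = 2;
       // a - b is evaluated in unsigned arithmetic and wraps to 0xFFFFFFFFu, so a test
       // like "(a - b) < 0" can never be true, and stuffing the wrapped value into a
       // signed int first is implementation-defined. Compare the operands directly:
       if (a > b) {
           // strictly greater
       } else if (a < b) {
           // strictly smaller
       }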
*/ + if(usable_active_ipport_args->act_sip_net_order.addr_ipv4 > outer_actual_dip_net_order){ + /* 外部地址的真实源IP是刚选择的活跃IP, 真实目的IP就是当前inner_key的目的ip */ + opposite_tuple5->ippair_v4.sip_net_order = usable_active_ipport_args->act_sip_net_order.addr_ipv4; + opposite_tuple5->ippair_v4.dip_net_order = outer_actual_dip_net_order; + opposite_tuple5->sport_net_order = usable_active_ipport_args->act_sport_net_order; + opposite_tuple5->dport_net_order = outer_actual_dport_net_order; + + opposite_tuple5->dir_reverse = 0; /* 存储的key跟真实四元组地址是一样的 */ + }else if(usable_active_ipport_args->act_sip_net_order.addr_ipv4 < outer_actual_dip_net_order){ + opposite_tuple5->ippair_v4.sip_net_order = outer_actual_dip_net_order; + opposite_tuple5->ippair_v4.dip_net_order = usable_active_ipport_args->act_sip_net_order.addr_ipv4; + opposite_tuple5->sport_net_order = outer_actual_dport_net_order; + opposite_tuple5->dport_net_order = usable_active_ipport_args->act_sport_net_order; + + opposite_tuple5->dir_reverse = 1; /* 存储的key跟真实四元组地址是相反的 */ + }else{ + /* inner和outer的IP地址相同, 理论上在IP复用系统上是不可能的!!! */ + abort(); + } + + opposite_tuple5->addr_type = inner_nat_key->addr_type; /* TODO, 预留6to4地址转换能力?? */ + opposite_tuple5->protocol = inner_nat_key->protocol; + + /* TODO, 根据选择的活跃IP, 查关联表, 确定应该从哪个FWD发出 */ + ////nat_info->next_hop_ip_net_order = flwd_search_fwd_ip_by_gdev_ip(usable_active_ipport_args->related_gdev_ip_net_order); + }else{ +#if FLWD_SUPPORT_DNAT + /* TODO, + DNAT只需简单替换dip, dport即可, 不用考虑怎么挑选端口, 端口复用问题, + 因为对于一个虚拟服务, 真实源IP,源port已经靠外网主机的协议栈保证无冲突了. + */ + assert(0); +#endif + } + + flwd_save_session_route_info(nat_info, raw_pkt, usable_active_ipport_args); + + return nat_info; +} + + +static inline flwd_nat_info_t *flwd_create_new_nat_info_v6(int tid, + flwd_nat_type_t nat_type, flwd_tuple5_t *inner_tuple5_v6, + flwd_active_ip_port_args_t *usable_active_ipport_args, flwd_raw_pkt_t *raw_pkt) +{ + flwd_nat_info_t *nat_info = (flwd_nat_info_t *)flwd_calloc(tid, 1, sizeof(flwd_nat_info_t)); + flwd_tuple5_t *opposite_tuple5; + int differ; + struct in6_addr outer_actual_dip_net_order; /* 客户端去往真实的目标IP, inner和outer是一样的, 但在key的存储位置, 因sip,dip的大小关系可能不一样 */ + unsigned int outer_actual_dport_net_order; /* 客户端去往真实的目标端口, inner和outer是一样的, 但在key的存储位置, 因sip,dip的大小关系可能不一样 */ + + nat_info->tid = tid; /* 主要用于htable回调函数内部的tid参数传递 */ + nat_info->nat_type = nat_type; + + nat_info->inner_nat_tuple5.ippair_v6 = (flwd_ippair_v6_t *)flwd_malloc(tid, sizeof(flwd_ippair_v6_t)); + nat_info->outer_nat_tuple5.ippair_v6 = (flwd_ippair_v6_t *)flwd_malloc(tid, sizeof(flwd_ippair_v6_t)); + + if(FLWD_NAT_TYPE_SNAT == nat_type){ + /********* 构建 inner tuple5 ******/ + memcpy(&nat_info->inner_nat_tuple5, inner_tuple5_v6, sizeof(flwd_tuple5_t)); /* snat的inner tuple5直接copy */ + memcpy(nat_info->inner_nat_tuple5.ippair_v6, inner_tuple5_v6->ippair_v6, sizeof(flwd_ippair_v6_t)); + + opposite_tuple5 = &nat_info->outer_nat_tuple5; + /********* 构建outer tuple5 ******/ + + if(inner_tuple5_v6->dir_reverse){ + outer_actual_dip_net_order = inner_tuple5_v6->ippair_v6->sip_net_order; + outer_actual_dport_net_order = inner_tuple5_v6->sport_net_order; + }else{ + outer_actual_dip_net_order = inner_tuple5_v6->ippair_v6->dip_net_order; + outer_actual_dport_net_order = inner_tuple5_v6->dport_net_order; + } + + differ = memcmp(&usable_active_ipport_args->act_sip_net_order.addr_ipv6, &outer_actual_dip_net_order, sizeof(struct in6_addr)) ; + if(differ > 0){ /* 外部地址的真实源IP是刚选择的活跃IP, 真实目的IP就是当前inner_key的目的ip */ + memcpy(&opposite_tuple5->ippair_v6->sip_net_order, 
&usable_active_ipport_args->act_sip_net_order.addr_ipv6, sizeof(struct in6_addr)); + memcpy(&opposite_tuple5->ippair_v6->dip_net_order, &outer_actual_dip_net_order, sizeof(struct in6_addr)); + opposite_tuple5->sport_net_order = usable_active_ipport_args->act_sport_net_order; + opposite_tuple5->dport_net_order = outer_actual_dport_net_order; + + opposite_tuple5->dir_reverse = 0; /* 存储的key跟真实四元组地址是一样的 */ + }else if(differ < 0){ + memcpy(&opposite_tuple5->ippair_v6->sip_net_order, &outer_actual_dip_net_order, sizeof(struct in6_addr)); + memcpy(&opposite_tuple5->ippair_v6->dip_net_order, &usable_active_ipport_args->act_sip_net_order.addr_ipv6, sizeof(struct in6_addr)); + opposite_tuple5->sport_net_order = outer_actual_dport_net_order; + opposite_tuple5->dport_net_order = usable_active_ipport_args->act_sport_net_order; + + opposite_tuple5->dir_reverse = 1; /* 存储的key跟真实四元组地址是相反的 */ + }else{ + /* inner和outer的IP地址相同, 理论上在flowood系统上是不可能的!!! */ + abort(); + } + + opposite_tuple5->addr_type = inner_tuple5_v6->addr_type; /* TODO, 预留6to4地址转换能力, 外部地址可能是根据策略得到, 不一定是固定由ipv4转ipv4 */ + opposite_tuple5->protocol = inner_tuple5_v6->protocol; + + ////nat_info->next_hop_ip_net_order = flwd_search_fwd_ip_by_gdev_ip(usable_active_ipport_args->related_gdev_ip_net_order); /* TODO, 根据选择的活跃IP, 查关联表, 确定应该从哪个FWD发出 */ + + }else{ +#if FLWD_SUPPORT_DNAT + assert(0); +#endif + } + + flwd_save_session_route_info(nat_info, raw_pkt, usable_active_ipport_args); + + return nat_info; +} + + + +static flwd_nat_info_t *flwd_create_new_nat_info(int tid, + flwd_nat_type_t nat_type, flwd_tuple5_t *inner_nat_key, + flwd_active_ip_port_args_t *usable_active_ipport_args, flwd_raw_pkt_t *raw_pkt) +{ + flwd_nat_info_t *nat_info; + + if(flwd_likely(FLWD_IP_ADDR_TYPE_V4 == inner_nat_key->addr_type)){ + nat_info = flwd_create_new_nat_info_v4(tid, nat_type, inner_nat_key, + usable_active_ipport_args, raw_pkt); + }else{ + nat_info = flwd_create_new_nat_info_v6(tid, nat_type, inner_nat_key, + usable_active_ipport_args, raw_pkt); + } + + if(FLWD_IP_REGION_INLAND == usable_active_ipport_args->dip_region_type){ + nat_info->act_ip_region = FLWD_IP_REGION_OUTLAND; + }else{ + nat_info->act_ip_region = FLWD_IP_REGION_INLAND; + } + + nat_info->nat_type = nat_type; + +#if FLWD_USE_LTSM_FOR_QUICK_CLOSE + nat_info->ltsm_stat_handle = ltsm_create_handle(); +#endif + return nat_info; +} + + + +static flwd_nat_info_t * flwd_new_nat_session(flwd_device_handle_t *device_handle, + int tid, flwd_tuple5_t *nat_key, flwd_raw_pkt_t *raw_pkt) +{ + int ret; + flwd_nat_info_t *nat_info = NULL; + flwd_active_ip_port_args_t usable_active_ipport_args; + flwd_actual_ip_port_t actual_ipport_net_order; + + memset(&usable_active_ipport_args, 0, sizeof(flwd_active_ip_port_args_t)); + + usable_active_ipport_args.dip_region_type = flwd_dstip_location(nat_key); /* 查看目标IP所处地理位置 */ + usable_active_ipport_args.tid = tid; + usable_active_ipport_args.nat_key = nat_key; + usable_active_ipport_args.protocol = nat_key->protocol; + + if(TOPO_ACC_LINK_USER == device_handle->io_para.topo_mode){ + /* ACC_LINK接口, NAT转发表不存在, 肯定是SNAT的首包, 先查找NAT复用策略 */ + ret = flwd_access_snat_search_policy(tid, nat_key, &usable_active_ipport_args); + if(0 == ret){ + /* 无策略, 在dynamic-ip-pool里随机挑选一个IP */ + ret = flwd_access_snat_fetch_usable_ip_from_pool(flwd_thread_val[tid].flwd_ip_pool_dynamic_htable[nat_key->addr_type][(int)usable_active_ipport_args.dip_region_type ^ 1], + &flwd_thread_val[tid].flwd_ip_pool_dynamic_rwlock, + tid, nat_key, &usable_active_ipport_args); + if(ret < 0){ + flwd_log(RLOG_LV_INFO, 
"tuple4 %s not hit any policy, but no according usable active ip in %s dynamic pool !\n", + flwd_tuple5_ntop(tid, nat_key), + flwd_ip_region_ntop((int)usable_active_ipport_args.dip_region_type ^ 1)); + + if(flwd_cfg_val.use_static_pool_ip_if_no_dynamic != 0){ + ret = flwd_access_snat_fetch_usable_ip_from_pool(flwd_thread_val[tid].flwd_ip_pool_static_htable[nat_key->addr_type][(int)usable_active_ipport_args.dip_region_type ^ 1], + &flwd_thread_val[tid].flwd_ip_pool_static_rwlock, + tid, nat_key, &usable_active_ipport_args); + if(ret < 0){ + flwd_log(RLOG_LV_FATAL, "tuple4 %s not hit any policy, but no according usable ip in all %s static and dynamic pool!\n", + flwd_tuple5_ntop(tid, nat_key), + flwd_ip_region_ntop((int)usable_active_ipport_args.dip_region_type ^ 1)); + return NULL; + } + }else{ + flwd_log(RLOG_LV_FATAL, "tuple4 %s not hit any policy, but no according usable active ip in %s dynamic pool, unfortunately 'use_static_pool_ip_if_no_dynamic' is disable!\n", + flwd_tuple5_ntop(tid, nat_key), + flwd_ip_region_ntop((int)usable_active_ipport_args.dip_region_type ^ 1)); + return NULL; + } + } + }else if(-1 == ret){ + if(flwd_cfg_val.use_dynamic_pool_ip_if_no_static != 0){ + ret = flwd_access_snat_fetch_usable_ip_from_pool(flwd_thread_val[tid].flwd_ip_pool_static_htable[nat_key->addr_type][(int)usable_active_ipport_args.dip_region_type ^ 1], + &flwd_thread_val[tid].flwd_ip_pool_static_rwlock, + tid, nat_key, &usable_active_ipport_args); + if(ret < 0){ + flwd_log(RLOG_LV_FATAL, "tuple4 %s hit policy, but no according usable ip in all %s static and dynamic pool!\n", + flwd_tuple5_ntop(tid, nat_key), + flwd_ip_region_ntop((int)usable_active_ipport_args.dip_region_type ^ 1)); + return NULL; + } + }else{ + flwd_log(RLOG_LV_FATAL, "tuple4 %s hit policy, but no according usable ip in %s static pool, unfortunately 'use_dynamic_pool_ip_if_no_static' is disable!\n", + flwd_tuple5_ntop(tid, nat_key), + flwd_ip_region_ntop((int)usable_active_ipport_args.dip_region_type ^ 1)); + return NULL; + } + } + + nat_info = flwd_create_new_nat_info(tid, FLWD_NAT_TYPE_SNAT, nat_key, + &usable_active_ipport_args, raw_pkt); + + flwd_log(RLOG_LV_DEBUG, "---before signature, tuple4:%s", + flwd_tuple5_ntop(tid, &nat_info->outer_nat_tuple5)); + flwd_acc_user_generate_pkt_signature(tid, nat_info, raw_pkt); + flwd_log(RLOG_LV_DEBUG, "---after signature, tuple4:%s", + flwd_tuple5_ntop(tid, &nat_info->outer_nat_tuple5)); + + }else if(TOPO_ACC_LINK_FWD == device_handle->io_para.topo_mode){ + /* FWD方向来的首包, 说明是DNAT第一个包, 根据负载均衡策略查找内网真实服务器IP和端口 */ +#if FLWD_SUPPORT_DNAT + flwd_search_access_dnat_lb_policy(tid, nat_key, &actual_ipport_net_order); + + nat_info = flwd_create_new_nat_info(tid, FLWD_NAT_TYPE_DNAT, nat_key, + &usable_active_ipport_args, raw_pkt); +#endif + } + + if(NULL == nat_info){ +#if FLWD_SUPPORT_DNAT + assert(0); +#endif + return NULL; + } + + if(flwd_new_nat_session_sotre(tid, nat_info) < 0){ + return NULL; + } + + char inner_tuple5_str[256]; + char outer_tuple5_str[256]; + + flwd_log(10, "ACC: create new nat session, inner_tuple4:%s, outer_tuple4:%s\n", + flwd_tuple5_ntop_r(&nat_info->inner_nat_tuple5, inner_tuple5_str, 256), + flwd_tuple5_ntop_r(&nat_info->outer_nat_tuple5, outer_tuple5_str, 256)); + + return nat_info; +} + +/* + access网关user收包, 如果nat表不存在, 要验证合法的首包, + 必须是TCP->SYN, 或其他UDP包. 
+ + return value: + 0 : valid; + -1: invalid; +*/ +static int flwd_acc_user_first_pkt_verify(const char *raw_eth_data) +{ + const flwd_eth_hdr_t *flwd_ethhdr = (flwd_eth_hdr_t *)raw_eth_data; + const flwd_ipv4_hdr_t *flwd_ip4hdr; + const flwd_ipv6_hdr_t *flwd_ip6hdr; + unsigned short eth_type = ntohs(flwd_ethhdr->h_proto); + const flwd_tcp_hdr_t *flwd_tcphdr; + int ret = 0; + + if(ETH_P_IP == eth_type){ + flwd_ip4hdr = (flwd_ipv4_hdr_t *)((char *)flwd_ethhdr + sizeof(flwd_eth_hdr_t)); + if(IPPROTO_TCP == flwd_ip4hdr->ip_p){ + flwd_tcphdr = (flwd_tcp_hdr_t *)((char *)flwd_ip4hdr + flwd_ip4hdr->ip_hl * 4); + if(TH_SYN == flwd_tcphdr->th_flags){ + ret = 0; + }else{ + ret = -1; + } + }else if(IPPROTO_UDP == flwd_ip4hdr->ip_p){ + ret = 0; + } + }else if(ETH_P_IPV6 == eth_type){ + flwd_ip6hdr = (flwd_ipv6_hdr_t *)((char *)flwd_ethhdr + sizeof(flwd_eth_hdr_t)); + if(IPPROTO_TCP == flwd_ip6hdr->ip6_nxt_hdr){ + flwd_tcphdr = (flwd_tcp_hdr_t *)((char *)flwd_ip6hdr + sizeof(flwd_ipv6_hdr_t)); + if(TH_SYN == flwd_tcphdr->th_flags){ + ret = 0; + }else{ + ret = -1; + } + }else if(IPPROTO_UDP == flwd_ip4hdr->ip_p){ + ret = 0; + } + }else{ + assert(0); + } + + return ret; +} + +/* + access网关从fwd网关收包, 如果nat表不存在, 要验证合法的首包, + 必须是IP复用策略DNAT中的ip,port; + 同时还必须是TCP->SYN, 或其他UDP包. + + 如果是SNAT, 肯定应该能找到nat表中之前存储的元素. + + return value: + 0 : valid; + -1: invalid; +*/ +static int flwd_acc_fwd_first_pkt_verify(char *inner_mac_hdr) +{ + const flwd_eth_hdr_t *flwd_ethhdr = (flwd_eth_hdr_t *)inner_mac_hdr; + const flwd_ipv4_hdr_t *flwd_ip4hdr; + const flwd_ipv6_hdr_t *flwd_ip6hdr; + unsigned short eth_type = ntohs(flwd_ethhdr->h_proto); + const flwd_tcp_hdr_t *flwd_tcphdr; + int ret = 0; + + if(ETH_P_IP == eth_type){ + flwd_ip4hdr = (flwd_ipv4_hdr_t *)((char *)flwd_ethhdr + sizeof(flwd_eth_hdr_t)); + if(IPPROTO_TCP == flwd_ip4hdr->ip_p){ + flwd_tcphdr = (flwd_tcp_hdr_t *)((char *)flwd_ip4hdr + flwd_ip4hdr->ip_hl * 4); + if(TH_SYN == flwd_tcphdr->th_flags){ /* 对于DNAT来说 */ + ret = 0; + }if((TH_SYN|TH_ACK) == flwd_tcphdr->th_flags){ /* 对于SNAT来说 */ + ret = 0; + }else{ + ret = -1; + } + }else if(IPPROTO_UDP == flwd_ip4hdr->ip_p){ + ret = 0; + } + }else if(ETH_P_IPV6 == eth_type){ + flwd_ip6hdr = (flwd_ipv6_hdr_t *)((char *)flwd_ethhdr + sizeof(flwd_eth_hdr_t)); + if(IPPROTO_TCP == flwd_ip6hdr->ip6_nxt_hdr){ /* TODO, IPv6还有可能包括其他头部, 其实不应该这么直接取next_hdr */ + flwd_tcphdr = (flwd_tcp_hdr_t *)((char *)flwd_ip6hdr + sizeof(flwd_ipv6_hdr_t)); + if(TH_SYN == flwd_tcphdr->th_flags){ /* 对于DNAT来说 */ + ret = 0; + }if((TH_SYN|TH_ACK) == flwd_tcphdr->th_flags){ /* 对于SNAT来说 */ + ret = 0; + }else{ + ret = -1; + } + }else if(IPPROTO_UDP == flwd_ip4hdr->ip_p){ + ret = 0; + } + }else{ + assert(0); + } + + return ret; +} + +/* 客户端接入方向纯IP包, 或者剥去隧道后(如L2TP, PPTP等外层封装)的真实客户端内层数据包入口函数 */ +int flwd_access_kernal_pkt_input(flwd_device_handle_t *device_handle, + int tid, flwd_raw_pkt_t *raw_pkt) +{ + void *ip_strategy; + flwd_nat_info_t *nat_info; + char nat_key_buf[sizeof(flwd_tuple4v6_t)]; /* v4, v6共用, 使用v6空间最大的 */ + flwd_tuple5_t nat_key; + unsigned char dir_reverse; + int ret; + int ltsm_stat; + unsigned int out_opt = 0; + + ret = flwd_build_tuple4_key(tid, &nat_key, raw_pkt); + if(ret < 0){ + return -1; + } + + nat_info = (flwd_nat_info_t *)MESA_htable_search(flwd_thread_val[tid].nat_info_table, + (const uchar *)&nat_key, sizeof(flwd_tuple5_t)); + if(NULL == nat_info){ + if((TOPO_ACC_LINK_USER == device_handle->io_para.topo_mode) + && (flwd_acc_user_first_pkt_verify(raw_pkt->outer_pkt_data) < 0)){ + return -1; + }else if((TOPO_ACC_LINK_FWD == 
device_handle->io_para.topo_mode) + && (flwd_acc_fwd_first_pkt_verify(raw_pkt->inner_pkt_data) < 0)){ + return -1; + } + + nat_info = flwd_new_nat_session(device_handle, tid, &nat_key, raw_pkt); + if(NULL == nat_info){ + if(TOPO_ACC_LINK_FWD == device_handle->io_para.topo_mode){ + flwd_log(30, "recv pkt from gdev, but not found nat_info:%s!", + flwd_tuple5_ntop(tid, &nat_key)); + } + return -1; + } + out_opt |= FLWD_OUTOPT_FIRST_PKT; + } + + ltsm_stat = flwd_do_nat(device_handle, nat_info, raw_pkt); + + flwd_access_output(device_handle, tid, nat_info, raw_pkt, out_opt); + +#if FLWD_USE_LTSM_FOR_QUICK_CLOSE + if(FTSM_CLOSED == ltsm_stat){ + flwd_log(10, "tuple4 %s state is CLOSED, quick destroy nat_info!\n", + flwd_tuple5_ntop(tid, &nat_key)); + MESA_htable_del(flwd_thread_val[tid].nat_info_table, + (const uchar *)&nat_key, sizeof(flwd_tuple5_t), NULL); + } +#endif + + return 0; +} + +static void *flwd_access_nat_table_create(void) +{ + int opt_int; + MESA_htable_handle htable; + + htable = MESA_htable_born(); + assert(htable != NULL); + + opt_int = 0; /* 每个线程独立, 无锁模式 */ + MESA_htable_set_opt(htable, MHO_THREAD_SAFE, &opt_int, sizeof(int)); + + opt_int = 1024 * 256; + MESA_htable_set_opt(htable, MHO_HASH_SLOT_SIZE, &opt_int, sizeof(int)); + + MESA_htable_set_opt(htable, MHO_HASH_MAX_ELEMENT_NUM, &flwd_cfg_val.nat_htable_max_num, sizeof(int)); + + /* TODO, 目前先用超时淘汰, 对于TCP应该有完整状态机, TCP流结束主动淘汰, 尽早释放内存 */ + MESA_htable_set_opt(htable, MHO_EXPIRE_TIME, &flwd_cfg_val.nat_htable_timeout, sizeof(int)); + + opt_int = HASH_ELIMINATE_ALGO_LRU; + MESA_htable_set_opt(htable, MHO_ELIMIMINATE_TYPE, &opt_int, sizeof(int)); + + MESA_htable_set_opt(htable, MHO_CBFUN_KEY_COMPARE, (void *)&flwd_nat_htable_key_cmp, sizeof(void *)); + + MESA_htable_set_opt(htable, MHO_CBFUN_KEY_TO_INDEX, (void *)&flwd_nat_htable_key2index, sizeof(void *)); + + MESA_htable_set_opt(htable, MHO_CBFUN_COMPLEX_KEY_DUP, (void *)flwd_nat_htable_key_dup, sizeof(void *)); + + MESA_htable_set_opt(htable, MHO_CBFUN_COMPLEX_KEY_FREE, (void *)flwd_nat_htable_key_free, sizeof(void *)); + + MESA_htable_set_opt(htable, MHO_CBFUN_DATA_FREE, (void *)&flwd_access_nat_htable_data_free, sizeof(void *)); + + MESA_htable_set_opt(htable, MHO_CBFUN_DATA_EXPIRE_NOTIFY, (void *)&flwd_access_nat_htable_data_expire_notify, sizeof(void *)); + + opt_int = 1; + MESA_htable_set_opt(htable, MHO_AUTO_UPDATE_TIME, &opt_int, sizeof(int)); + + opt_int = 0; + MESA_htable_set_opt(htable, MHO_SCREEN_PRINT_CTRL, &opt_int, sizeof(int)); + + opt_int = 100; + MESA_htable_set_opt(htable, MHO_HASH_LIST_COLLIDE_THRESHOLD, &opt_int, sizeof(int)); + + char *err_log = (char *)"./flwd_hash_collide.log"; + MESA_htable_set_opt(htable, MHO_HASH_LOG_FILE, (void *)err_log, strlen(err_log)); + + int ret = MESA_htable_mature(htable); + assert(ret >= 0); + + return htable; +} + + +/* + 接入网关从转发网关方向收包, 都是vxlan包. 
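   For reference, the encapsulation layout that the pointer arithmetic below assumes;
   the sizes shown are the standard header sizes, and FLWD_VXLAN_OUTER_PACKET_LEN is
   project-defined (presumably the sum of the outer headers, with flwd_vxlan_hdr_t
   possibly differing from the 8-byte standard VXLAN header):

       //  outer eth (14) | outer IPv4 (20) | outer UDP (8) | vxlan hdr | inner eth (14) | inner IP ...
       //  |<------------- FLWD_VXLAN_OUTER_PACKET_LEN -------------->|
       //
       //  inner_pkt_data     = outer_pkt_data + FLWD_VXLAN_OUTER_PACKET_LEN;   // inner ethernet header
       //  inner_ip_layer_hdr = inner_pkt_data + sizeof(flwd_eth_hdr_t);        // inner IP header
       //  inner_pkt_len      = outer_pkt_len  - FLWD_VXLAN_OUTER_PACKET_LEN;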
+*/ +static int flwd_acc_fwd_pkt_input(flwd_device_handle_t *rcv_device_handle, + int tid, flwd_raw_pkt_t *raw_pkt) +{ + int ret; + + raw_pkt->inner_ip_layer_hdr = (const char *)raw_pkt->outer_pkt_data + FLWD_VXLAN_OUTER_PACKET_LEN + sizeof(flwd_eth_hdr_t); /* 跳过整个外层vxlan头部和内层ethernet头部 */ + raw_pkt->inner_pkt_data = (char *)raw_pkt->outer_pkt_data + FLWD_VXLAN_OUTER_PACKET_LEN; /* 指向内层ethernet起始地址 */ + raw_pkt->inner_pkt_len = raw_pkt->outer_pkt_len - FLWD_VXLAN_OUTER_PACKET_LEN; + ret = flwd_access_kernal_pkt_input(rcv_device_handle, tid, raw_pkt); + + return ret; +} + +int flwd_acc_pkt_input(flwd_device_handle_t *device_handle, int tid, flwd_raw_pkt_t *raw_pkt) +{ + int ret; + + flwd_thread_val[tid].pkt_stat.eth_pkt_num++; + flwd_thread_val[tid].pkt_stat.eth_pkt_byte += raw_pkt->outer_pkt_len; + + if(TOPO_ACC_LINK_FWD == device_handle->io_para.topo_mode){ + ret = flwd_rubbish_pkt_identify(device_handle, raw_pkt, 1); + }else{ + ret = flwd_rubbish_pkt_identify(device_handle, raw_pkt, 0); + } + if(ret != 0){ + return 0; + } + + ret = flwd_protocol_stack_process(device_handle, tid, raw_pkt); + if(ret != 0){ + return 0; + } + + switch((int)device_handle->io_para.topo_mode){ + case TOPO_ACC_LINK_USER: + flwd_log(10, "ACC: recv pkt from user client, tuple4:%s\n", + flwd_debug_print_tuple4_detail((char *)raw_pkt->outer_pkt_data + sizeof(flwd_eth_hdr_t), tid)); + flwd_acc_user_pkt_input(device_handle, tid, raw_pkt); + break; + + case TOPO_ACC_LINK_FWD: + { + char inner_tuple4[128]; + char outer_tuple4[128]; + flwd_log(10, "ACC: recv pkt from forward gateway, inner_tuple4:%s, outer_tuple4:%s\n", + flwd_debug_print_tuple4_detail_r((char *)raw_pkt->outer_pkt_data + FLWD_VXLAN_OUTER_PACKET_LEN + sizeof(flwd_eth_hdr_t), inner_tuple4, 128), + flwd_debug_print_tuple4_detail_r((char *)raw_pkt->outer_pkt_data + sizeof(flwd_eth_hdr_t), outer_tuple4, 128)); + + flwd_acc_fwd_pkt_input(device_handle, tid, raw_pkt); + } + break; + } + + return 0; +} + + +void *flwd_packet_io_work_thread(void *arg) +{ + int ret1, ret2, ret3; + int tseq = *((int *)arg); + flwd_raw_pkt_t raw_pkt; + unsigned long long last_idle_call_in_ms = 0; + flwd_device_handle_t *dev_handle; + int tot_work_times = 0; /* 总结收包, idle次数 */ + int success_work_times_in_recent_100 = 0; /* 最近100次成功的次数 */ + + while(1){ + dev_handle = &flwd_global_val.global_io_handle[TOPO_ACC_LINK_USER]; + ret1 = dev_handle->low_level_pkt_recv(dev_handle, tseq, &raw_pkt.low_level_mbuff); + if(ret1 >= 0){ + raw_pkt.outer_pkt_data = dev_handle->low_level_mbuff_mtod(raw_pkt.low_level_mbuff); + raw_pkt.outer_pkt_len = dev_handle->low_level_mbuff_get_pkt_len(raw_pkt.low_level_mbuff); + flwd_pre_process_pkt_input(dev_handle, &raw_pkt); + flwd_acc_pkt_input(dev_handle, tseq, &raw_pkt); + dev_handle->low_level_pkt_free(dev_handle, tseq, raw_pkt.low_level_mbuff); + } + + dev_handle = &flwd_global_val.global_io_handle[TOPO_ACC_LINK_FWD]; + ret2 = dev_handle->low_level_pkt_recv(dev_handle, tseq, &raw_pkt.low_level_mbuff); + if(ret2 >= 0){ + raw_pkt.outer_pkt_data = dev_handle->low_level_mbuff_mtod(raw_pkt.low_level_mbuff); + raw_pkt.outer_pkt_len = dev_handle->low_level_mbuff_get_pkt_len(raw_pkt.low_level_mbuff); + flwd_pre_process_pkt_input(dev_handle, &raw_pkt); + flwd_acc_pkt_input(dev_handle, tseq, &raw_pkt); + dev_handle->low_level_pkt_free(dev_handle, tseq, raw_pkt.low_level_mbuff); + } + + tot_work_times++; + + if((ret1 < 0) && (ret2 < 0)){ + ret3 = flwd_idle_call(tseq); + if(0 == ret3){ + /* 两个网卡都无数据, idle_call()也无事可做, 休眠一会 */ + 
flwd_adapt_sleep(success_work_times_in_recent_100); + }else{ + success_work_times_in_recent_100++; + } + }else{ + success_work_times_in_recent_100++; + } + + if(tot_work_times >= 100){ + /////printf("%d/100\n", success_work_times_in_recent_100); + tot_work_times = 0; + success_work_times_in_recent_100 = 0; + } + } + + return NULL; +} + +/* 接入网关初始化总入口 */ +int flwd_gateway_init(void) +{ + int i, ret; + char str_tmp[256]; + +#if FLWD_NO_ACTIVE_IP_DISCOVER + printf("\033[1;31;40m[Warning] TEST NO IP DISCOVER is enable!\033[0m\n" ); + sleep(2); +#endif + + MESA_load_profile_int_def(FLWD_CONFIG_FILE, "main", "nat_htable_max_num", &flwd_cfg_val.nat_htable_max_num, 50000); + MESA_load_profile_int_def(FLWD_CONFIG_FILE, "main", "nat_htable_timeout", &flwd_cfg_val.nat_htable_timeout, 30); + + for(i = 0; i < flwd_cfg_val.tot_thread_count; i++){ + flwd_thread_val[i].nat_info_table = flwd_access_nat_table_create(); + } + + + /* TODO, PPTP, L2TP server 初始化 */ + + /* TODO, IP地址池全局, ip-hot-pool 结构初始化 */ + + /* IP复用策略结构初始化, HASH表, 对于SNAT, key为dip+dport, 对于DNAT, key为RIP+RPORT */ + + ret = flwd_packet_io_init(TOPO_ACC_LINK_USER, TOPO_ACC_LINK_FWD); + if(ret < 0){ + return -1; + } + + ret = flwd_access_active_ip_init(); + if(ret < 0){ + return -1; + } + + ret = flwd_network_conn_init(); + if(ret < 0){ + return -1; + } + + /* NOTE: maat最好放在最后调用, 因为有callback, 初始化后回被立即调用, 如果其他数据结构还没初始化完成, 比如flwd_access_active_ip_init(), callback会出错! */ + ret = flwd_access_maat_init(); + if(ret < 0){ + return -1; + } + + return 0; +} + -- cgit v1.2.3