diff options
| author | Lu Qiuwen <[email protected]> | 2019-06-19 12:02:25 +0800 |
|---|---|---|
| committer | Lu Qiuwen <[email protected]> | 2019-06-19 12:02:25 +0800 |
| commit | 8d057461b81314930a2d36f871c30a7ce003b32b (patch) | |
| tree | 1c0fa1161338cb3551da576fa253fffff62bc565 /core.c | |
| parent | f3bbd2126b16ff144e7f57e81a80fe8d88fc988c (diff) | |
整理目录结构,修正当sendmsg()失败时sk_buff泄露的问题
Diffstat (limited to 'core.c')
| -rw-r--r-- | core.c | 963 |
1 files changed, 963 insertions, 0 deletions
@@ -0,0 +1,963 @@ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/netfilter.h> +#include <linux/if.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/file.h> +#include <linux/un.h> +#include <linux/syscalls.h> +#include <linux/version.h> + +#include <net/ip.h> +#include <net/tcp.h> +#include <net/netfilter/nf_queue.h> +#include <net/inet_hashtables.h> + +#include "cmsg.h" +#include "stat.h" + +#ifndef TFE_KMOD_VERSION +#define TFE_KMOD_VERSION "Unknown" +#endif + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Lu Qiuwen"); +MODULE_DESCRIPTION("Tango Frontend Engine Kernel Module"); +MODULE_VERSION(TFE_KMOD_VERSION); + +#ifdef PATH_MAX +#define TFE_STRING_SIZE PATH_MAX +#else +#define TFE_STRING_SIZE 2048 +#endif + +struct tfe_kmod_instance +{ + /* Path */ + char str_scm_socket_path[PATH_MAX]; + struct sockaddr_un sk_scm_socket_path; + + /* Config */ + char ifname_cmsg_input[IFNAMSIZ]; + char ifname_bfd_input[IFNAMSIZ]; + + /* Instances */ + struct net * netspace; + struct socket * scm_socket; + struct nf_hook_ops nf_hook_ops_cmsg_input; + struct nf_hook_ops nf_hook_ops_bfd_input; + struct net_device * netdev_cmsg_input; + struct net_device * netdev_bfd_input; + + /* workqueue */ + struct kthread_worker * k_worker; + + /* Stat */ + struct tfe_kmod_stat * stat; +}; + +static char * ifname_cmsg = "lo"; +static char * ifname_bfd = "lo"; +static char * scm_socket = "/var/run/.tfe_kmod_scm_socket"; +static char * scm_socket_kern = "/var/run/.tfe_kmod_scm_socket_kern"; +static unsigned int fd_so_mask = 0x65; + +static const char * rm_program_path = "/bin/rm"; +static char * rm_program_envp[] = { "HOME=/", "TERM=linux", "PATH=/usr/bin:/bin", NULL }; + +static struct tfe_kmod_instance tfe_kmod_instance; +struct tfe_kmod_instance * g_tfe_kmod_instance = &tfe_kmod_instance; + +module_param(ifname_cmsg, charp, S_IRUGO); +MODULE_PARM_DESC(ifname_cmsg, "Network interface for incoming control message"); +module_param(ifname_bfd, charp, S_IRUGO); +MODULE_PARM_DESC(ifname_bfd, "Network interface for BFD keepalive message"); +module_param(scm_socket, charp, S_IRUGO); +MODULE_PARM_DESC(scm_socket, "SCM socket file location"); + +static int __call_sys_close(int fd) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0) + return ksys_close(fd); +#else + return sys_close(fd); +#endif +} + +static inline bool is_ipv4_pkt(const struct sk_buff * skb) +{ + return skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->version == 4; +} + +static inline bool is_ipv6_pkt(const struct sk_buff * skb) +{ + return skb->protocol == htons(ETH_P_IPV6) && ipv6_hdr(skb)->version == 6; +} + +static int __fd_send_to_tfe_userspace(struct socket * un_sock, + int fd_client, int fd_server, const uint8_t * msg, unsigned int msglen) +{ + union + { + char buf[CMSG_SPACE(sizeof(int) * 2)]; + struct cmsghdr align; + } controlMsg; + + struct cmsghdr *cmsgp; + struct msghdr msgh; + int result = 0; + + struct kvec iv; + iv.iov_base = (void *)msg; + iv.iov_len = msglen; + + memset(&msgh, 0, sizeof(msgh)); + msgh.msg_name = &g_tfe_kmod_instance->sk_scm_socket_path; + msgh.msg_namelen = sizeof(g_tfe_kmod_instance->sk_scm_socket_path); + msgh.msg_control = controlMsg.buf; + msgh.msg_controllen = sizeof(controlMsg.buf); + msgh.msg_flags = MSG_DONTWAIT; + + memset(controlMsg.buf, 0, sizeof(controlMsg.buf)); + cmsgp = CMSG_FIRSTHDR(&msgh); + cmsgp->cmsg_len = CMSG_LEN(sizeof(int) * 2); + cmsgp->cmsg_level = SOL_SOCKET; + cmsgp->cmsg_type = SCM_RIGHTS; + + ((int *) CMSG_DATA(cmsgp))[0] = fd_server; + ((int *) CMSG_DATA(cmsgp))[1] = fd_client; + + result = kernel_sendmsg(un_sock, &msgh, &iv, 1, msglen); + if(unlikely(result < 0) && (result == -ECONNREFUSED || result == -ENOENT)) + { + return result; + } + else if(unlikely(result < 0)) + { + pr_err("Failed at sendmsg to scm unix domain socket, result = %d\n", result); + return result; + } + + __call_sys_close(fd_client); + __call_sys_close(fd_server); + + return result; +} + +static int __tcp_restore_fd_create(struct net * netspace, const struct tcp_restore_info_endpoint * endpoint, + const struct tcp_restore_info_endpoint * peer) +{ + struct socket * __sock = NULL; + struct file * __file = NULL; + int __fd = 0; + + int result = 0; + int sockopt = 0; + + struct tcp_repair_opt tcp_repair_opts[8]; + unsigned int nr_tcp_repair_opts = 0; + + struct tcp_repair_window tcp_repair_window; + memset(&tcp_repair_window, 0, sizeof(struct tcp_repair_window)); + + if (endpoint->addr.ss_family == AF_INET) + { + result = sock_create_kern(netspace, AF_INET, SOCK_STREAM, IPPROTO_TCP, &__sock); + } + else if(endpoint->addr.ss_family == AF_INET6) + { + result = sock_create_kern(netspace, AF_INET6, SOCK_STREAM, IPPROTO_TCP, &__sock); + } + else + { + printk(KERN_ERR "Invalid endpoint's ss_family, ss_family = %x\n", endpoint->addr.ss_family); + goto errout; + } + + if (unlikely(result < 0)) + { + printk(KERN_ERR "Failed at creating client's socket at kern\n"); + goto errout; + } + + sockopt = 1; + + /* Setup TCP REPAIR Status */ + result = kernel_setsockopt(__sock, SOL_SOCKET, SO_REUSEADDR, (char *)&sockopt, sizeof(sockopt)); + if (unlikely(result < 0)) + { + printk(KERN_ERR "Failed at setsockopt(SO_REUSEADDR), result = %d\n", result); + goto errout; + } + + result = kernel_setsockopt(__sock, SOL_IP, IP_TRANSPARENT, (char *)&sockopt, sizeof(sockopt)); + if (unlikely(result < 0)) + { + printk(KERN_ERR "Failed at setsockopt(IP_TRANSPARENT), result = %d\n", result); + goto errout; + } + + result = kernel_setsockopt(__sock, SOL_TCP, TCP_REPAIR, (char *)&sockopt, sizeof(sockopt)); + if (unlikely(result < 0)) + { + printk(KERN_ERR "Failed at setsockopt(TCP_REPAIR) begin, result = %d\n", result); + goto errout; + } + + sockopt = fd_so_mask; + result = kernel_setsockopt(__sock, SOL_SOCKET, SO_MARK, (char *)&sockopt, sizeof(sockopt)); + if (unlikely(result < 0)) + { + printk(KERN_ERR "Failed at setsockopt(SO_MARK), result = %d\n", result); + goto errout; + } + + /* Setup SEQ/ACK and TCP options */ + sockopt = TCP_SEND_QUEUE; + result = kernel_setsockopt(__sock, SOL_TCP, TCP_REPAIR_QUEUE, (char *)&sockopt, sizeof(sockopt)); + if (unlikely(result < 0)) + { + printk(KERN_ERR "Failed at setsockopt(TCP_REPAIR_QUEUE), result = %d\n", result); + goto errout; + } + + sockopt = endpoint->seq; + result = kernel_setsockopt(__sock, SOL_TCP, TCP_QUEUE_SEQ, (char *)&sockopt, sizeof(sockopt)); + if (unlikely(result < 0)) + { + printk(KERN_ERR "Failed at setsockopt(TCP_QUEUE_SEQ), result = %d\n", result); + goto errout; + } + + sockopt = TCP_RECV_QUEUE; + result = kernel_setsockopt(__sock, SOL_TCP, TCP_REPAIR_QUEUE, (char *)&sockopt, sizeof(sockopt)); + if (unlikely(result < 0)) + { + printk(KERN_ERR "Failed at setsockopt(TCP_QUEUE_SEQ), result = %d\n", result); + goto errout; + } + + sockopt = endpoint->ack; + result = kernel_setsockopt(__sock, SOL_TCP, TCP_QUEUE_SEQ, (char *)&sockopt, sizeof(sockopt)); + if (unlikely(result < 0)) + { + printk(KERN_ERR "Failed at setsockopt(TCP_QUEUE_SEQ), result = %d\n", result); + goto errout; + } + +#ifndef TCPOPT_MAXSEG +#define TCPOPT_MAXSEG 2 +#endif + +#ifndef TCPOPT_WINDOW +#define TCPOPT_WINDOW 3 +#endif + +#ifndef TCPOPT_SACK_PERMITTED +#define TCPOPT_SACK_PERMITTED 4 +#endif + + tcp_repair_opts[nr_tcp_repair_opts].opt_code = TCPOPT_MAXSEG; + tcp_repair_opts[nr_tcp_repair_opts].opt_val = min(endpoint->mss, peer->mss); + nr_tcp_repair_opts++; + + if (endpoint->sack_perm && peer->sack_perm) + { + tcp_repair_opts[nr_tcp_repair_opts].opt_code = TCPOPT_SACK_PERMITTED; + tcp_repair_opts[nr_tcp_repair_opts].opt_val = 0; + nr_tcp_repair_opts++; + } + + if (endpoint->wscale_perm) + { + tcp_repair_opts[nr_tcp_repair_opts].opt_code = TCPOPT_WINDOW; + tcp_repair_opts[nr_tcp_repair_opts].opt_val = (endpoint->wscale << 16 ) | peer->wscale; + nr_tcp_repair_opts++; + } + + /* Bind address and connect to peer endpoint */ + result = kernel_bind(__sock, (struct sockaddr *)&endpoint->addr, sizeof(endpoint->addr)); + if (unlikely(result < 0)) + { + printk(KERN_ERR "Failed at bind(), result = %d\n", result); + goto errout; + } + + result = kernel_connect(__sock, (struct sockaddr *)&peer->addr, sizeof(peer->addr), 0); + if (unlikely(result < 0)) + { + printk(KERN_ERR "Failed at connect(), result = %d\n", result); + goto errout; + } + + result = kernel_setsockopt(__sock, SOL_TCP, TCP_REPAIR_OPTIONS, (char *)tcp_repair_opts, + nr_tcp_repair_opts * sizeof(struct tcp_repair_opt)); + + if (unlikely(result < 0)) + { + printk(KERN_ERR "Failed at setsockopt(TCP_REPAIR_OPTIONS), result = %d\n", result); + goto errout; + } + +#if 0 + /* Perpare Window Setup */ + tcp_repair_window.snd_wl1 = peer->seq; + tcp_repair_window.snd_wnd = peer->window; + tcp_repair_window.max_window = peer->window; + + tcp_repair_window.rcv_wnd = endpoint->window; + tcp_repair_window.rcv_wup = endpoint->ack; + + result = kernel_setsockopt(__sock, SOL_TCP, TCP_REPAIR_WINDOW, (char *)&tcp_repair_window, + sizeof(tcp_repair_window)); + + if (unlikely(result < 0)) + { + printk(KERN_ERR "Failed at setsockopt(TCP_REPAIR_WINDOW), result = %d\n", result); + goto errout; + } +#endif + + sockopt = 0; + result = kernel_setsockopt(__sock, SOL_TCP, TCP_REPAIR, (char *)&sockopt, sizeof(sockopt)); + if (unlikely(result < 0)) + { + printk(KERN_ERR "Failed at setsockopt(TCP_REPAIR) end, result = %d\n", result); + goto errout; + } + + /* alloc a new fd */ + __fd = get_unused_fd_flags(O_CLOEXEC); + if (unlikely(__fd < 0)) + { + pr_err("Failed at get_unused_fd_flags(), result = %d\n", __fd); + goto errout; + } + + /* __sock's ownership is transfered to __file */ + __file = sock_alloc_file(__sock, O_CLOEXEC, NULL); + __sock = NULL; + + if (unlikely(IS_ERR(__file))) + { + pr_err("Failed at sock_alloc_file(), result = %ld\n", PTR_ERR(__file)); + result = PTR_ERR(__file); goto errout; + } + + fd_install(__fd, __file); + return __fd; + +errout: + if(unlikely(__fd > 0)) + { + put_unused_fd(__fd); + } + + if(unlikely(__sock != NULL)) + { + sock_release(__sock); + } + + return result; +} + +static bool tcp_find_option(uint8_t option, const struct sk_buff *skb, + unsigned int protoff, unsigned int optlen, uint8_t * out_optlen, + char * out_optvalue, unsigned int out_optvalue_len) +{ + /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ + const uint8_t *op; + uint8_t _opt[60 - sizeof(struct tcphdr)]; + unsigned int i; + + if (!optlen) + return false; + + /* If we don't have the whole header, drop packet. */ + op = skb_header_pointer(skb, protoff + sizeof(struct tcphdr), optlen, _opt); + if (op == NULL) + { + pr_debug("skb=%p, don't have whole header, ignore it.\n", skb); + return false; + } + + for (i = 0; i < optlen; ) + { + uint8_t __optlen; + uint8_t __valuelen; + + if (op[i] == option) + { + if(op[i] < 2) + { + *out_optlen = 0; + return true; + } + + __optlen = op[i+1]; + if(unlikely(__optlen <= 2)) + { + pr_err_ratelimited("Invalid tcp option length, must be larger than 2, but the value is %u\n", __optlen); + return false; + } + + __valuelen = __optlen - 2; + if(unlikely(__valuelen > out_optvalue_len)) + { + pr_warn_ratelimited("tcp option length is larger input buffer\n"); + return false; + } + + *out_optlen = __valuelen; + memcpy(out_optvalue, &op[i+2], __valuelen); + return true; + } + + if (op[i] < 2) i++; + else i += op[i+1]?:1; + } + + return false; +} + +void tfe_kernel_tcp_restore_wkqueue_entry(struct kthread_work * wk) +{ + struct tcp_restore_info * restore_info = container_of(wk, struct tcp_restore_info, kthread_work); + struct tfe_kmod_instance * instance = restore_info->instance; + + int fd_client = 0; + int fd_server = 0; + int result = 0; + + tfe_kmod_stat_add(instance->stat, TFE_KMOD_STAT_RESTORE_NEW, 1); + + /* Create FDs */ + fd_client = __tcp_restore_fd_create(instance->netspace, &restore_info->client, &restore_info->server); + if(unlikely(fd_client < 0)) + { + tfe_kmod_stat_add(instance->stat, TFE_KMOD_STAT_RESTORE_FAIL_AT_RESTORE, 1); + goto errout; + } + + fd_server = __tcp_restore_fd_create(instance->netspace, &restore_info->server, &restore_info->client); + if(unlikely(fd_server < 0)) + { + tfe_kmod_stat_add(instance->stat, TFE_KMOD_STAT_RESTORE_FAIL_AT_RESTORE, 1); + goto errout; + } + + /* Send FDs to userspace application */ + result = __fd_send_to_tfe_userspace(instance->scm_socket, fd_client, fd_server, + restore_info->cmsg, restore_info->cmsg_len); + + if(unlikely(result < 0)) + { + tfe_kmod_stat_add(instance->stat, TFE_KMOD_STAT_RESTORE_FAIL_AT_SENDMSG, 1); + goto errout; + } + + tcp_restore_info_dump_to_log(restore_info); + goto out; + +errout: + if(unlikely(fd_client != 0)) + { + __call_sys_close(fd_client); + } + + if(unlikely(fd_server != 0)) + { + __call_sys_close(fd_server); + } + + goto out; + +out: + kfree(restore_info); + nf_reinject(restore_info->nf_queue_entry, NF_ACCEPT); + return; +} + +union __cmsg_so_mark +{ + __u32 mark_value; + struct + { + char magic[2]; + __u16 offset; + } mark_struct; +}; + +static bool __is_cmsg_by_mark(const struct sk_buff * skb) +{ + union __cmsg_so_mark * mark = (union __cmsg_so_mark *)&skb->mark; + return !!(mark->mark_struct.magic[0] == 0x4d && mark->mark_struct.magic[1] == 0x5a); +} + +static void __set_cmsg_by_mark(struct sk_buff * skb, uint16_t cmsg_offset) +{ + union __cmsg_so_mark * mark = (union __cmsg_so_mark *)&skb->mark; + mark->mark_struct.magic[0] = 0x4d; + mark->mark_struct.magic[1] = 0x5a; + mark->mark_struct.offset = cmsg_offset; +} + +static bool __get_cmsg_by_mark(struct sk_buff * skb, uint16_t * cmsg_offset) +{ + union __cmsg_so_mark * mark = (union __cmsg_so_mark *)&skb->mark; + if(__is_cmsg_by_mark(skb)) + { + *cmsg_offset = mark->mark_struct.offset; + return true; + } + + return false; +} + +static void tfe_kernel_tcp_queue_hook_drop(struct net * net) +{ + return; +} + +int tfe_kernel_tcp_queue_outfn(struct nf_queue_entry *entry, unsigned int queuenum) +{ + struct sk_buff * skb = entry->skb; + struct tfe_kmod_instance * instance = g_tfe_kmod_instance; + + struct tcphdr _tcp_hdr; + struct tcphdr * tcp_hdr; + unsigned int tcp_hdr_len; + + const uint8_t * tcp_payload; + unsigned int tcp_payload_len; + + const uint8_t * cmsg_payload; + unsigned int cmsg_payload_len; + + /* Restore info */ + struct tcp_restore_info * restore_info = NULL; + + /* TCP Options */ + uint16_t cmsg_offset; + + /* Result */ + int result = 0; + if(!__get_cmsg_by_mark(skb, &cmsg_offset)) + { + goto errout; + } + + skb_linearize(skb); + + /* TCP Header */ + tcp_hdr = skb_header_pointer(skb, skb_transport_offset(skb), sizeof(_tcp_hdr), &_tcp_hdr); + tcp_hdr_len = tcp_hdr->doff * 4; + + /* Begin to construct restore information, only happends on cmsg arrivals at first time */ + BUG_ON(!(skb_transport_header(skb) != NULL)); + BUG_ON(!(skb->len >= (skb_transport_offset(skb) + tcp_hdr_len))); + + /* TCP Payload */ + tcp_payload = skb_transport_header(skb) + tcp_hdr_len; + tcp_payload_len = skb->len - skb_transport_offset(skb) - tcp_hdr_len; + + /* Cmsg */ + BUG_ON(tcp_payload_len < cmsg_offset); + cmsg_payload = tcp_payload + cmsg_offset; + cmsg_payload_len = tcp_payload_len - cmsg_offset; + + restore_info = (struct tcp_restore_info *)kmalloc(sizeof(struct tcp_restore_info), GFP_ATOMIC); + if(unlikely(restore_info == NULL)) + { + pr_err("Failed at alloc memory for restore info, drop it.\n"); + goto errout; + } + + /* Init the restore info */ + kthread_init_work(&restore_info->kthread_work, tfe_kernel_tcp_restore_wkqueue_entry); + restore_info->instance = g_tfe_kmod_instance; + restore_info->nf_queue_entry = entry; + + /* Construct restore info from packet */ + result = tcp_restore_info_parse_from_skb(skb, restore_info); + if(unlikely(result < 0)) + { + goto errout; + } + + /* ClientHello with leader */ + result = tcp_restore_info_parse_from_cmsg(cmsg_payload, cmsg_payload_len, restore_info); + if(unlikely(result < 0)) + { + goto errout; + } + + /* Remove cmsg from clienthello, trim the skb and ip payload length */ + skb_trim(skb, skb->len - cmsg_payload_len); + if(skb->protocol == ntohs(ETH_P_IP)) + { + struct iphdr * p_ip_hdr = ip_hdr(skb); + p_ip_hdr->tot_len = htons(ntohs(p_ip_hdr->tot_len) - cmsg_payload_len); + } + else if(skb->protocol == ntohs(ETH_P_IPV6)) + { + struct ipv6hdr * p_ipv6_hdr = ipv6_hdr(skb); + p_ipv6_hdr->payload_len = htons(ntohs(p_ipv6_hdr->payload_len) - cmsg_payload_len); + } + + skb->csum_valid = 1; + + /* Defer the tcp restore workload to kthread */ + tcp_restore_info_dump_to_log(restore_info); + kthread_queue_work(instance->k_worker, &restore_info->kthread_work); + + /* success, entry will be free in kthread_work */ + return 0; + +errout: + return -ENOBUFS; +} + +unsigned int tfe_kernel_tcp_hook_entry(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) +{ + /* Filter ClientHello from all incoming packets, + * ClientHello may exist in all TCP packets */ + + struct tfe_kmod_instance * instance; + struct iphdr * ip_hdr = NULL; + struct ipv6hdr * ipv6_hdr = NULL; + + /* TCP header */ + struct tcphdr _tcp_hdr; + struct tcphdr * tcp_hdr; + unsigned int tcp_hdr_len; + + /* TCP Options */ + uint8_t restore_tcpopt_length; + bool is_hit_tcpopt; + uint16_t cmsg_offset; + + instance = (struct tfe_kmod_instance *)priv; + BUG_ON(instance == NULL || instance->netspace == NULL); + +#if 0 + if(state->in->ifindex != instance->netdev_cmsg_input->ifindex) + { + goto not_mine; + } +#endif + + if(skb->protocol == htons(ETH_P_IP)) + { + struct iphdr _ip_hdr; + ip_hdr = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip_hdr), &_ip_hdr); + + /* Not continous at IP header, or not TCP protocol, ignore it */ + if (!ip_hdr || ip_hdr->version != 4 || ip_hdr->protocol != IPPROTO_TCP) + { + goto not_mine; + } + } + else if(skb->protocol == htons(ETH_P_IPV6)) + { + struct ipv6hdr _ipv6_hdr; + ipv6_hdr = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ipv6_hdr), &_ipv6_hdr); + + /* Not continous at IPv6 header, or not TCP protocol, ignore it */ + if (!ipv6_hdr || ipv6_hdr->version != 6 || ipv6_hdr->nexthdr != IPPROTO_TCP) + { + goto not_mine; + } + } + else + { + goto not_mine; + } + + tcp_hdr = skb_header_pointer(skb, skb_transport_offset(skb), sizeof(_tcp_hdr), &_tcp_hdr); + tcp_hdr_len = tcp_hdr->doff * 4; + + /* Check if there is a tcp options */ + if(!tcp_hdr || tcp_hdr_len <= sizeof(struct tcphdr)) + { + goto not_mine; + } + + is_hit_tcpopt = tcp_find_option(TCP_RESTORE_TCPOPT_KIND, skb, skb_transport_offset(skb), + tcp_hdr_len - sizeof(struct tcphdr), &restore_tcpopt_length, + (char *)&cmsg_offset, sizeof(cmsg_offset)); + + if(!is_hit_tcpopt || restore_tcpopt_length != 2) + { + goto not_mine; + } + + cmsg_offset = ntohs(cmsg_offset); + + /* TCPOPT match, set mark and save cmsg offset */ + __set_cmsg_by_mark(skb, cmsg_offset); + + /* Stat */ + tfe_kmod_stat_add(instance->stat, TFE_KMOD_STAT_NF_HOOK_QUEUE, 1); + tfe_kmod_stat_add(instance->stat, TFE_KMOD_STAT_CMSG_IN_QUEUE, 1); + + /* defer the skb to queue, handle in tfe_kernel_tcp_queue_outfn() */ + return NF_QUEUE; + +not_mine: + return NF_ACCEPT; +} + +static int __rlimit_fs(struct task_struct *tsk, unsigned int fs_limit) +{ + int retval = 0; + if (fs_limit > 65536) + { + return -EPERM; + } + + /* protect tsk->signal and tsk->sighand from disappearing */ + if (!tsk->sighand) + { + retval = -ESRCH; + goto out; + } + + task_lock(tsk->group_leader); + tsk->signal->rlim[RLIMIT_NOFILE].rlim_cur = fs_limit; + tsk->signal->rlim[RLIMIT_NOFILE].rlim_max = fs_limit; + task_unlock(tsk->group_leader); + +out: + return retval; +} + +static int __netfilter_ops_register(struct net * netspace, + struct nf_hook_ops * ops, const char * ifname, nf_hookfn * fn, void * arg) +{ + int result = 0; + + ops->hook = fn; + + ops->hooknum = NF_INET_PRE_ROUTING; + ops->pf = NFPROTO_INET; + ops->dev = NULL; + ops->priv = arg; + + result = nf_register_net_hook(netspace, ops); + if (unlikely(result < 0)) + { + pr_err("cannot register netfilter ops for device %s, result = %d\n", ifname, result); + goto errout; + } + + return result; + +errout: + return result; +} + +static struct socket * __scm_socket_create(struct net * netspace, const char * path) +{ + struct sockaddr_un un_addr; + struct sockaddr_un un_addr_target; + + struct socket * un_sock = NULL; + int result; + + char * rm_ops_argv[] = + { + (char *)rm_program_path, + "-f", + (char *)path, + NULL + }; + + memset(&un_addr, 0, sizeof(un_addr)); + un_addr.sun_family = AF_UNIX; + strcpy(un_addr.sun_path, path); + + memset(&un_addr_target, 0, sizeof(un_addr_target)); + un_addr_target.sun_family = AF_UNIX; + strcpy(un_addr_target.sun_path, g_tfe_kmod_instance->str_scm_socket_path); + + pr_debug("%s %s %s\n", rm_program_path, "-f", path); + result = call_usermodehelper(rm_program_path, rm_ops_argv, rm_program_envp, UMH_WAIT_PROC); + + if (unlikely(result < 0)) + { + printk(KERN_ERR "Failed at removing file %s, result = %d\n", path, result); + goto errout; + } + + result = sock_create_kern(netspace, AF_UNIX, SOCK_DGRAM, 0, &un_sock); + if (unlikely(result < 0)) + { + printk(KERN_ERR "Failed at creating unixdomain socket for %s\n", un_addr.sun_path); + goto errout; + } + +#if 0 + result = kernel_connect(un_sock, (struct sockaddr *)&un_addr_target, sizeof(un_addr_target), 0); + if (unlikely(result == -ENOTCONN)) + { + goto success; + } + + if (unlikely(result < 0)) + { + printk(KERN_ERR "Failed at connection unixdomain socket to %s\n", un_addr_target.sun_path); + goto errout; + } +#endif + + return un_sock; + +errout: + if (un_sock) sock_release(un_sock); + return NULL; +} + +static int tfe_kmod_instance_deinit(struct tfe_kmod_instance * instance) +{ + if(instance->scm_socket != NULL) + { + sock_release(instance->scm_socket); + } + + if(instance->nf_hook_ops_cmsg_input.hook) + { + nf_unregister_net_hook(instance->netspace, &instance->nf_hook_ops_cmsg_input); + } + + if(instance->nf_hook_ops_bfd_input.hook) + { + nf_unregister_net_hook(instance->netspace, &instance->nf_hook_ops_bfd_input); + } + + if(instance->netdev_bfd_input) + { + dev_put(instance->netdev_bfd_input); + } + + if(instance->netdev_cmsg_input) + { + dev_put(instance->netdev_cmsg_input); + } + + if(instance->k_worker) + { + kthread_destroy_worker(instance->k_worker); + } + + if(instance->stat) + { + tfe_kmod_stat_destroy(instance->stat); + } + + nf_unregister_queue_handler(instance->netspace); + return 0; +} + +static const struct nf_queue_handler tfe_kmod_nfqh = +{ + .outfn= tfe_kernel_tcp_queue_outfn, + .nf_hook_drop = tfe_kernel_tcp_queue_hook_drop +}; + +static int tfe_kmod_instance_init(struct tfe_kmod_instance * instance) +{ + int result = 0; + + instance->netspace = &init_net; + instance->scm_socket = __scm_socket_create(instance->netspace, scm_socket_kern); + if (instance->scm_socket == NULL) + { + pr_err("Failed at creating scm socket for %s\n", scm_socket_kern); + goto errout; + } + + instance->sk_scm_socket_path.sun_family = AF_UNIX; + strcpy(instance->sk_scm_socket_path.sun_path, instance->str_scm_socket_path); + + instance->netdev_cmsg_input = dev_get_by_name(instance->netspace, instance->ifname_cmsg_input); + if(instance->netdev_cmsg_input == NULL) + { + pr_err("Failed at finding cmsg input device %s\n", instance->ifname_cmsg_input); + goto errout; + } + + pr_info("CMSG Incoming Interface: %s(%p)\n", instance->netdev_cmsg_input->name, instance->netdev_cmsg_input); + + result = __netfilter_ops_register(instance->netspace, &instance->nf_hook_ops_cmsg_input, + instance->ifname_cmsg_input, tfe_kernel_tcp_hook_entry, instance); + + if (result < 0) + { + pr_err("Failed at register netfilter for cmsg at interface %s\n", instance->ifname_cmsg_input); + goto errout; + } + + instance->k_worker = kthread_create_worker(0, "tfe-tcp-restore"); + if(IS_ERR(instance->k_worker)) + { + pr_err("Failed at creating kthread worker for tcp restore, result = %ld\n", PTR_ERR(instance->k_worker)); + instance->k_worker = NULL; goto errout; + } + + result = __rlimit_fs(instance->k_worker->task, 65535); + if(result < 0) + { + pr_err("Failed at set NOFILE limit for kthread worker, result = %d\n", result); + goto errout; + } + + instance->stat = tfe_kmod_stat_create(instance->netspace); + if(!instance->stat) + { + pr_err("Failed at creating stat handler.\n"); + goto errout; + } + + nf_register_queue_handler(instance->netspace, &tfe_kmod_nfqh); + return 0; + +errout: + tfe_kmod_instance_deinit(instance); + return -EFAULT; +} + +static int __init tfe_kmod_init(void) +{ + int result; + pr_info("Tango Frontend Engine Kernel Module, Version: %s\n", TFE_KMOD_VERSION); + + strlcpy(g_tfe_kmod_instance->ifname_cmsg_input, ifname_cmsg, sizeof(g_tfe_kmod_instance->ifname_cmsg_input)); + strlcpy(g_tfe_kmod_instance->ifname_bfd_input, ifname_bfd, sizeof(g_tfe_kmod_instance->ifname_bfd_input)); + strlcpy(g_tfe_kmod_instance->str_scm_socket_path, scm_socket, sizeof(g_tfe_kmod_instance->str_scm_socket_path)); + + result = tfe_kmod_instance_init(g_tfe_kmod_instance); + if (unlikely(result < 0)) + { + pr_err("Tango Frontend Engine - KMOD init failed."); + return result; + } + + pr_info("Tango Frontend Engine Kernel Module init success.\n"); + return 0; +} + +static void __exit tfe_kmod_exit(void) +{ + tfe_kmod_instance_deinit(g_tfe_kmod_instance); +} + +module_init(tfe_kmod_init); +module_exit(tfe_kmod_exit); |
