| author | zy <[email protected]> | 2023-11-16 13:11:45 +0800 |
|---|---|---|
| committer | zy <[email protected]> | 2023-11-16 13:11:45 +0800 |
| commit | 4c63686513c0cce1b64f7aca2d6a9f2a2a379e98 (patch) | |
| tree | d6e6d5c09c01e0d1f539cd1941f1e65bbe36c7e0 /kernel/monitor_kernel_task.c | |
| parent | b0365d12e761d268e47881c4a218681e78da3221 (diff) | |
Diffstat (limited to 'kernel/monitor_kernel_task.c')
| -rw-r--r-- | kernel/monitor_kernel_task.c | 377 |
1 file changed, 377 insertions, 0 deletions
diff --git a/kernel/monitor_kernel_task.c b/kernel/monitor_kernel_task.c
new file mode 100644
index 0000000..3b57152
--- /dev/null
+++ b/kernel/monitor_kernel_task.c
@@ -0,0 +1,377 @@

```c
#include "monitor_kernel_task.h"
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/syscall.h> /* for syscall_get_nr */
#include <linux/irq.h>
#include <linux/sched/mm.h> /* for get_task_mm */
#include <linux/syscalls.h>
#include <linux/tracehook.h>

struct stack_trace {
    unsigned int nr_entries, max_entries;
    unsigned long *entries;
    int skip; /* input argument: how many entries to skip */
};

struct stack_frame_user {
    const void __user *next_fp;
    unsigned long ret_addr;
};

static inline int diag_get_task_type(struct task_struct *tsk) {
    if (orig_get_task_type)
        return orig_get_task_type(&tsk->se);
    return 0;
}

static inline int orig_diag_cgroup_name(struct cgroup *cgrp, char *buf,
                                        size_t buflen) {
    if (orig_kernfs_name && cgrp && cgrp->kn) {
        return orig_kernfs_name(cgrp->kn, buf, buflen);
    } else {
        return 0;
    }
}

static inline mm_info *find_mm_info(mm_tree *mm_tree, struct mm_struct *mm) {
    mm_info *info;

    if (mm == NULL)
        return NULL;
    info = radix_tree_lookup(&mm_tree->mm_tree, (unsigned long)mm);

    return info;
}

static void __diag_cgroup_name(struct task_struct *tsk, char *buf,
                               unsigned int count, int cgroup) {
    int cgroup_id = cpuacct_cgrp_id;

    memset(buf, 0, count);

    if (cgroup == 1) {
        cgroup_id = cpuset_cgrp_id;
    }

    if (tsk && tsk->cgroups && tsk->cgroups->subsys &&
        tsk->cgroups->subsys[cgroup_id] &&
        tsk->cgroups->subsys[cgroup_id]->cgroup) {
        orig_diag_cgroup_name(tsk->cgroups->subsys[cgroup_id]->cgroup, buf,
                              count);
    }
}

static void diag_cgroup_name(struct task_struct *tsk, char *buf,
                             unsigned int count, int cgroup) {
    __diag_cgroup_name(tsk, buf, count, cgroup);
}

static int copy_stack_frame(const void __user *fp,
                            struct stack_frame_user *frame) {
    int ret;

    ret = 1;
    pagefault_disable();
    if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
        ret = 0;
    pagefault_enable();

    return ret;
}

static int copy_stack_frame_remote(struct task_struct *tsk,
                                   const void __user *fp,
                                   struct stack_frame_user *frame) {
    int ret;
    struct mm_struct *mm;

    mm = get_task_mm(tsk);
    if (!mm)
        return 0;

    ret = orig_access_remote_vm(mm, (unsigned long)fp, frame, sizeof(*frame), 0);
    mmput(mm);

    return ret;
}

static inline void save_stack_trace_user_remote(struct task_struct *tsk,
                                                struct stack_trace *trace) {
    const struct pt_regs *regs = task_pt_regs(tsk);
    const void __user *fp = (const void __user *)regs->bp;
    int count = 0;

    if (in_atomic() || irqs_disabled()) {
        return;
    }

    if (trace->nr_entries < trace->max_entries)
        trace->entries[trace->nr_entries++] = regs->ip;

    while (trace->nr_entries < trace->max_entries) {
        struct stack_frame_user frame;

        frame.next_fp = NULL;
        frame.ret_addr = 0;

        if (!copy_stack_frame_remote(tsk, fp, &frame)) {
            break;
        }

        if ((unsigned long)fp < regs->sp)
            break;

        if (frame.ret_addr) {
            trace->entries[trace->nr_entries++] = frame.ret_addr;
        } else
            break;

        if (fp == frame.next_fp)
            break;
        fp = frame.next_fp;

        count++;
        /*
         * A hard lockup was observed here in production, so force an
         * exit from the loop after a bounded number of frames.
         */
        if (count >= trace->max_entries || count >= 100)
            break;
    }
}

static inline void __save_stack_trace_user(struct stack_trace *trace) {
    const struct pt_regs *regs = task_pt_regs(current);
    const void __user *fp = (const void __user *)regs->bp;
    int count = 0;

    if (trace->nr_entries < trace->max_entries)
        trace->entries[trace->nr_entries++] = regs->ip;

    while (trace->nr_entries < trace->max_entries) {
        struct stack_frame_user frame;

        frame.next_fp = NULL;
        frame.ret_addr = 0;
        if (!copy_stack_frame(fp, &frame))
            break;
        if ((unsigned long)fp < regs->sp)
            break;
        if (frame.ret_addr) {
            trace->entries[trace->nr_entries++] = frame.ret_addr;
        }
        if (fp == frame.next_fp)
            break;
        fp = frame.next_fp;
        count++;
        /*
         * A hard lockup was observed here in production, so force an
         * exit from the loop after a bounded number of frames.
         */
        if (count >= trace->max_entries || count >= 100)
            break;
    }
}

void perfect_save_stack_trace_user(struct stack_trace *trace) {
    /*
     * Trace the user stack only if we are not a kernel thread.
     */
    if (current->mm) {
        __save_stack_trace_user(trace);
    }
    if (trace->nr_entries < trace->max_entries)
        trace->entries[trace->nr_entries++] = ULONG_MAX;
}

void diagnose_save_stack_trace_user(unsigned long *backtrace) {
    struct stack_trace trace;

    memset(&trace, 0, sizeof(trace));
    memset(backtrace, 0, BACKTRACE_DEPTH2 * sizeof(unsigned long));
    trace.max_entries = BACKTRACE_DEPTH2;
    trace.entries = backtrace;
    perfect_save_stack_trace_user(&trace);
}

void diagnose_save_stack_trace_user_remote(struct task_struct *tsk,
                                           unsigned long *backtrace) {
    struct stack_trace trace;

    memset(&trace, 0, sizeof(trace));
    memset(backtrace, 0, BACKTRACE_DEPTH2 * sizeof(unsigned long));
    trace.max_entries = BACKTRACE_DEPTH2;
    trace.entries = backtrace;

    /*
     * Trace the user stack only if the task is not a kernel thread.
     */
    if (tsk->mm) {
        save_stack_trace_user_remote(tsk, &trace);
    }
    if (trace.nr_entries < trace.max_entries)
        trace.entries[trace.nr_entries++] = ULONG_MAX;
}

void diag_task_brief(struct task_struct *tsk, task_detail *detail) {
    struct pid_namespace *ns;
    struct pt_regs *task_regs;
    struct task_struct *leader;
    struct pt_regs *irq_regs;

    if (!detail)
        return;

    memset(detail, 0, sizeof(task_detail));

    if (!tsk || tsk->exit_state == EXIT_ZOMBIE) /* zombie */
        return;
    leader = tsk->group_leader;
    if (!leader || leader->exit_state == EXIT_ZOMBIE) {
        return;
    }

    if (tsk != current) { /* not the current task */
        detail->user_mode = -1;
        detail->syscallno = -1;
    } else if (!tsk->mm) { /* current task, but a kernel thread */
        detail->user_mode = 0;
        detail->syscallno = -1;
    } else { /* current task and a user thread */
        irq_regs = get_irq_regs(); /* registers of the current irq */
        task_regs = task_pt_regs(tsk);

        if ((irq_regs && user_mode(irq_regs)) ||
            (task_regs && user_mode(task_regs))) {
            detail->user_mode = 1; /* user mode */
        } else {
            detail->user_mode = 0; /* kernel mode */
        }

        if (task_regs) {
            detail->syscallno = syscall_get_nr(tsk, task_regs); /* syscall no. */
        }
    }

    if (tsk->sched_class == orig_idle_sched_class) /* idle task */
        detail->sys_task = 2;
    else if (!tsk->mm) /* kernel thread */
        detail->sys_task = 1;
    else
        detail->sys_task = 0;

    detail->pid = tsk->pid;
    detail->tgid = tsk->tgid;
    detail->state = tsk->__state;
    detail->task_type = diag_get_task_type(tsk);
    ns = task_active_pid_ns(tsk); /* pid as seen inside the container */
    if (ns && ns != &init_pid_ns) {
        detail->container_pid = task_pid_nr_ns(tsk, ns);
        detail->container_tgid = task_tgid_nr_ns(tsk, ns);
    } else {
        detail->container_pid = tsk->pid;
        detail->container_tgid = tsk->tgid;
    }
    strncpy(detail->comm, tsk->comm, TASK_COMM_LEN);
    detail->comm[TASK_COMM_LEN - 1] = 0; /* comm name */
    diag_cgroup_name(tsk, detail->cgroup_buf, CGROUP_NAME_LEN, 0);
    diag_cgroup_name(tsk, detail->cgroup_cpuset, CGROUP_NAME_LEN, 1);

    detail->cgroup_buf[CGROUP_NAME_LEN - 1] = 0;    /* cgroup name */
    detail->cgroup_cpuset[CGROUP_NAME_LEN - 1] = 0; /* cgroup cpuset name */
}

void diag_task_user_stack(struct task_struct *tsk, user_stack_detail *detail) {
    struct pt_regs *regs;
    unsigned long sp, ip, bp;
    struct task_struct *leader;

    if (!detail)
        return;

    detail->stack[0] = 0;
    if (!tsk || !tsk->mm)
        return;

    leader = tsk->group_leader;
    if (!leader || !leader->mm || leader->exit_state == EXIT_ZOMBIE) {
        return;
    }

    sp = 0;
    ip = 0;
    bp = 0;
    regs = task_pt_regs(tsk);
    if (regs) {
        sp = regs->sp;
#if defined(DIAG_ARM64)
        ip = regs->pc;
        bp = regs->sp;
#else
        ip = regs->ip;
        bp = regs->bp;
#endif
    }
#if defined(DIAG_ARM64)
    detail->regs = regs->user_regs;
#else
    detail->regs = *regs;
#endif
    detail->sp = sp;
    detail->ip = ip;
    detail->bp = bp;

    if (tsk == current) {
        diagnose_save_stack_trace_user(detail->stack);
    } else {
        diagnose_save_stack_trace_user_remote(tsk, detail->stack);
    }
}

void diag_task_kern_stack(struct task_struct *tsk, kern_stack_detail *detail) {
    orig_stack_trace_save_tsk(tsk, detail->stack, BACKTRACE_DEPTH2, 0);
}

void dump_proc_chains_argv(int style, struct task_struct *tsk, mm_tree *mm_tree,
                           proc_chains_detail *detail) {
    struct task_struct *walker;
    mm_info *mm_info;
    int cnt = 0;
    int i = 0;
    struct task_struct *leader;

    for (i = 0; i < PROCESS_CHAINS_COUNT; i++) {
        detail->chains[i][0] = 0;
        detail->tgid[i] = 0;
    }
    if (style == 0)
        return;

    if (!tsk || !tsk->mm)
        return;

    leader = tsk->group_leader;
    if (!leader || !leader->mm ||
        leader->exit_state == EXIT_ZOMBIE) { /* leader is a zombie or has no mm */
        return;
    }

    rcu_read_lock();
    walker = tsk;

    /* Walk up the parent chain, recording argv (if cached) or comm. */
    while (walker->pid > 0) {
        if (!thread_group_leader(walker))
            walker = rcu_dereference(walker->group_leader);
        mm_info = find_mm_info(mm_tree, walker->mm);
        if (mm_info) {
            if (mm_info->cgroup_buf[0] == 0)
                diag_cgroup_name(walker, mm_info->cgroup_buf, 255, 0);
            strncpy(detail->chains[cnt], mm_info->argv, PROCESS_ARGV_LEN);
            detail->full_argv[cnt] = 1;
        } else {
            strncpy(detail->chains[cnt], walker->comm, TASK_COMM_LEN);
            detail->full_argv[cnt] = 0;
        }
        detail->tgid[cnt] = walker->pid;
        walker = rcu_dereference(walker->real_parent);
        cnt++;
        if (cnt >= PROCESS_CHAINS_COUNT)
            break;
    }
    rcu_read_unlock();
}
```
\ No newline at end of file
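
The new file exports per-task snapshot helpers (`diag_task_brief`, `diag_task_kern_stack`, `diag_task_user_stack`, `dump_proc_chains_argv`). As a rough illustration of the intended calling convention, a hypothetical sampling hook might combine them as sketched below; the `sample_task` wrapper and `struct task_sample` container are invented for this sketch and are not part of the commit.

```c
/*
 * Hypothetical sketch (not part of this commit): one way a sampling
 * hook could combine the helpers added above.
 */
#include "monitor_kernel_task.h"

struct task_sample {       /* invented container type */
    task_detail brief;
    kern_stack_detail kern;
    user_stack_detail user;
};

static void sample_task(struct task_struct *tsk, struct task_sample *s)
{
    diag_task_brief(tsk, &s->brief);     /* pid/tgid/state/cgroup names */
    diag_task_kern_stack(tsk, &s->kern); /* kernel stack works for any task */
    if (tsk->mm)                         /* user stack only for user tasks */
        diag_task_user_stack(tsk, &s->user);
}
```

Note that `diag_task_user_stack` internally dispatches on `tsk == current`: the current task's frames are copied with `__copy_from_user_inatomic`, while a remote task's frames go through `orig_access_remote_vm`, which can sleep and is therefore skipped when called in atomic context.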
