author    Xiongwei Jiang <[email protected]>    2021-11-10 14:26:01 +0800
committer Xiongwei Jiang <[email protected]>    2021-11-10 14:26:01 +0800
commit    4a950c984e2730fc35aaca319bcb28b51ce260b6 (patch)
tree      9c41045eeae71454d97bb093df22b602d8594860 /SOURCE
parent    19c8cbdf483bb14b4dae02cee41bc657bf98909c (diff)
sched: throttle-delay kernel space
Diffstat (limited to 'SOURCE')
-rw-r--r--  SOURCE/diagnose-tools/throttle_delay.cc  411
-rwxr-xr-x  SOURCE/module/Makefile                      2
-rwxr-xr-x  SOURCE/module/internal.h                    3
-rw-r--r--  SOURCE/module/kernel/throttle_delay.c     958
-rw-r--r--  SOURCE/uapi/ali_diagnose.h                 12
-rw-r--r--  SOURCE/uapi/throttle_delay.h               62
6 files changed, 1445 insertions, 3 deletions
diff --git a/SOURCE/diagnose-tools/throttle_delay.cc b/SOURCE/diagnose-tools/throttle_delay.cc
new file mode 100644
index 0000000..5968565
--- /dev/null
+++ b/SOURCE/diagnose-tools/throttle_delay.cc
@@ -0,0 +1,411 @@
+/*
+ * Linux kernel diagnostic tools -- user-space throttle-delay implementation
+ *
+ * Copyright (C) 2020 Alibaba Ltd.
+ *
+ * Author: Baoyou Xie <[email protected]>
+ *
+ * License terms: GNU General Public License (GPL) version 3
+ *
+ */
+
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <sys/time.h>
+
+#include <set>
+
+#include "internal.h"
+#include "symbol.h"
+#include "json/json.h"
+#include <iostream>
+#include <fstream>
+
+#include "uapi/throttle_delay.h"
+#include "params_parse.h"
+#include <syslog.h>
+
+using namespace std;
+
+static char sls_file[256];
+static int syslog_enabled;
+
+void usage_throttle_delay(void)
+{
+ printf(" throttle-delay usage:\n");
+ printf("   --help throttle-delay help info\n");
+ printf("   --activate\n");
+ printf("     verbose VERBOSE\n");
+ printf("     threshold THRESHOLD(MS)\n");
+ printf("     tgid process group to be monitored\n");
+ printf("     pid thread id to be monitored\n");
+ printf("     comm comm to be monitored\n");
+ printf("     bvt whether to monitor bvt tasks as well\n");
+ printf("   --deactivate\n");
+ printf("   --settings dump settings with text.\n");
+ printf("   --report dump log with text.\n");
+ printf("   --log store output to sls file and/or syslog\n");
+}
+
+static void do_activate(const char *arg)
+{
+ int ret = 0;
+ struct params_parser parse(arg);
+ struct diag_throttle_delay_settings settings;
+ string str;
+
+ memset(&settings, 0, sizeof(struct diag_throttle_delay_settings));
+
+ settings.verbose = parse.int_value("verbose");
+ settings.tgid = parse.int_value("tgid");
+ settings.pid = parse.int_value("pid");
+ settings.bvt = parse.int_value("bvt");
+ settings.threshold_ms = parse.int_value("threshold");
+
+ if (0 == settings.threshold_ms)
+ {
+ settings.threshold_ms = 50;
+ }
+
+ str = parse.string_value("comm");
+ if (str.length() > 0) {
+ strncpy(settings.comm, str.c_str(), TASK_COMM_LEN);
+ settings.comm[TASK_COMM_LEN - 1] = 0;
+ }
+
+ if (run_in_host) {
+ ret = diag_call_ioctl(DIAG_IOCTL_THROTTLE_DELAY_SET, (long)&settings);
+ } else {
+ ret = -ENOSYS;
+ syscall(DIAG_THROTTLE_DELAY_SET, &ret, &settings, sizeof(struct diag_throttle_delay_settings));
+ }
+
+ printf("configure throttle-delay %s, ret: %d\n", ret ? "failed" : "succeeded", ret);
+ printf("    process id (tgid):\t%d\n", settings.tgid);
+ printf("    thread id (pid):\t%d\n", settings.pid);
+ printf("    process name (comm):\t%s\n", settings.comm);
+ printf("    threshold (ms):\t%d\n", settings.threshold_ms);
+ printf("    verbose:\t%d\n", settings.verbose);
+ if (ret)
+ return;
+
+ ret = diag_activate("throttle-delay");
+ if (ret == 1) {
+ printf("throttle-delay activated\n");
+ } else {
+ printf("throttle-delay is not activated, ret %d\n", ret);
+ }
+}
+
+static void do_deactivate(void)
+{
+ int ret = 0;
+
+ ret = diag_deactivate("throttle-delay");
+ if (ret == 0) {
+ printf("throttle-delay is not activated\n");
+ } else {
+ printf("deactivate throttle-delay fail, ret is %d\n", ret);
+ }
+}
+
+static void do_settings(const char *arg)
+{
+ struct diag_throttle_delay_settings settings;
+ int ret;
+ int enable_json = 0;
+ Json::Value root;
+ struct params_parser parse(arg);
+ enable_json = parse.int_value("json");
+
+ if (run_in_host) {
+ ret = diag_call_ioctl(DIAG_IOCTL_THROTTLE_DELAY_SETTINGS, (long)&settings);
+ } else {
+ ret = -ENOSYS;
+ syscall(DIAG_THROTTLE_DELAY_SETTINGS, &ret, &settings,
+ sizeof(struct diag_throttle_delay_settings));
+ }
+
+ if (ret == 0) {
+ if (1 != enable_json)
+ {
+ printf("settings:\n");
+ printf("    activated:\t%s\n", settings.activated ? "√" : "×");
+ printf("    process id (tgid):\t%d\n", settings.tgid);
+ printf("    thread id (pid):\t%d\n", settings.pid);
+ printf("    process name (comm):\t%s\n", settings.comm);
+ printf("    threshold (ms):\t%d\n", settings.threshold_ms);
+ printf("    verbose:\t%d\n", settings.verbose);
+ }
+ else
+ {
+ root["activated"] = Json::Value(settings.activated);
+ root["tgid"] = Json::Value(settings.tgid);
+ root["pid"] = Json::Value(settings.pid);
+ root["comm"] = Json::Value(settings.comm);
+ root["threshold"] = Json::Value(settings.threshold_ms);
+ root["verbose"] = Json::Value(settings.verbose);
+ }
+ } else {
+ if (1 != enable_json)
+ {
+ printf("failed to get throttle-delay settings, please check that diagnose-tools is installed correctly\n");
+ }
+ else
+ {
+ root["err"] = Json::Value("get throttle-delay settings failed, please check whether diagnose-tools is installed\n");
+ }
+ }
+
+ if (1 == enable_json)
+ {
+ std::string str_log;
+ str_log.append(root.toStyledString());
+ printf("%s", str_log.c_str());
+ }
+}
+
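+/*
+ * Decode one record from the kernel's variant buffer: either a
+ * throttle_delay_dither (a delayed task, with kernel/user stacks and
+ * process chain) or a throttle_delay_rq (per-CPU record emitted at dump time).
+ */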
+static int throttle_delay_extract(void *buf, unsigned int len, void *)
+{
+ int *et_type;
+ struct throttle_delay_dither *dither;
+ struct throttle_delay_rq *rq;
+ static int seq = 0;
+
+ if (len == 0)
+ return 0;
+
+ et_type = (int *)buf;
+ switch (*et_type) {
+ case et_throttle_delay_dither:
+ if (len < sizeof(struct throttle_delay_dither))
+ break;
+ dither = (struct throttle_delay_dither *)buf;
+
+ printf("warning: scheduling delayed for %lu ms, NOW: %lu, DEQUEUED: %lu, current time: [%lu:%lu]\n",
+ dither->delay_ms,
+ dither->now,
+ dither->dequeued,
+ dither->tv.tv_sec,
+ dither->tv.tv_usec);
+
+ printf("##CGROUP:[%s] %d [%03d] sample hit\n",
+ dither->task.cgroup_buf,
+ dither->task.pid,
+ seq);
+ seq++;
+
+ diag_printf_kern_stack(&dither->kern_stack);
+ diag_printf_user_stack(dither->task.tgid,
+ dither->task.container_tgid,
+ dither->task.comm,
+ &dither->user_stack);
+ printf("#* 0xffffffffffffff %s (UNKNOWN)\n",
+ dither->task.comm);
+ diag_printf_proc_chains(&dither->proc_chains);
+ printf("##\n");
+
+ break;
+ case et_throttle_delay_rq:
+ if (len < sizeof(struct throttle_delay_rq))
+ break;
+ rq = (struct throttle_delay_rq *)buf;
+
+ printf("\tCPU %d,nr_running:%d\n",
+ rq->cpu, rq->nr_running);
+
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void do_extract(char *buf, int len)
+{
+ extract_variant_buffer(buf, len, throttle_delay_extract, NULL);
+}
+
+static void do_dump(const char *arg)
+{
+ static char variant_buf[140 * 1024 * 1024];
+ int len;
+ int ret = 0;
+ struct diag_ioctl_dump_param dump_param = {
+ .user_ptr_len = &len,
+ .user_buf_len = 4 * 1024 * 1024,
+ .user_buf = variant_buf,
+ };
+
+ memset(variant_buf, 0, 4 * 1024 * 1024);
+ if (run_in_host) {
+ ret = diag_call_ioctl(DIAG_IOCTL_THROTTLE_DELAY_DUMP, (long)&dump_param);
+ } else {
+ ret = -ENOSYS;
+ syscall(DIAG_THROTTLE_DELAY_DUMP, &ret, &len, variant_buf, 4 * 1024 * 1024);
+ }
+
+ if (ret == 0 && len > 0) {
+ do_extract(variant_buf, len);
+ }
+}
+
+static int sls_extract(void *buf, unsigned int len, void *)
+{
+ int *et_type;
+ struct throttle_delay_dither *dither;
+ struct throttle_delay_rq *rq;
+ symbol sym;
+
+ Json::Value root;
+ Json::Value task;
+ Json::Value kern_stack;
+ Json::Value user_stack;
+ Json::Value proc_chains;
+
+ if (len == 0)
+ return 0;
+
+ et_type = (int *)buf;
+ switch (*et_type) {
+ case et_throttle_delay_dither:
+ if (len < sizeof(struct throttle_delay_dither))
+ break;
+ dither = (struct throttle_delay_dither *)buf;
+ root["id"] = dither->id;
+ root["seq"] = dither->seq;
+ root["delay_ms"] = Json::Value(dither->delay_ms);
+ root["now"] = Json::Value(dither->now);
+ root["queued"] = Json::Value(dither->dequeued);
+ diag_sls_time(&dither->tv, root);
+ diag_sls_task(&dither->task, task);
+ diag_sls_kern_stack(&dither->kern_stack, task);
+ diag_sls_user_stack(dither->task.tgid,
+ dither->task.container_tgid,
+ dither->task.comm,
+ &dither->user_stack, task, 0);
+ diag_sls_proc_chains(&dither->proc_chains, task);
+ root["task"] = task;
+
+ write_file(sls_file, "throttle-delay-dither", &dither->tv, dither->id, dither->seq, root);
+ write_syslog(syslog_enabled, "throttle-delay-dither", &dither->tv, dither->id, dither->seq, root);
+ break;
+ case et_throttle_delay_rq:
+ if (len < sizeof(struct throttle_delay_rq))
+ break;
+ rq = (struct throttle_delay_rq *)buf;
+ root["id"] = rq->id;
+ root["seq"] = rq->seq;
+ diag_sls_time(&rq->tv, root);
+ root["cpu"] = rq->cpu;
+ root["nr_running"] = rq->nr_running;
+ write_file(sls_file, "throttle-delay-rq", &rq->tv, rq->id, rq->seq, root);
+ write_syslog(syslog_enabled, "throttle-delay-rq", &rq->tv, rq->id, rq->seq, root);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
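+/*
+ * Long-running collection loop: every 10 seconds dump the kernel buffer and
+ * write the decoded records to the configured sls file and/or syslog;
+ * java symbol info is refreshed periodically via java_attach_once().
+ */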
+static void do_sls(char *arg)
+{
+ int ret;
+ static char variant_buf[4 * 1024 * 1024];
+ int len;
+ int jiffies_sls = 0;
+ struct diag_ioctl_dump_param dump_param = {
+ .user_ptr_len = &len,
+ .user_buf_len = 4 * 1024 * 1024,
+ .user_buf = variant_buf,
+ };
+
+ ret = log_config(arg, sls_file, &syslog_enabled);
+ if (ret != 1)
+ return;
+
+ java_attach_once();
+ while (1) {
+ if (run_in_host) {
+ ret = diag_call_ioctl(DIAG_IOCTL_THROTTLE_DELAY_DUMP, (long)&dump_param);
+ } else {
+ ret = -ENOSYS;
+ syscall(DIAG_THROTTLE_DELAY_DUMP, &ret, &len, variant_buf, 4 * 1024 * 1024);
+ }
+
+ if (ret == 0 && len > 0) {
+ /**
+ * Refresh java symbol info every 60 iterations of the 10-second
+ * poll loop, i.e. roughly every 10 minutes.
+ */
+ if (jiffies_sls >= 60) {
+ jiffies_sls = 0;
+ clear_symbol_info(pid_cmdline, g_symbol_parser.get_java_procs(), 1);
+ java_attach_once();
+ }
+
+ extract_variant_buffer(variant_buf, len, sls_extract, NULL);
+ }
+
+ sleep(10);
+ jiffies_sls++;
+ }
+}
+
+int throttle_delay_main(int argc, char **argv)
+{
+ static struct option long_options[] = {
+ {"help", no_argument, 0, 0 },
+ {"activate", optional_argument, 0, 0 },
+ {"deactivate", no_argument, 0, 0 },
+ {"settings", optional_argument, 0, 0 },
+ {"report", optional_argument, 0, 0 },
+ {"log", required_argument, 0, 0 },
+ {0, 0, 0, 0 }
+ };
+ int c;
+
+ if (argc <= 1) {
+ usage_throttle_delay();
+ return 0;
+ }
+ while (1) {
+ int option_index = -1;
+
+ c = getopt_long_only(argc, argv, "", long_options, &option_index);
+ if (c == -1)
+ break;
+ switch (option_index) {
+ case 0:
+ usage_throttle_delay();
+ break;
+ case 1:
+ do_activate(optarg ? optarg : "");
+ break;
+ case 2:
+ do_deactivate();
+ break;
+ case 3:
+ do_settings(optarg ? optarg : "");
+ break;
+ case 4:
+ do_dump(optarg ? optarg : "");
+ break;
+ case 5:
+ do_sls(optarg);
+ break;
+ default:
+ usage_throttle_delay();
+ break;
+ }
+ }
+
+ return 0;
+}
diff --git a/SOURCE/module/Makefile b/SOURCE/module/Makefile
index 298c20f..0838616 100755
--- a/SOURCE/module/Makefile
+++ b/SOURCE/module/Makefile
@@ -214,7 +214,7 @@ ifneq ($(KERNELRELEASE),)
kernel/exec.o kernel/perf.o kernel/run_trace.o kernel/irq_trace.o \
kernel/kprobe.o kernel/utilization.o kernel/sched_delay.o kernel/reboot.o \
kernel/uprobe.o kernel/sys_cost.o kernel/sig_info.o kernel/task_monitor.o \
- kernel/rw_sem.o
+ kernel/rw_sem.o kernel/throttle_delay.o
$(TARGET)-objs += mm/mm_entry.o mm/alloc_page.o mm/alloc_top.o mm/high_order.o mm/rss_monitor.o mm/memcg_stats.o
$(TARGET)-objs += io/io_entry.o
diff --git a/SOURCE/module/internal.h b/SOURCE/module/internal.h
index adb0372..1de3842 100755
--- a/SOURCE/module/internal.h
+++ b/SOURCE/module/internal.h
@@ -57,7 +57,7 @@ static inline void __percpu_counter_add(struct percpu_counter *fbc,
#include "uapi/rss_monitor.h"
#include "pub/variant_buffer.h"
#include "pub/stack.h"
-
+#include "uapi/throttle_delay.h"
/**
* Macros for manually replacing (hooking) kernel functions
*/
@@ -429,6 +429,7 @@ struct diag_percpu_context {
struct event_run_trace_raw event_run_trace_raw;
struct sys_delay_detail sys_delay_detail;
struct sched_delay_dither sched_delay_dither;
+ struct throttle_delay_dither throttle_delay_dither;
struct {
struct uprobe_detail uprobe_detail;
diff --git a/SOURCE/module/kernel/throttle_delay.c b/SOURCE/module/kernel/throttle_delay.c
new file mode 100644
index 0000000..578710a
--- /dev/null
+++ b/SOURCE/module/kernel/throttle_delay.c
@@ -0,0 +1,958 @@
+/*
+ * Linux kernel diagnostic tools -- kernel-space throttle-delay feature
+ *
+ * Copyright (C) 2020 Alibaba Ltd.
+ *
+ * Author: Xiongwei Jiang <[email protected]>
+ *
+ * License terms: GNU General Public License (GPL) version 3
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/stacktrace.h>
+#include <linux/hrtimer.h>
+#include <linux/kernel.h>
+#include <linux/kallsyms.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/timex.h>
+#include <linux/tracepoint.h>
+#include <trace/events/irq.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/sysctl.h>
+#include <trace/events/napi.h>
+#include <linux/rtc.h>
+#include <linux/time.h>
+#include <linux/version.h>
+#include <linux/net.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/icmp.h>
+#include <linux/netfilter.h>
+#include <net/tcp.h>
+#include <linux/stop_machine.h>
+#include <linux/smp.h>
+#include <asm/thread_info.h>
+
+#include "internal.h"
+#include "mm_tree.h"
+#include "kern_internal.h"
+#include "pub/trace_file.h"
+#include "pub/trace_point.h"
+
+#include "uapi/throttle_delay.h"
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 32) && \
+ LINUX_VERSION_CODE <= KERNEL_VERSION(4, 20, 0) \
+ && !defined(UBUNTU_1604)
+
+#if defined(ALIOS_4000_009)
+static unsigned long *get_last_dequeued_addr(struct task_struct *p)
+{
+ /**
+ * task_stack_page, but not end_of_stack !!
+ */
+ return task_stack_page(p) + sizeof(struct thread_info) + 32;
+}
+#else
+#if defined(CENTOS_8U)
+#define diag_last_dequeued rh_reserved2
+#elif KERNEL_VERSION(4, 9, 0) <= LINUX_VERSION_CODE
+#define diag_last_dequeued ali_reserved3
+#elif KERNEL_VERSION(3, 10, 0) <= LINUX_VERSION_CODE
+#define diag_last_dequeued rh_reserved3
+#else
+#define diag_last_dequeued rh_reserved[0]
+#endif
+
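+/*
+ * The per-task "last dequeued" timestamp (in ms) is stored in the
+ * reserved/hotfix field of task_struct selected by the macros above,
+ * so no task_struct layout change is required.
+ */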
+static unsigned long *get_last_dequeued_addr(struct task_struct *p)
+{
+ return &p->diag_last_dequeued;
+}
+
+#endif
+
+#define entity_is_task(se) (!se->my_q)
+
+//static struct kprobe kprobe_dequeue_entity;
+//static int (*orig_throttle_cfs_rq)(struct cfs_rq *cfs_rq);
+
+
+/* task group related information */
+struct rt_bandwidth {
+ /* nests inside the rq lock: */
+ raw_spinlock_t rt_runtime_lock;
+ ktime_t rt_period;
+ u64 rt_runtime;
+ struct hrtimer rt_period_timer;
+ unsigned int rt_period_active;
+};
+struct cfs_bandwidth {
+#ifdef CONFIG_CFS_BANDWIDTH
+ raw_spinlock_t lock;
+ ktime_t period;
+ u64 quota, runtime;
+ s64 hierarchical_quota;
+ u64 runtime_expires;
+ int expires_seq;
+
+ u8 idle;
+ u8 period_active;
+ u8 slack_started;
+ struct hrtimer period_timer, slack_timer;
+ struct list_head throttled_cfs_rq;
+
+ /* statistics */
+ int nr_periods, nr_throttled;
+ u64 throttled_time;
+#endif
+};
+
+struct task_group {
+ struct cgroup_subsys_state css;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ /* schedulable entities of this group on each cpu */
+ struct sched_entity **se;
+ /* runqueue "owned" by this group on each cpu */
+ struct cfs_rq **cfs_rq;
+ unsigned long shares;
+ int bvt;
+#ifdef CONFIG_SMP
+ /*
+ * load_avg can be heavily contended at clock tick time, so put
+ * it in its own cacheline separated from the fields above which
+ * will also be accessed at each tick.
+ */
+ atomic_long_t load_avg ____cacheline_aligned;
+#endif
+#endif
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ struct sched_rt_entity **rt_se;
+ struct rt_rq **rt_rq;
+
+ struct rt_bandwidth rt_bandwidth;
+#endif
+
+ struct rcu_head rcu;
+ struct list_head list;
+
+ struct task_group *parent;
+ struct list_head siblings;
+ struct list_head children;
+
+#ifdef CONFIG_SCHED_AUTOGROUP
+ struct autogroup *autogroup;
+#endif
+
+ struct cfs_bandwidth cfs_bandwidth;
+
+ ALI_HOTFIX_RESERVE(1)
+ ALI_HOTFIX_RESERVE(2)
+ ALI_HOTFIX_RESERVE(3)
+ ALI_HOTFIX_RESERVE(4)
+};
+
+/* CFS-related fields in a runqueue */
+struct cfs_rq {
+ struct load_weight load;
+ unsigned int nr_running, h_nr_running;
+
+ u64 exec_clock;
+ u64 min_vruntime;
+#ifndef CONFIG_64BIT
+ u64 min_vruntime_copy;
+#endif
+
+ struct rb_root tasks_timeline;
+ struct rb_node *rb_leftmost;
+
+ /*
+ * 'curr' points to currently running entity on this cfs_rq.
+ * It is set to NULL otherwise (i.e when none are currently running).
+ */
+ struct sched_entity *curr, *next, *last, *skip;
+
+ /* Effective bvt type */
+ int ebvt;
+
+#ifdef CONFIG_SCHED_DEBUG
+ unsigned int nr_spread_over;
+#endif
+
+#ifdef CONFIG_SMP
+ /*
+ * CFS load tracking
+ */
+ struct sched_avg avg;
+ u64 runnable_load_sum;
+ unsigned long runnable_load_avg;
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ unsigned long tg_load_avg_contrib;
+#endif
+ atomic_long_t removed_load_avg, removed_util_avg;
+#ifndef CONFIG_64BIT
+ u64 load_last_update_time_copy;
+#endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ /*
+ * h_load = weight * f(tg)
+ *
+ * Where f(tg) is the recursive weight fraction assigned to
+ * this group.
+ */
+ unsigned long h_load;
+ u64 last_h_load_update;
+ struct sched_entity *h_load_next;
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */
+
+ /*
+ * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
+ * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
+ * (like users, containers etc.)
+ *
+ * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
+ * list is used during load balance.
+ */
+ int on_list;
+ struct list_head leaf_cfs_rq_list;
+ struct task_group *tg; /* group that "owns" this runqueue */
+ struct list_head batch_node;
+ unsigned int nr_batch_running; /* only tasks, no group se */
+
+#ifdef CONFIG_CFS_BANDWIDTH
+ int runtime_enabled;
+ int expires_seq;
+ u64 runtime_expires;
+ s64 runtime_remaining;
+
+ u64 throttled_clock, throttled_clock_task;
+ u64 throttled_clock_task_time;
+ int throttled, throttle_count;
+ struct list_head throttled_list;
+#endif /* CONFIG_CFS_BANDWIDTH */
+
+#ifdef CONFIG_CFS_BVT
+ u64 kick_delay_nc;
+ u64 throttled_clock_nc;
+ u64 throttled_time_nc; /* total time */
+ u64 throttled_time_nc_max; /* single max time */
+ int throttled_nc;
+ struct list_head throttled_node_nc;
+#endif
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
+ unsigned long nr_uninterruptible;
+
+ ALI_HOTFIX_RESERVE(1)
+ ALI_HOTFIX_RESERVE(2)
+ ALI_HOTFIX_RESERVE(3)
+ ALI_HOTFIX_RESERVE(4)
+};
+
+/*
+ * This is the priority-queue data structure of the RT scheduling class:
+ */
+struct rt_prio_array {
+ DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
+ struct list_head queue[MAX_RT_PRIO];
+};
+
+/* Real-Time classes' related field in a runqueue: */
+struct rt_rq {
+ struct rt_prio_array active;
+ unsigned int rt_nr_running;
+ unsigned int rr_nr_running;
+#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
+ struct {
+ int curr; /* highest queued rt task prio */
+#ifdef CONFIG_SMP
+ int next; /* next highest */
+#endif
+ } highest_prio;
+#endif
+#ifdef CONFIG_SMP
+ unsigned long rt_nr_migratory;
+ unsigned long rt_nr_total;
+ int overloaded;
+ struct plist_head pushable_tasks;
+#endif /* CONFIG_SMP */
+ int rt_queued;
+
+ int rt_throttled;
+ u64 rt_time;
+ u64 rt_runtime;
+ /* Nests inside the rq lock: */
+ raw_spinlock_t rt_runtime_lock;
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ unsigned long rt_nr_boosted;
+
+ struct rq *rq;
+ struct task_group *tg;
+#endif
+
+ unsigned long nr_uninterruptible;
+};
+
+/* Deadline class' related fields in a runqueue */
+struct dl_rq {
+ /* runqueue is an rbtree, ordered by deadline */
+ struct rb_root rb_root;
+ struct rb_node *rb_leftmost;
+
+ unsigned long dl_nr_running;
+
+#ifdef CONFIG_SMP
+ /*
+ * Deadline values of the currently executing and the
+ * earliest ready task on this rq. Caching these facilitates
+ * the decision whether or not a ready but not running task
+ * should migrate somewhere else.
+ */
+ struct {
+ u64 curr;
+ u64 next;
+ } earliest_dl;
+
+ unsigned long dl_nr_migratory;
+ int overloaded;
+
+ /*
+ * Tasks on this rq that can be pushed away. They are kept in
+ * an rb-tree, ordered by tasks' deadlines, with caching
+ * of the leftmost (earliest deadline) element.
+ */
+ struct rb_root pushable_dl_tasks_root;
+ struct rb_node *pushable_dl_tasks_leftmost;
+#else
+ struct dl_bw dl_bw;
+#endif
+};
+
+#if 0
+typedef void (*smp_call_func_t)(void *info);
+struct call_single_data {
+ struct llist_node llist;
+ smp_call_func_t func;
+ void *info;
+ unsigned int flags;
+};
+#endif
+
+/*
+ * This is the main, per-CPU runqueue data structure.
+ *
+ * Locking rule: those places that want to lock multiple runqueues
+ * (such as the load balancing or the thread migration code), lock
+ * acquire operations must be ordered by ascending &runqueue.
+ */
+struct rq {
+ /* runqueue lock: */
+ raw_spinlock_t lock;
+
+ /*
+ * nr_running and cpu_load should be in the same cacheline because
+ * remote CPUs use both these fields when doing load calculation.
+ */
+ unsigned int nr_running;
+#ifdef CONFIG_NUMA_BALANCING
+ unsigned int nr_numa_running;
+ unsigned int nr_preferred_running;
+#endif
+ #define CPU_LOAD_IDX_MAX 5
+ unsigned long cpu_load[CPU_LOAD_IDX_MAX];
+#ifdef CONFIG_NO_HZ_COMMON
+#ifdef CONFIG_SMP
+ unsigned long last_load_update_tick;
+#endif /* CONFIG_SMP */
+ unsigned long nohz_flags;
+#endif /* CONFIG_NO_HZ_COMMON */
+#ifdef CONFIG_NO_HZ_FULL
+ unsigned long last_sched_tick;
+#endif
+ /* capture load from *all* tasks on this cpu: */
+ struct load_weight load;
+ unsigned long nr_load_updates;
+ u64 nr_switches;
+
+ struct cfs_rq cfs;
+ struct rt_rq rt;
+ struct dl_rq dl;
+
+ u64 kick_start_nc;
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ /* list of leaf cfs_rq on this cpu: */
+ struct list_head leaf_cfs_rq_list;
+#ifdef CONFIG_CFS_BVT
+ struct list_head throttled_list_nc;
+#endif
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
+ /*
+ * This is part of a global counter where only the total sum
+ * over all CPUs matters. A task can increase this counter on
+ * one CPU and if it got migrated afterwards it may decrease
+ * it on another CPU. Always updated under the runqueue lock:
+ */
+ unsigned long nr_uninterruptible;
+
+ struct task_struct *curr, *idle, *stop;
+ unsigned long next_balance;
+ struct mm_struct *prev_mm;
+
+ unsigned int clock_skip_update;
+ u64 clock;
+ u64 clock_task;
+
+ atomic_t nr_iowait;
+
+#ifdef CONFIG_SMP
+ struct root_domain *rd;
+ struct sched_domain *sd;
+
+ unsigned long cpu_capacity;
+ unsigned long cpu_capacity_orig;
+
+ struct callback_head *balance_callback;
+
+ unsigned char idle_balance;
+ /* For active balancing */
+ int active_balance;
+ int push_cpu;
+ struct cpu_stop_work active_balance_work;
+ /* cpu of this runqueue: */
+ int cpu;
+ int online;
+
+ struct list_head cfs_tasks;
+#ifdef CONFIG_CFS_BVT
+ unsigned int nr_active_batch;
+ unsigned int nr_ls_tasks;
+ atomic_t curr_task_type;
+ int cpu_sibling;
+ unsigned int nr_deactive_batchq;
+ struct list_head batchqs;
+ u64 throttled_clock_nc;
+ s64 exempt_quota_nc;
+#endif
+
+ u64 rt_avg;
+ u64 age_stamp;
+ u64 idle_stamp;
+ u64 avg_idle;
+
+ /* This is used to determine avg_idle's max value */
+ u64 max_idle_balance_cost;
+#endif
+
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+ u64 prev_irq_time;
+#endif
+#ifdef CONFIG_PARAVIRT
+ u64 prev_steal_time;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+ u64 prev_steal_time_rq;
+#endif
+
+ /* calc_load related fields */
+ unsigned long calc_load_update;
+ long calc_load_active;
+ long calc_load_active_r;
+
+#ifdef CONFIG_SCHED_HRTICK
+#ifdef CONFIG_SMP
+ int hrtick_csd_pending;
+ struct call_single_data hrtick_csd;
+#endif
+ struct hrtimer hrtick_timer;
+#endif
+
+#ifdef CONFIG_SCHEDSTATS
+ /* latency stats */
+ struct sched_info rq_sched_info;
+ unsigned long long rq_cpu_time;
+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
+
+ /* sys_sched_yield() stats */
+ unsigned int yld_count;
+
+ /* schedule() stats */
+ unsigned int sched_count;
+ unsigned int sched_goidle;
+
+ /* try_to_wake_up() stats */
+ unsigned int ttwu_count;
+ unsigned int ttwu_local;
+#endif
+
+#ifdef CONFIG_SMP
+ struct llist_head wake_list;
+#endif
+
+#ifdef CONFIG_CPU_IDLE
+ /* Must be inspected within a rcu lock section */
+ struct cpuidle_state *idle_state;
+#endif
+
+ ALI_HOTFIX_RESERVE(1)
+ ALI_HOTFIX_RESERVE(2)
+ ALI_HOTFIX_RESERVE(3)
+ ALI_HOTFIX_RESERVE(4)
+ ALI_HOTFIX_RESERVE(5)
+ ALI_HOTFIX_RESERVE(6)
+ ALI_HOTFIX_RESERVE(7)
+ ALI_HOTFIX_RESERVE(8)
+};
+
+typedef int (*tg_visitor)(struct task_group *, void *);
+
+__maybe_unused static atomic64_t diag_nr_running = ATOMIC64_INIT(0);
+struct diag_throttle_delay_settings throttle_delay_settings = {
+ .threshold_ms = 50,
+};
+
+static int throttle_delay_alloced;
+static int diag_throttle_delay_id;
+static int throttle_delay_seq;
+static struct diag_variant_buffer throttle_delay_variant_buffer;
+
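+/*
+ * throttle_cfs_rq() is hooked via the DEFINE_ORIG_FUNC/JUMP_* helpers:
+ * new_throttle_cfs_rq() below records per-task timestamps and then calls
+ * the original orig_throttle_cfs_rq().
+ */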
+DEFINE_ORIG_FUNC(void, throttle_cfs_rq, 1,
+ struct cfs_rq *, cfs_rq);
+
+static inline int cpu_of(struct rq *rq)
+{
+#ifdef CONFIG_SMP
+ return rq->cpu;
+#else
+ return 0;
+#endif
+}
+
+static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
+{
+ return cfs_rq->rq;
+}
+
+int walk_tg_tree_from(struct task_group *from,
+ tg_visitor down, tg_visitor up, void *data)
+{
+ struct task_group *parent, *child;
+ int ret;
+
+ parent = from;
+
+down:
+ ret = (*down)(parent, data);
+ if (ret)
+ goto out;
+ list_for_each_entry_rcu(child, &parent->children, siblings) {
+ parent = child;
+ goto down;
+
+up:
+ continue;
+ }
+ ret = (*up)(parent, data);
+ if (ret || parent == from)
+ goto out;
+
+ child = parent;
+ parent = parent->parent;
+ if (parent)
+ goto up;
+out:
+ return ret;
+}
+
+static inline struct task_struct *task_of(struct sched_entity *se)
+{
+ return container_of(se, struct task_struct, se);
+}
+
+int tg_nop(struct task_group *tg, void *data)
+{
+ return 0;
+}
+
+static unsigned long read_last_dequeued(struct task_struct *p)
+{
+ unsigned long *ptr = get_last_dequeued_addr(p);
+
+ if (ptr) {
+ return *ptr;
+ } else {
+ return 0;
+ }
+}
+
+
+static void update_last_dequeued(struct task_struct *p, unsigned long stamp)
+{
+ unsigned long *ptr = get_last_dequeued_addr(p);
+
+ if (ptr) {
+ *ptr = stamp;
+ }
+}
+
+
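+/*
+ * Invoked for each task group in the throttled hierarchy: stamp every task
+ * queued on this CPU's cfs_rq with the current time (ms) so the sched_switch
+ * hook can later measure how long the throttle delayed it.
+ */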
+static int tg_throttle_down(struct task_group *tg, void *data)
+{
+ struct rq *rq = data;
+ struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
+ struct rb_node *node;
+ struct sched_entity *se;
+
+ if (!throttle_delay_settings.activated)
+ return 0;
+
+ for (node = rb_first(&cfs_rq->tasks_timeline); node; node = rb_next(node)) {
+ se = rb_entry(node, struct sched_entity, run_node);
+ if (entity_is_task(se)) {
+ struct task_struct *p = task_of(se);
+ update_last_dequeued(p, ktime_to_ms(ktime_get()));
+ }
+
+ }
+ return 0;
+}
+
+static void diag_throttle_cfs_rq(struct cfs_rq *cfs_rq)
+{
+ struct rq *rq = rq_of(cfs_rq);
+
+ rcu_read_lock();
+ walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
+ rcu_read_unlock();
+
+ orig_throttle_cfs_rq(cfs_rq);
+
+}
+
+
+static void new_throttle_cfs_rq(struct cfs_rq *cfs_rq)
+{
+ atomic64_inc_return(&diag_nr_running);
+ diag_throttle_cfs_rq(cfs_rq);
+ atomic64_dec_return(&diag_nr_running);
+}
+
+static int lookup_syms(void)
+{
+ LOOKUP_SYMS(throttle_cfs_rq);
+ return 0;
+}
+
+static void jump_init(void)
+{
+ JUMP_INIT(throttle_cfs_rq);
+
+}
+
+static int kprobe_dequeue_entity_pre(struct kprobe *p, struct pt_regs *regs)
+{
+ struct sched_entity *se = (void *)ORIG_PARAM2(regs);
+ int *flags = (void *)ORIG_PARAM3(regs);
+ struct task_struct *task;
+
+ if (!throttle_delay_settings.activated)
+ return 0;
+
+
+ return 0;
+}
+
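+/*
+ * sched_switch handler: when a task stamped by tg_throttle_down() is switched
+ * in, compute now - last_dequeued; if the delay exceeds threshold_ms and the
+ * task matches the comm/tgid/pid filters, capture its stacks and queue a
+ * dither record in the variant buffer.
+ */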
+#if KERNEL_VERSION(4, 9, 0) <= LINUX_VERSION_CODE
+static void trace_sched_switch_hit(void *__data, bool preempt,
+ struct task_struct *prev, struct task_struct *next)
+#elif KERNEL_VERSION(3, 10, 0) <= LINUX_VERSION_CODE
+static void trace_sched_switch_hit(void *__data,
+ struct task_struct *prev, struct task_struct *next)
+#else
+static void trace_sched_switch_hit(struct rq *rq, struct task_struct *prev,
+ struct task_struct *next)
+#endif
+{
+ unsigned long long t_dequeued;
+ unsigned long long delta = 0;
+ unsigned long long delta_ms;
+ unsigned long long now = ktime_to_ms(ktime_get());
+
+ struct task_struct *leader = next->group_leader ? next->group_leader : next;
+
+ if (throttle_delay_settings.bvt == 0 && diag_get_task_type(next) < 0)
+ return;
+
+ if (throttle_delay_settings.comm[0] && (strcmp("none", throttle_delay_settings.comm) != 0)) {
+ if (strcmp(leader->comm, throttle_delay_settings.comm) != 0)
+ return;
+ }
+
+ if (throttle_delay_settings.tgid && leader->pid != throttle_delay_settings.tgid) {
+ return;
+ }
+
+ if (throttle_delay_settings.pid && next->pid != throttle_delay_settings.pid) {
+ return;
+ }
+
+ t_dequeued = read_last_dequeued(next);
+ update_last_dequeued(next, 0);
+ if (t_dequeued == 0)
+ return;
+
+ delta = now - t_dequeued;
+ delta_ms = delta;
+
+ if (delta_ms >= throttle_delay_settings.threshold_ms) {
+ struct throttle_delay_dither *dither;
+ unsigned long flags;
+
+ if (strcmp(leader->comm, "qemu-kvm") == 0)
+ return;
+
+ dither = &diag_percpu_context[smp_processor_id()]->throttle_delay_dither;
+ dither->et_type = et_throttle_delay_dither;
+ dither->id = diag_throttle_delay_id;
+ do_diag_gettimeofday(&dither->tv);
+ dither->seq = throttle_delay_seq;
+ throttle_delay_seq++;
+ dither->now = now;
+ dither->dequeued = t_dequeued;
+ dither->delay_ms = delta_ms;
+ diag_task_brief(next, &dither->task);
+ diag_task_kern_stack(next, &dither->kern_stack);
+ diag_task_user_stack(next, &dither->user_stack);
+ dump_proc_chains_simple(next, &dither->proc_chains);
+
+ diag_variant_buffer_spin_lock(&throttle_delay_variant_buffer, flags);
+ diag_variant_buffer_reserve(&throttle_delay_variant_buffer, sizeof(struct throttle_delay_dither));
+ diag_variant_buffer_write_nolock(&throttle_delay_variant_buffer, dither, sizeof(struct throttle_delay_dither));
+ diag_variant_buffer_seal(&throttle_delay_variant_buffer);
+ diag_variant_buffer_spin_unlock(&throttle_delay_variant_buffer, flags);
+ }
+}
+
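+/*
+ * Activation: allocate the variant buffer, hook the sched_switch tracepoint
+ * and install the throttle_cfs_rq() jump; returns 1 on success, 0 on failure.
+ */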
+static int __activate_throttle_delay(void)
+{
+ int ret = 0;
+
+ ret = alloc_diag_variant_buffer(&throttle_delay_variant_buffer);
+ if (ret)
+ goto out_variant_buffer;
+ throttle_delay_alloced = 1;
+
+ JUMP_CHECK(throttle_cfs_rq);
+
+ hook_tracepoint("sched_switch", trace_sched_switch_hit, NULL);
+// hook_kprobe(&kprobe_dequeue_entity, "dequeue_entity",
+// kprobe_dequeue_entity_pre, NULL);
+ JUMP_INSTALL(throttle_cfs_rq);
+ return 1;
+out_variant_buffer:
+ return 0;
+}
+
+int activate_throttle_delay(void)
+{
+ if (!throttle_delay_settings.activated)
+ throttle_delay_settings.activated = __activate_throttle_delay();
+
+ return throttle_delay_settings.activated;
+}
+
+static void __deactivate_throttle_delay(void)
+{
+ unhook_tracepoint("sched_switch", trace_sched_switch_hit, NULL);
+
+ JUMP_REMOVE(throttle_cfs_rq);
+
+ msleep(20);
+ while (atomic64_read(&diag_nr_running) > 0)
+ {
+ msleep(10);
+ }
+}
+
+int deactivate_throttle_delay(void)
+{
+ if (throttle_delay_settings.activated)
+ __deactivate_throttle_delay();
+ throttle_delay_settings.activated = 0;
+
+ return 0;
+}
+
+static void dump_data(void)
+{
+ struct throttle_delay_rq rq;
+ unsigned long flags;
+ int cpu;
+
+ /* zero the record so fields not filled below (e.g. nr_running) are not copied to user space uninitialized */
+ memset(&rq, 0, sizeof(struct throttle_delay_rq));
+ rq.et_type = et_throttle_delay_rq;
+ rq.id = diag_throttle_delay_id;
+ do_diag_gettimeofday(&rq.tv);
+
+ for_each_online_cpu(cpu)
+ {
+ rq.seq = throttle_delay_seq;
+ throttle_delay_seq++;
+ rq.cpu = cpu;
+
+ diag_variant_buffer_spin_lock(&throttle_delay_variant_buffer, flags);
+ diag_variant_buffer_reserve(&throttle_delay_variant_buffer, sizeof(struct throttle_delay_rq));
+ diag_variant_buffer_write_nolock(&throttle_delay_variant_buffer, &rq, sizeof(struct throttle_delay_rq));
+ diag_variant_buffer_seal(&throttle_delay_variant_buffer);
+ diag_variant_buffer_spin_unlock(&throttle_delay_variant_buffer, flags);
+ }
+}
+
+int throttle_delay_syscall(struct pt_regs *regs, long id)
+{
+ int __user *user_ptr_len;
+ size_t __user user_buf_len;
+ void __user *user_buf;
+ int ret = 0;
+ static struct diag_throttle_delay_settings settings;
+
+ switch (id) {
+ case DIAG_THROTTLE_DELAY_SET:
+ user_buf = (void __user *)SYSCALL_PARAM1(regs);
+ user_buf_len = (size_t)SYSCALL_PARAM2(regs);
+
+ if (user_buf_len != sizeof(struct diag_throttle_delay_settings)) {
+ ret = -EINVAL;
+ } else if (throttle_delay_settings.activated) {
+ ret = -EBUSY;
+ } else {
+ ret = copy_from_user(&settings, user_buf, user_buf_len);
+ if (!ret) {
+ throttle_delay_settings = settings;
+ }
+ }
+ break;
+ case DIAG_THROTTLE_DELAY_SETTINGS:
+ user_buf = (void __user *)SYSCALL_PARAM1(regs);
+ user_buf_len = (size_t)SYSCALL_PARAM2(regs);
+
+ if (user_buf_len != sizeof(struct diag_throttle_delay_settings)) {
+ ret = -EINVAL;
+ } else {
+ settings = throttle_delay_settings;
+ ret = copy_to_user(user_buf, &settings, user_buf_len);
+ }
+ break;
+ case DIAG_THROTTLE_DELAY_DUMP:
+ user_ptr_len = (void __user *)SYSCALL_PARAM1(regs);
+ user_buf = (void __user *)SYSCALL_PARAM2(regs);
+ user_buf_len = (size_t)SYSCALL_PARAM3(regs);
+
+ if (!throttle_delay_alloced) {
+ ret = -EINVAL;
+ } else {
+ dump_data();
+ ret = copy_to_user_variant_buffer(&throttle_delay_variant_buffer,
+ user_ptr_len, user_buf, user_buf_len);
+ diag_throttle_delay_id++;
+ record_dump_cmd("throttle-delay");
+ }
+ break;
+ default:
+ ret = -ENOSYS;
+ break;
+ }
+
+ return ret;
+}
+
+long diag_ioctl_throttle_delay(unsigned int cmd, unsigned long arg)
+{
+ struct diag_ioctl_dump_param dump_param;
+ int ret = 0;
+ static struct diag_throttle_delay_settings settings;
+
+ switch (cmd) {
+ case CMD_THROTTLE_DELAY_SET:
+ if (throttle_delay_settings.activated) {
+ ret = -EBUSY;
+ } else {
+ ret = copy_from_user(&settings, (void *)arg, sizeof(struct diag_throttle_delay_settings));
+ if (!ret) {
+ throttle_delay_settings = settings;
+ }
+ }
+ break;
+ case CMD_THROTTLE_DELAY_SETTINGS:
+ settings = throttle_delay_settings;
+ ret = copy_to_user((void *)arg, &settings, sizeof(struct diag_throttle_delay_settings));
+ break;
+ case CMD_THROTTLE_DELAY_DUMP:
+ ret = copy_from_user(&dump_param, (void *)arg, sizeof(struct diag_ioctl_dump_param));
+ if (!throttle_delay_alloced) {
+ ret = -EINVAL;
+ } else if (!ret) {
+ dump_data();
+ ret = copy_to_user_variant_buffer(&throttle_delay_variant_buffer,
+ dump_param.user_ptr_len, dump_param.user_buf, dump_param.user_buf_len);
+ diag_throttle_delay_id++;
+ record_dump_cmd("throttle-delay");
+ }
+ break;
+ default:
+ ret = -ENOSYS;
+ break;
+ }
+
+ return ret;
+}
+
+int diag_throttle_delay_init(void)
+{
+ if (lookup_syms())
+ return -EINVAL;
+
+ init_diag_variant_buffer(&throttle_delay_variant_buffer, 4 * 1024 * 1024);
+ jump_init();
+
+ if (throttle_delay_settings.activated)
+ throttle_delay_settings.activated = __activate_throttle_delay();
+
+ return 0;
+
+}
+
+void diag_throttle_delay_exit(void)
+{
+ if (throttle_delay_settings.activated)
+ __deactivate_throttle_delay();
+ throttle_delay_settings.activated = 0;
+
+ destroy_diag_variant_buffer(&throttle_delay_variant_buffer);
+}
+#else
+int diag_throttle_delay_init(void)
+{
+ return 0;
+}
+
+void diag_throttle_delay_exit(void)
+{
+
+}
+#endif
diff --git a/SOURCE/uapi/ali_diagnose.h b/SOURCE/uapi/ali_diagnose.h
index dc7f2d7..f0f06e5 100644
--- a/SOURCE/uapi/ali_diagnose.h
+++ b/SOURCE/uapi/ali_diagnose.h
@@ -99,8 +99,9 @@ extern unsigned long debug_mode;
#define DIAG_IOCTL_TYPE_RW_SEM (DIAG_IOCTL_TYPE_TASK_MONITOR + 1)
#define DIAG_IOCTL_TYPE_RSS_MONITOR (DIAG_IOCTL_TYPE_RW_SEM + 1)
#define DIAG_IOCTL_TYPE_MEMCG_STATS (DIAG_IOCTL_TYPE_RSS_MONITOR + 1)
+#define DIAG_IOCTL_TYPE_THROTTLE_DELAY (DIAG_IOCTL_TYPE_MEMCG_STATS + 1)
-#define DIAG_IOCTL_TYPE_END (DIAG_IOCTL_TYPE_MEMCG_STATS + 1)
+#define DIAG_IOCTL_TYPE_END (DIAG_IOCTL_TYPE_THROTTLE_DELAY + 1)
long diag_ioctl_sys_delay(unsigned int cmd, unsigned long arg);
long diag_ioctl_sys_cost(unsigned int cmd, unsigned long arg);
@@ -343,6 +344,11 @@ struct diag_ioctl_dump_param_cycle {
#define DIAG_BASE_SYSCALL_MEMCG_STATS \
(DIAG_BASE_SYSCALL_PING_DELAY6 + DIAG_SYSCALL_INTERVAL)
+/// 1900
+#define DIAG_BASE_SYSCALL_THROTTLE_DELAY \
+ (DIAG_BASE_SYSCALL_PING_DELAY6 + DIAG_SYSCALL_INTERVAL)
+
+
#define DIAG_SYSCALL_END (DIAG_BASE_SYSCALL + DIAG_SYSCALL_INTERVAL * 1000)
enum diag_record_id {
@@ -511,6 +517,10 @@ enum diag_record_id {
et_memcg_stats_summary,
et_memcg_stats_detail,
+ et_throttle_delay_base = et_rss_monitor_base + DIAG_EVENT_TYPE_INTERVAL,
+ et_throttle_delay_dither,
+ et_throttle_delay_rq,
+
et_count
};
diff --git a/SOURCE/uapi/throttle_delay.h b/SOURCE/uapi/throttle_delay.h
new file mode 100644
index 0000000..f304030
--- /dev/null
+++ b/SOURCE/uapi/throttle_delay.h
@@ -0,0 +1,62 @@
+/*
+ * Linux kernel diagnostic tools -- user interface API
+ *
+ * Copyright (C) 2020 Alibaba Ltd.
+ *
+ * Author: Xiongwei Jiang <[email protected]>
+ *
+ * License terms: GNU General Public License (GPL) version 3
+ *
+ */
+
+#ifndef UAPI_THROTTLE_DELAY_H
+#define UAPI_THROTTLE_DELAY_H
+
+#include <linux/ioctl.h>
+
+int throttle_delay_syscall(struct pt_regs *regs, long id);
+
+#define DIAG_THROTTLE_DELAY_SET (DIAG_BASE_SYSCALL_THROTTLE_DELAY)
+#define DIAG_THROTTLE_DELAY_SETTINGS (DIAG_THROTTLE_DELAY_SET + 1)
+#define DIAG_THROTTLE_DELAY_DUMP (DIAG_THROTTLE_DELAY_SETTINGS + 1)
+
+struct diag_throttle_delay_settings {
+ unsigned int activated;
+ unsigned int verbose;
+ unsigned int tgid;
+ unsigned int pid;
+ unsigned int bvt;
+ char comm[TASK_COMM_LEN];
+ unsigned int threshold_ms;
+};
+
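+/* One record per online CPU, emitted when the trace buffer is dumped. */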
+struct throttle_delay_rq {
+ int et_type;
+ unsigned long id;
+ unsigned long seq;
+ struct diag_timespec tv;
+ int cpu;
+ int nr_running;
+};
+
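+/*
+ * One record per delayed task: "dequeued" is when the task's cfs_rq was
+ * throttled, "now" is when it was scheduled in again, and
+ * delay_ms = now - dequeued (all in milliseconds).
+ */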
+struct throttle_delay_dither {
+ int et_type;
+ unsigned long id;
+ unsigned long seq;
+ struct diag_timespec tv;
+ unsigned long delay_ms;
+ unsigned long now, dequeued;
+ struct diag_task_detail task;
+ struct diag_kern_stack_detail kern_stack;
+ struct diag_user_stack_detail user_stack;
+ struct diag_proc_chains_detail proc_chains;
+};
+
+#define CMD_THROTTLE_DELAY_SET (0)
+#define CMD_THROTTLE_DELAY_SETTINGS (CMD_THROTTLE_DELAY_SET + 1)
+#define CMD_THROTTLE_DELAY_DUMP (CMD_THROTTLE_DELAY_SETTINGS + 1)
+#define DIAG_IOCTL_THROTTLE_DELAY_SET _IOR(DIAG_IOCTL_TYPE_THROTTLE_DELAY, CMD_THROTTLE_DELAY_SET, struct diag_throttle_delay_settings)
+#define DIAG_IOCTL_THROTTLE_DELAY_SETTINGS _IOW(DIAG_IOCTL_TYPE_THROTTLE_DELAY, CMD_THROTTLE_DELAY_SETTINGS, struct diag_throttle_delay_settings)
+#define DIAG_IOCTL_THROTTLE_DELAY_DUMP _IOR(DIAG_IOCTL_TYPE_THROTTLE_DELAY, CMD_THROTTLE_DELAY_DUMP, struct diag_ioctl_dump_param)
+
+#endif /* UAPI_THROTTLE_DELAY_H */