summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author yangwei <[email protected]> 2023-12-25 16:57:19 +0800
committer yangwei <[email protected]> 2023-12-25 17:02:52 +0800
commit ee6fc641018252eee590ef17324d8ba32355b47c (patch)
tree 1fac3cb9b6215985ac5df723078f9c74986b1cc2
parent 382f15bd1b7ddc220f08fa0b275a8566603e8ed1 (diff)
✨ feat(CPU limit): 使用EWMA计算CPU占用,α取值0.8
-rw-r--r-- bin/etc/sapp.toml 6
-rw-r--r-- src/config/config_parse.cpp 6
-rw-r--r-- src/packet_io/under_ddos.cpp 24
-rw-r--r-- src/support/cpu_limit/cpu_limit.c 4
4 files changed, 22 insertions, 18 deletions
diff --git a/bin/etc/sapp.toml b/bin/etc/sapp.toml
index ed3a8fe..df65495 100644
--- a/bin/etc/sapp.toml
+++ b/bin/etc/sapp.toml
@@ -91,16 +91,16 @@
stream_bypass_enabled=0
### note, cpu usage value is percent, for example, config value is 85, means 85%, valid range: [1,100]
### sapp change to bypass state immediately when realtime cpu usage > bypass_trigger_cpu_usage
- bypass_trigger_cpu_usage=85
+ bypass_trigger_cpu_usage=95
### note, unit of get_cpu_usage_interval is milliseconds(ms)
get_cpu_usage_interval=500
### note, use the average of the last $smooth_avg_window times as current realtime value
- smooth_avg_window=2
+ smooth_avg_window=0
decrease_ratio="0.95"
increase_ratio="1.005"
### note, unit of bypass_observe_time is second(s)
- recovery_observe_time=30
+ recovery_observe_time=3
[PROTOCOL_FEATURE]
diff --git a/src/config/config_parse.cpp b/src/config/config_parse.cpp
index 26d4e0d..bb4493a 100644
--- a/src/config/config_parse.cpp
+++ b/src/config/config_parse.cpp
@@ -1741,10 +1741,10 @@ int sapp_parse_config(void)
/******************************* packet_io.under_ddos ******************************/
tomlc99_wrap_load_int_def(ABBR_CFG_FILE_MAIN_ENTRY, (char *)"packet_io.under_ddos", (char *)"stream_bypass_enabled", &pconfig->packet_io.under_ddos_config.enabled, 0); //��ǰ����, Ĭ�ϲ�����
- tomlc99_wrap_load_int_def(ABBR_CFG_FILE_MAIN_ENTRY, (char *)"packet_io.under_ddos", (char *)"get_cpu_usage_interval", &pconfig->packet_io.under_ddos_config.get_cpu_usage_interval, 50);
- tomlc99_wrap_load_int_def(ABBR_CFG_FILE_MAIN_ENTRY, (char *)"packet_io.under_ddos", (char *)"smooth_avg_window", &pconfig->packet_io.under_ddos_config.smooth_avg_window, 3);
+ tomlc99_wrap_load_int_def(ABBR_CFG_FILE_MAIN_ENTRY, (char *)"packet_io.under_ddos", (char *)"get_cpu_usage_interval", &pconfig->packet_io.under_ddos_config.get_cpu_usage_interval, 500);
+ tomlc99_wrap_load_int_def(ABBR_CFG_FILE_MAIN_ENTRY, (char *)"packet_io.under_ddos", (char *)"smooth_avg_window", &pconfig->packet_io.under_ddos_config.smooth_avg_window, 2);
- tomlc99_wrap_load_int_def(ABBR_CFG_FILE_MAIN_ENTRY, (char *)"packet_io.under_ddos", (char *)"bypass_trigger_cpu_usage", &tmp_int, 90);
+ tomlc99_wrap_load_int_def(ABBR_CFG_FILE_MAIN_ENTRY, (char *)"packet_io.under_ddos", (char *)"bypass_trigger_cpu_usage", &tmp_int, 95);
pconfig->packet_io.under_ddos_config.bypass_trigger_cpu_usage = (double)tmp_int;
tomlc99_wrap_load_string_def(ABBR_CFG_FILE_MAIN_ENTRY, (char *)"packet_io.under_ddos", (char *)"decrease_ratio", str_tmp, sizeof(str_tmp), "0.99");
diff --git a/src/packet_io/under_ddos.cpp b/src/packet_io/under_ddos.cpp
index 5431b73..20054f2 100644
--- a/src/packet_io/under_ddos.cpp
+++ b/src/packet_io/under_ddos.cpp
@@ -45,6 +45,7 @@ typedef struct{
unsigned long long last_create_stream_new_sum[MAX_CORE_NUM];
double last_time_cpu_total[MAX_CORE_NUM];
double last_time_cpu_idle[MAX_CORE_NUM];
+ double ewma_cpu_usage[MAX_CORE_NUM];
cpu_tck_t all_cpu_usage[MAX_CORE_NUM];
}under_sapp_user_args_t;
@@ -126,7 +127,7 @@ static void read_cpu_usage_from_proc(cpu_tck_t *per_cpu_core_stat, int max_cpu_n
static double sapp_get_cpu_usage_cb(cpu_limit_handle h, int _thread_index, void *_void_user_arg)
{
- double cpu_usage;
+ double current_cpu_usage;
uint64_t this_total_tcks, this_idle_tcks;
int cpu_core_id;
int sys_actual_cpu_core_num = get_nprocs();
@@ -151,21 +152,24 @@ static double sapp_get_cpu_usage_cb(cpu_limit_handle h, int _thread_index, void
this_total_tcks = calc_total_ticks(&ud_usr_arg->all_cpu_usage[cpu_core_id]);
this_idle_tcks = ud_usr_arg->all_cpu_usage[cpu_core_id].tcks[TCK_IDLE];
- if(0 == ud_usr_arg->last_time_cpu_total[cpu_core_id]){
- /* 上一次为0, 为了避免溢出, 此次不计算 */
- ud_usr_arg->last_time_cpu_total[cpu_core_id] = this_total_tcks;
- ud_usr_arg->last_time_cpu_idle[cpu_core_id] = this_idle_tcks;
- return 0.0;
- }
-
/* ���һ��ʱ��, ��������used�IJ�ֵ�������������IJ�ֵ,
����ֱ��������, ���DZ�ʾ�Ի����ӵ���������cpuռ���ʵ���ƽ��ֵ,
*/
- cpu_usage = 100.0 * ((this_total_tcks - this_idle_tcks)-(ud_usr_arg->last_time_cpu_total[cpu_core_id] - ud_usr_arg->last_time_cpu_idle[cpu_core_id]))/(this_total_tcks - ud_usr_arg->last_time_cpu_total[cpu_core_id]);
+ current_cpu_usage = 100.0 * ((this_total_tcks - this_idle_tcks)-(ud_usr_arg->last_time_cpu_total[cpu_core_id] - ud_usr_arg->last_time_cpu_idle[cpu_core_id]))/(this_total_tcks - ud_usr_arg->last_time_cpu_total[cpu_core_id]);
+
+#define EWMA_FACTOR 0.8
+ if(ud_usr_arg->ewma_cpu_usage[cpu_core_id] == 0)
+ {
+ ud_usr_arg->ewma_cpu_usage[cpu_core_id] = current_cpu_usage;
+ }
+ else
+ {
+ ud_usr_arg->ewma_cpu_usage[cpu_core_id] = (EWMA_FACTOR * current_cpu_usage) + ((1 - EWMA_FACTOR) * ud_usr_arg->ewma_cpu_usage[cpu_core_id]);
+ }
ud_usr_arg->last_time_cpu_total[cpu_core_id] = this_total_tcks;
ud_usr_arg->last_time_cpu_idle[cpu_core_id] = this_idle_tcks;
- return cpu_usage;
+ return ud_usr_arg->ewma_cpu_usage[cpu_core_id];
}
diff --git a/src/support/cpu_limit/cpu_limit.c b/src/support/cpu_limit/cpu_limit.c
index 3ea9203..e21738e 100644
--- a/src/support/cpu_limit/cpu_limit.c
+++ b/src/support/cpu_limit/cpu_limit.c
@@ -278,9 +278,9 @@ static void cl_analysis(cpu_limit_inner_t *h)
if(this_stat->realtime_res_val >= h->user_trigger_value){//超最高限制阈值
cl_analysis_reduce_state(h, tid, this_stat);
- if(this_stat->realtime_res_val >= 99.0 && MESA_handle_runtime_log_level_enabled(ABBR_PROCESS_LATENCY_LOG_HANDLE, RLOG_LV_FATAL))
+ if(this_stat->realtime_res_val >= 99.9 && MESA_handle_runtime_log_level_enabled(ABBR_PROCESS_LATENCY_LOG_HANDLE, RLOG_LV_FATAL))
{
- sapp_process_latency_log(RLOG_LV_FATAL, "cpu_limit usage over 99%%, send SIGUSR2 to thread:%d, tid:%d", tid, sapp_global_val->individual_fixed.thread_obtain_id[tid]);
+ sapp_process_latency_log(RLOG_LV_FATAL, "[cpu_limit] thread:%d usage:%.2f over 99.9%%, send SIGUSR2 to tid:%d", tid, this_stat->realtime_res_val, sapp_global_val->individual_fixed.thread_obtain_id[tid]);
pthread_kill(sapp_global_val->individual_fixed.thread_obtain_id[tid], SIGUSR2);
}
}else{