✨ feat(CPU limit): 使用EWMA计算CPU占用，α取值0.8

author: yangwei <[email protected]> 2023-12-25 16:57:19 +0800
committer: yangwei <[email protected]> 2023-12-25 17:02:52 +0800
commit: ee6fc641018252eee590ef17324d8ba32355b47c (patch)
tree: 1fac3cb9b6215985ac5df723078f9c74986b1cc2
parent: 382f15bd1b7ddc220f08fa0b275a8566603e8ed1 (diff)
4 files changed, 22 insertions, 18 deletions
diff --git a/bin/etc/sapp.toml b/bin/etc/sapp.toml
index ed3a8fe..df65495 100644
--- a/bin/etc/sapp.toml
+++ b/bin/etc/sapp.toml
@@ -91,16 +91,16 @@
     stream_bypass_enabled=0
 ### note, cpu usage value is percent, for example, config value is 85, means 85%, valid range: [1,100]    
 ### sapp change to bypass state immediately when realtime cpu usage > bypass_trigger_cpu_usage
-    bypass_trigger_cpu_usage=85    
+    bypass_trigger_cpu_usage=95    
 ### note, unit of get_cpu_usage_interval is milliseconds(ms)     
     get_cpu_usage_interval=500
 ### note, use the average of the last $smooth_avg_window times as current realtime value 
-    smooth_avg_window=2
+    smooth_avg_window=0
 
     decrease_ratio="0.95"
     increase_ratio="1.005"
 ### note, unit of recovery_observe_time is seconds (s)
-    recovery_observe_time=30
+    recovery_observe_time=3
 
 
 [PROTOCOL_FEATURE]
diff --git a/src/config/config_parse.cpp b/src/config/config_parse.cpp
index 26d4e0d..bb4493a 100644
--- a/src/config/config_parse.cpp
+++ b/src/config/config_parse.cpp
@@ -1741,10 +1741,10 @@ int sapp_parse_config(void)
 
 	/*******************************  packet_io.under_ddos ******************************/
 	tomlc99_wrap_load_int_def(ABBR_CFG_FILE_MAIN_ENTRY, (char *)"packet_io.under_ddos", (char *)"stream_bypass_enabled", &pconfig->packet_io.under_ddos_config.enabled, 0); // stream-bypass switch, disabled by default
-	tomlc99_wrap_load_int_def(ABBR_CFG_FILE_MAIN_ENTRY, (char *)"packet_io.under_ddos", (char *)"get_cpu_usage_interval", &pconfig->packet_io.under_ddos_config.get_cpu_usage_interval, 50);
-	tomlc99_wrap_load_int_def(ABBR_CFG_FILE_MAIN_ENTRY, (char *)"packet_io.under_ddos", (char *)"smooth_avg_window", &pconfig->packet_io.under_ddos_config.smooth_avg_window, 3);
+	tomlc99_wrap_load_int_def(ABBR_CFG_FILE_MAIN_ENTRY, (char *)"packet_io.under_ddos", (char *)"get_cpu_usage_interval", &pconfig->packet_io.under_ddos_config.get_cpu_usage_interval, 500);
+	tomlc99_wrap_load_int_def(ABBR_CFG_FILE_MAIN_ENTRY, (char *)"packet_io.under_ddos", (char *)"smooth_avg_window", &pconfig->packet_io.under_ddos_config.smooth_avg_window, 2);
 
-	tomlc99_wrap_load_int_def(ABBR_CFG_FILE_MAIN_ENTRY, (char *)"packet_io.under_ddos", (char *)"bypass_trigger_cpu_usage", &tmp_int, 90);
+	tomlc99_wrap_load_int_def(ABBR_CFG_FILE_MAIN_ENTRY, (char *)"packet_io.under_ddos", (char *)"bypass_trigger_cpu_usage", &tmp_int, 95);
 	pconfig->packet_io.under_ddos_config.bypass_trigger_cpu_usage = (double)tmp_int;
 
 	tomlc99_wrap_load_string_def(ABBR_CFG_FILE_MAIN_ENTRY, (char *)"packet_io.under_ddos", (char *)"decrease_ratio", str_tmp, sizeof(str_tmp), "0.99");
diff --git a/src/packet_io/under_ddos.cpp b/src/packet_io/under_ddos.cpp
index 5431b73..20054f2 100644
--- a/src/packet_io/under_ddos.cpp
+++ b/src/packet_io/under_ddos.cpp
@@ -45,6 +45,7 @@ typedef struct{
 	unsigned long long last_create_stream_new_sum[MAX_CORE_NUM];
 	double last_time_cpu_total[MAX_CORE_NUM];
 	double last_time_cpu_idle[MAX_CORE_NUM];
+	double ewma_cpu_usage[MAX_CORE_NUM];
 	cpu_tck_t all_cpu_usage[MAX_CORE_NUM];
 }under_sapp_user_args_t;
 
@@ -126,7 +127,7 @@ static void read_cpu_usage_from_proc(cpu_tck_t *per_cpu_core_stat, int max_cpu_n
 
 static double sapp_get_cpu_usage_cb(cpu_limit_handle h, int _thread_index, void *_void_user_arg)
 {
-	double cpu_usage;
+	double current_cpu_usage;
 	uint64_t this_total_tcks, this_idle_tcks;
 	int cpu_core_id;
 	int sys_actual_cpu_core_num = get_nprocs();
@@ -151,21 +152,24 @@ static double sapp_get_cpu_usage_cb(cpu_limit_handle h, int _thread_index, void
 	this_total_tcks = calc_total_ticks(&ud_usr_arg->all_cpu_usage[cpu_core_id]);
 	this_idle_tcks = ud_usr_arg->all_cpu_usage[cpu_core_id].tcks[TCK_IDLE];
 
-	if(0 == ud_usr_arg->last_time_cpu_total[cpu_core_id]){
-		/* previous sample is 0; skip the calculation this round to avoid an invalid result */
-		ud_usr_arg->last_time_cpu_total[cpu_core_id] = this_total_tcks;
-		ud_usr_arg->last_time_cpu_idle[cpu_core_id] = this_idle_tcks;
-		return 0.0;
-	}
-
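 	/* Over one sampling interval, divide the delta of used ticks between two samples by the delta of total ticks.
 	   Do not use the raw cumulative totals directly: that would give the average cpu usage since boot instead.
 	*/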
-	cpu_usage = 100.0 * ((this_total_tcks - this_idle_tcks)-(ud_usr_arg->last_time_cpu_total[cpu_core_id] - ud_usr_arg->last_time_cpu_idle[cpu_core_id]))/(this_total_tcks - ud_usr_arg->last_time_cpu_total[cpu_core_id]);
+	current_cpu_usage = 100.0 * ((this_total_tcks - this_idle_tcks)-(ud_usr_arg->last_time_cpu_total[cpu_core_id] - ud_usr_arg->last_time_cpu_idle[cpu_core_id]))/(this_total_tcks - ud_usr_arg->last_time_cpu_total[cpu_core_id]);
+
+#define EWMA_FACTOR 0.8
+	if(ud_usr_arg->ewma_cpu_usage[cpu_core_id] == 0)
+	{
+		ud_usr_arg->ewma_cpu_usage[cpu_core_id] = current_cpu_usage;
+	}
+	else
+	{
+		ud_usr_arg->ewma_cpu_usage[cpu_core_id] = (EWMA_FACTOR * current_cpu_usage) + ((1 - EWMA_FACTOR) * ud_usr_arg->ewma_cpu_usage[cpu_core_id]);
+	}
 	ud_usr_arg->last_time_cpu_total[cpu_core_id] = this_total_tcks;
 	ud_usr_arg->last_time_cpu_idle[cpu_core_id]  = this_idle_tcks;	
 
-	return cpu_usage;
+	return ud_usr_arg->ewma_cpu_usage[cpu_core_id];
 
 }
 
diff --git a/src/support/cpu_limit/cpu_limit.c b/src/support/cpu_limit/cpu_limit.c
index 3ea9203..e21738e 100644
--- a/src/support/cpu_limit/cpu_limit.c
+++ b/src/support/cpu_limit/cpu_limit.c
@@ -278,9 +278,9 @@ static void cl_analysis(cpu_limit_inner_t *h)
 		
 		if(this_stat->realtime_res_val >= h->user_trigger_value){//超最高限制阈值
 			cl_analysis_reduce_state(h, tid, this_stat);
-			if(this_stat->realtime_res_val >= 99.0 && MESA_handle_runtime_log_level_enabled(ABBR_PROCESS_LATENCY_LOG_HANDLE, RLOG_LV_FATAL))
+			if(this_stat->realtime_res_val >= 99.9 && MESA_handle_runtime_log_level_enabled(ABBR_PROCESS_LATENCY_LOG_HANDLE, RLOG_LV_FATAL))
 			{
-				sapp_process_latency_log(RLOG_LV_FATAL, "cpu_limit usage over 99%%, send SIGUSR2 to thread:%d, tid:%d", tid, sapp_global_val->individual_fixed.thread_obtain_id[tid]);
+				sapp_process_latency_log(RLOG_LV_FATAL, "[cpu_limit] thread:%d usage:%.2f over 99.9%%, send SIGUSR2 to tid:%d", tid, this_stat->realtime_res_val, sapp_global_val->individual_fixed.thread_obtain_id[tid]);
 				pthread_kill(sapp_global_val->individual_fixed.thread_obtain_id[tid], SIGUSR2);
 			}
 		}else{
author	yangwei <[email protected]>	2023-12-25 16:57:19 +0800
committer	yangwei <[email protected]>	2023-12-25 17:02:52 +0800
commit	ee6fc641018252eee590ef17324d8ba32355b47c (patch)
tree	1fac3cb9b6215985ac5df723078f9c74986b1cc2
parent	382f15bd1b7ddc220f08fa0b275a8566603e8ed1 (diff)