summaryrefslogtreecommitdiff
path: root/inc/Maat_rule.h
blob: b232632783b2ffb38aa291a9fa8163e1c224292c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357

/*
*****************Maat Deep Packet Inspection Policy Framework********
*	Maat is the goddess of truth and justice in ancient Egyptian mythology.
*	Her feather was the measure that determined whether the souls (considered
*	to reside in the heart) of the departed would successfully reach the
*	paradise of the afterlife.
*	Author: zhengchao, MESA
*	Version: 3.0.0 (2020-06-13)
*********************************************************
*/
#ifndef H_MAAT_RULE_H_INCLUDE
#define H_MAAT_RULE_H_INCLUDE
#ifdef __cplusplus
extern "C"{
#endif
#include <MESA/stream.h>
// Character encodings a caller can name through the "charset" parameter of
// the string scan functions declared in this header.
// Every value is written out explicitly and equals the original implicit
// numbering; __CHARSET_MAX is one past the last defined charset.
enum MAAT_CHARSET
{
	CHARSET_NONE = 0,
	CHARSET_GBK = 1,
	CHARSET_BIG5 = 2,
	CHARSET_UNICODE = 3,
	CHARSET_UTF8 = 4,
	CHARSET_BIN = 5,
	// Unicode escape format, prefix backslash-u followed by hex, e.g. "\u627;"
	CHARSET_UNICODE_ASCII_ESC = 6,
	// Unicode escape format, prefix backslash-u with 4 bytes aligned, e.g. "\u0627"
	CHARSET_UNICODE_ASCII_ALIGNED = 7,
	// SGML numeric character reference, decimal base, e.g. "&#1575;"
	CHARSET_UNICODE_NCR_DEC = 8,
	// SGML numeric character reference, hexadecimal base, e.g. "&#x627;"
	CHARSET_UNICODE_NCR_HEX = 9,
	// URL encoding with GB2312, e.g. the Chinese word for "China" is encoded as %D6%D0%B9%FA
	CHARSET_URL_ENCODE_GB2312 = 10,
	// URL encoding with UTF-8, e.g. the Chinese word for "China" is encoded as %E4%B8%AD%E5%9B%BD
	CHARSET_URL_ENCODE_UTF8 = 11,
	CHARSET_WINDOWS1251 = 12,
	__CHARSET_MAX = 13
};
// Rule action codes.
// NOTE(review): presumably these are the values carried in the "action"
// field of struct Maat_rule_t - confirm against the implementation.
enum MAAT_ACTION
{
	MAAT_ACTION_BLOCK = 0,
	MAAT_ACTION_MONIT = 1,
	MAAT_ACTION_WHITE = 2
};
// Kind of hit position record; used as the "ruletype" field of
// struct sub_item_pos_t.
enum MAAT_POS_TYPE
{
	MAAT_POSTYPE_EXPR = 0,
	MAAT_POSTYPE_REGEX = 1
};
// Opaque handles. All three are plain void pointers from the caller's side.
typedef void *scan_status_t;	// scan state, passed by address as "mid" to the scan functions
typedef void *stream_para_t;	// handle returned by the Maat_stream_scan_*_start functions
typedef void *Maat_feather_t;	// handle taken as "feather" by the functions in this header


// Size in bytes of the service_defined array embedded in struct Maat_rule_t.
#define MAX_SERVICE_DEFINE_LEN 128
// 4096 bytes.
// NOTE(review): presumably the upper bound for a service definition that does
// not fit into the embedded array - confirm against the implementation.
#define MAX_HUGE_SERVICE_DEFINE_LEN (1024*4)

// One rule as written into the "result" array by the scan functions.
struct Maat_rule_t
{
	int config_id;
	int service_id;
	unsigned char do_log;
	unsigned char do_blacklist;
	unsigned char action;
	unsigned char reserved;
	// NOTE(review): presumably the number of meaningful bytes in
	// service_defined - confirm.
	int serv_def_len;
	char service_defined[MAX_SERVICE_DEFINE_LEN];
};
// Values of the update_type argument given to Maat_start_callback_t.
#define MAAT_RULE_UPDATE_TYPE_FULL 1
#define MAAT_RULE_UPDATE_TYPE_INC 2

// Table update callbacks, registered with Maat_table_callback_register().
// u_para is the pointer supplied at registration time.
typedef void Maat_start_callback_t(int update_type, void *u_para);
typedef void Maat_update_callback_t(int table_id, const char *table_line, void *u_para);
typedef void Maat_finish_callback_t(void *u_para);





//--------------------HITTING DETAIL DESCRIPTION BEGIN

// Array capacities used by the hit detail structures.
#define MAAT_MAX_HIT_RULE_NUM 8
#define MAAT_MAX_EXPR_ITEM_NUM 8
#define MAAT_MAX_HIT_POS_NUM 8
#define MAAT_MAX_REGEX_GROUP_NUM 8

// NOTE: position buffers such as hitting_regex_pos and hit_pos are ONLY valid
// before the next scan or before Maat_stream_scan_string_end.

// Position of one regex hit together with its groups.
struct regex_pos_t
{
	int group_num;
	int hitting_regex_len;
	const char *hitting_regex_pos;
	int grouping_len[MAAT_MAX_REGEX_GROUP_NUM];
	const char *grouping_pos[MAAT_MAX_REGEX_GROUP_NUM];
};

// Position of one plain string hit.
struct str_pos_t
{
	int hit_len;
	const char *hit_pos;
};
// Hit positions collected for one sub item.
// NOTE(review): presumably ruletype tells which member of the anonymous union
// is in use and hit_cnt how many of its entries are filled - confirm.
struct sub_item_pos_t
{
	enum MAAT_POS_TYPE ruletype;
	int hit_cnt;
	union
	{
		struct regex_pos_t regex_pos[MAAT_MAX_HIT_POS_NUM];
		struct str_pos_t substr_pos[MAAT_MAX_HIT_POS_NUM];
	};
};

// Hit positions of one region.
// NOTE(review): presumably sub_item_num is the number of filled entries in
// sub_item_pos - confirm.
struct Maat_region_pos_t
{
	int region_id;
	int sub_item_num;
	struct sub_item_pos_t sub_item_pos[MAAT_MAX_EXPR_ITEM_NUM];
};

// Hit detail filled in by the *_detail scan functions.
// NOTE(review): presumably hit_region_cnt is the number of filled entries in
// region_pos - confirm.
struct Maat_hit_detail_t
{
	int config_id;	// set to <0 on a half hit
	int hit_region_cnt;
	struct Maat_region_pos_t region_pos[MAAT_MAX_HIT_RULE_NUM];
};
//--------------------HITTING DETAIL DESCRIPTION END

// Abandoned interface, kept for compatibility.
// logger: a MESA_handle_logger, according to the original note.
Maat_feather_t Maat_summon_feather(
	int max_thread_num,
	const char *table_info_path,
	const char *ful_cfg_dir,
	const char *inc_cfg_dir,
	void *logger);

// Abandoned interface, kept for compatibility.
Maat_feather_t Maat_summon_feather_json(
	int max_thread_num,
	const char *table_info_path,
	const char *json_rule,
	void *logger);

// NOTE(review): presumably Maat_feather() creates a handle that is configured
// through Maat_set_feather_opt() and then started with Maat_initiate_feather();
// the return convention of Maat_initiate_feather() is not stated in this
// header - confirm both against the implementation.
Maat_feather_t Maat_feather(
	int max_thread_num,
	const char *table_info_path,
	void *logger);

int Maat_initiate_feather(Maat_feather_t feather);

// Options accepted by Maat_set_feather_opt().
// For each option the comment gives the expected VALUE and SIZE arguments.
// "String" below means: a const char*, MUST end with '\0',
// SIZE = strlen(string)+1 (the terminating '\0' is counted).
// Every value is written out explicitly and equals the original implicit numbering.
enum MAAT_INIT_OPT
{
	// VALUE is an integer, SIZE=sizeof(int). DEFAULT: 1,000 milliseconds.
	MAAT_OPT_SCANDIR_INTERVAL_MS = 1,
	// VALUE is an integer, SIZE=sizeof(int). DEFAULT: 60,000 milliseconds.
	MAAT_OPT_EFFECT_INVERVAL_MS = 2,
	// VALUE is a String. DEFAULT: no default.
	MAAT_OPT_FULL_CFG_DIR = 3,
	// VALUE is a String. DEFAULT: no default.
	MAAT_OPT_INC_CFG_DIR = 4,
	// VALUE is a String. DEFAULT: no default.
	MAAT_OPT_JSON_FILE_PATH = 5,
	// VALUE is NULL, SIZE is 0. MAAT_OPT_STAT_FILE_PATH must be set. DEFAULT: stat OFF.
	MAAT_OPT_STAT_ON = 6,
	// VALUE is NULL, SIZE is 0. MAAT_OPT_STAT_FILE_PATH must be set.
	// DEFAULT: OFF (the original comment reads "stat OFF"; presumably perf OFF is meant - confirm).
	MAAT_OPT_PERF_ON = 7,
	// VALUE is a String. DEFAULT: no default.
	MAAT_OPT_STAT_FILE_PATH = 8,
	// VALUE is an integer pointer, SIZE=sizeof(int).
	// 0: do not return any detail; 1: return hit pos, not including regex grouping;
	// 2: return hit pos and regex grouping pos. DEFAULT: 0.
	MAAT_OPT_SCAN_DETAIL = 9,
	// VALUE is a String of no more than 11 bytes. DEFAULT: MAAT_$tableinfo_path$.
	MAAT_OPT_INSTANCE_NAME = 10,
	// VALUE is a String. No DEFAULT.
	MAAT_OPT_DECRYPT_KEY = 11,
	// VALUE is a String. No DEFAULT.
	MAAT_OPT_REDIS_IP = 12,
	// VALUE is an unsigned short or a signed int, host order,
	// SIZE=sizeof(unsigned short) or sizeof(int). No DEFAULT.
	MAAT_OPT_REDIS_PORT = 13,
	// VALUE is an integer pointer, 0~15, SIZE=sizeof(int). DEFAULT: 0.
	MAAT_OPT_REDIS_INDEX = 14,
	// VALUE is an integer pointer, 1 or 0, SIZE=sizeof(int). DEFAULT: 1.
	MAAT_OPT_CMD_AUTO_NUMBERING = 15,
	// VALUE is NULL, SIZE is 0. DEFAULT: deferred initialization OFF.
	MAAT_OPT_DEFERRED_LOAD = 16,
	// VALUE is NULL, SIZE is 0. DEFAULT: cumulative update ON.
	MAAT_OPT_CUMULATIVE_UPDATE_OFF = 17,
	// VALUE is a long long, SIZE=sizeof(long long). DEFAULT: load the latest.
	// Only valid in redis mode, and may fail when the version is too old.
	// This option also disables background update.
	MAAT_OPT_LOAD_VERSION_FROM = 18,
	// VALUE is an integer, SIZE=sizeof(int). 1: enabled, 0: disabled.
	// DEFAULT: background update is enabled. Runtime setting is allowed.
	MAAT_OPT_ENABLE_UPDATE = 19,
	// VALUE is a String holding JSON, e.g.
	// {"tags":[{"tag":"location","value":"Beijing/ChaoYang/Huayan/22A"},{"tag":"isp","value":"telecom"}]}
	MAAT_OPT_ACCEPT_TAGS = 20,
	// VALUE is a String. Specifies a local directory to store foreign content.
	// DEFAULT: [table_info_path]_files (the original comment reads
	// "[]table_info_path]_files", presumably a typo).
	MAAT_OPT_FOREIGN_CONT_DIR = 21,
	// VALUE is an integer, SIZE=sizeof(int). DEFAULT: 10,000 milliseconds.
	MAAT_OPT_GARBAGE_COLLECTION_TIMEOUT_MS = 22,
	// VALUE is NULL, SIZE is 0. DEFAULT: 0, not compressed by gzip.
	MAAT_OPT_JSON_IS_GZIPPED = 23,
	// VALUE is an integer pointer, 1 or 0, SIZE=sizeof(int). DEFAULT: 1.
	MAAT_OPT_STATUS_OUTPUT_PROMETHEUS = 24
};
// Sets one option on a feather; value and size are interpreted as described
// next to each enum MAAT_INIT_OPT member.
// Returns 0 on success, -1 on failure.
int Maat_set_feather_opt(
	Maat_feather_t feather,
	enum MAAT_INIT_OPT type,
	const void *value,
	int size);
// State items readable through Maat_read_state().
enum MAAT_STATE_OPT
{
	// Get the current maat version; if maat is in update progress, the updating
	// version is returned. VALUE is a long long, SIZE=sizeof(long long).
	MAAT_STATE_VERSION = 1,
	// Query in Maat_finish_callback_t to determine whether this table is the
	// last one to update. VALUE is an integer, SIZE=sizeof(int), 1: yes, 0: no.
	MAAT_STATE_LAST_UPDATING_TABLE = 2,
	// NOTE(review): undocumented in the original header; presumably reports
	// whether an update is in progress - confirm VALUE/SIZE in the implementation.
	MAAT_STATE_IN_UPDATING = 3
};
// Reads the state item selected by type into value (size bytes available).
// NOTE(review): the return convention is not stated in this header - confirm.
int Maat_read_state(
	Maat_feather_t feather,
	enum MAAT_STATE_OPT type,
	void *value,
	int size);

// NOTE(review): presumably destroys the feather and releases its resources - confirm.
void Maat_burn_feather(Maat_feather_t feather);

// Returns the table_id (>=0) on success, otherwise -1.
int Maat_table_register(Maat_feather_t feather, const char *table_name);

// Registers the three update callbacks for a table.
//   start  - receives MAAT_RULE_UPDATE_TYPE_* and u_para
//   update - receives the table id, a table line and u_para
//   finish - receives u_para
// Returns 1 on success, otherwise -1 in case of an invalid table_id or when
// the number of registered functions exceeds 32.
// NOTE(review): table_id is a short here, while Maat_table_register() returns an int.
int Maat_table_callback_register(
	Maat_feather_t feather,
	short table_id,
	Maat_start_callback_t *start,
	Maat_update_callback_t *update,
	Maat_finish_callback_t *finish,
	void *u_para);

#define MAX_HIT_REGION_NUM_PER_GROUP 128

// One entry of the hit path array filled in through
// Maat_get_scan_status() with MAAT_GET_SCAN_HIT_PATH.
struct Maat_hit_path_t
{
	int Nth_scan;
	int region_id;
	int sub_group_id;
	int top_group_id;
	int virtual_table_id;	// 0 is not a virtual table.
	int compile_id;
};
// Options for Maat_set_scan_status() / Maat_get_scan_status().
enum MAAT_SCAN_OPT
{
	// VALUE is a const char*, SIZE=strlen(string). DEFAULT: no default.
	MAAT_SET_SCAN_DISTRICT = 1,
	// VALUE is NULL, SIZE=0. Indicates that the following scan is the last
	// region of the current scan combination.
	MAAT_SET_SCAN_LAST_REGION = 2,
	// VALUE is a struct Maat_hit_path_t*, an array of struct Maat_hit_path_t,
	// SIZE=sizeof(struct Maat_hit_path_t)*N.
	// Maat_get_scan_status returns the number of entries actually obtained.
	MAAT_GET_SCAN_HIT_PATH = 3
};
// Returns 0 on success, -1 on failure.
int Maat_set_scan_status(
	Maat_feather_t feather,
	scan_status_t *mid,
	enum MAAT_SCAN_OPT type,
	const void *value,
	int size);

// Returns >=0 on success, -1 on failure.
int Maat_get_scan_status(
	Maat_feather_t feather,
	scan_status_t *mid,
	enum MAAT_SCAN_OPT type,
	void *value,
	int size);

// Scan functions for a flag, an integer value and an address.
// Per the note that opens this group in the original header:
//   - the return value is the number of hit rules, -1 when an error occurs,
//     and -2 when "hit current region";
//   - mid MUST be set to NULL before the first call.
// NOTE(review): uint64_t is used below but this header does not include
// <stdint.h> itself; presumably it arrives through <MESA/stream.h> - confirm.
int Maat_scan_flag(
	Maat_feather_t feather,
	int table_id,
	uint64_t flag,
	struct Maat_rule_t *result,
	int rule_num,
	scan_status_t *mid,
	int thread_num);

int Maat_scan_intval(
	Maat_feather_t feather,
	int table_id,
	unsigned int intval,
	struct Maat_rule_t *result,
	int rule_num,
	scan_status_t *mid,
	int thread_num);

int Maat_scan_addr(
	Maat_feather_t feather,
	int table_id,
	struct ipaddr *addr,
	struct Maat_rule_t *result,
	int rule_num,
	scan_status_t *mid,
	int thread_num);

int Maat_scan_proto_addr(
	Maat_feather_t feather,
	int table_id,
	struct ipaddr *addr,
	unsigned short int proto,
	struct Maat_rule_t *result,
	int rule_num,
	scan_status_t *mid,
	int thread_num);
// Scans a complete string of data_len bytes in the given charset.
// NOTE(review): presumably follows the same return/mid conventions as the
// other Maat_scan_* functions - confirm.
int Maat_full_scan_string(
	Maat_feather_t feather,
	int table_id,
	enum MAAT_CHARSET charset,
	const char *data,
	int data_len,
	struct Maat_rule_t *result,
	int *found_pos,
	int rule_num,
	scan_status_t *mid,
	int thread_num);

// Same scan, additionally reporting hit details.
// hit_detail could be NULL if not cared about.
int Maat_full_scan_string_detail(
	Maat_feather_t feather,
	int table_id,
	enum MAAT_CHARSET charset,
	const char *data,
	int data_len,
	struct Maat_rule_t *result,
	int rule_num,
	struct Maat_hit_detail_t *hit_detail,
	int detail_num,
	int *detail_ret,
	scan_status_t *mid,
	int thread_num);

// Stream string scanning: start, scan (with or without detail), end.
stream_para_t Maat_stream_scan_string_start(
	Maat_feather_t feather,
	int table_id,
	int thread_num);

int Maat_stream_scan_string(
	stream_para_t *stream_para,
	enum MAAT_CHARSET charset,
	const char *data,
	int data_len,
	struct Maat_rule_t *result,
	int *found_pos,
	int rule_num,
	scan_status_t *mid);

// hit_detail could be NULL if not cared about.
int Maat_stream_scan_string_detail(
	stream_para_t *stream_para,
	enum MAAT_CHARSET charset,
	const char *data,
	int data_len,
	struct Maat_rule_t *result,
	int rule_num,
	struct Maat_hit_detail_t *hit_detail,
	int detail_num,
	int *detail_ret,
	scan_status_t *mid);

void Maat_stream_scan_string_end(stream_para_t *stream_para);

// Stream digest scanning: start, scan, end.
// NOTE(review): presumably total_len is the length of the whole stream and
// offset the position of this data chunk within it - confirm.
stream_para_t Maat_stream_scan_digest_start(
	Maat_feather_t feather,
	int table_id,
	unsigned long long total_len,
	int thread_num);

int Maat_stream_scan_digest(
	stream_para_t *stream_para,
	const char *data,
	int data_len,
	unsigned long long offset,
	struct Maat_rule_t *result,
	int rule_num,
	scan_status_t *mid);

void Maat_stream_scan_digest_end(stream_para_t *stream_para);

int Maat_similar_scan_string(
	Maat_feather_t feather,
	int table_id,
	const char *data,
	int data_len,
	struct Maat_rule_t *result,
	int rule_num,
	scan_status_t *mid,
	int thread_num);

// NOTE(review): presumably releases the scan state that the scan functions
// keep behind *mid - confirm.
void Maat_clean_status(scan_status_t *mid);

// Opaque per-rule extension data.
typedef void *MAAT_RULE_EX_DATA;

// Callbacks registered with Maat_rule_get_ex_new_index().
// idx is the index: the same value that Maat_rule_get_ex_new_index() returned
// when the functions were initially registered.
// argl and argp are the values originally passed to the corresponding
// parameters of Maat_rule_get_ex_new_index().
typedef void Maat_rule_EX_new_func_t(
	int idx,
	const struct Maat_rule_t *rule,
	const char *srv_def_large,
	MAAT_RULE_EX_DATA *ad,
	long argl,
	void *argp);

typedef void Maat_rule_EX_free_func_t(
	int idx,
	const struct Maat_rule_t *rule,
	const char *srv_def_large,
	MAAT_RULE_EX_DATA *ad,
	long argl,
	void *argp);

typedef void Maat_rule_EX_dup_func_t(
	int idx,
	MAAT_RULE_EX_DATA *to,
	MAAT_RULE_EX_DATA *from,
	long argl,
	void *argp);

// Registers new/free/dup callbacks for the named compile table; the returned
// value is the idx later handed to those callbacks.
int Maat_rule_get_ex_new_index(
	Maat_feather_t feather,
	const char *compile_table_name,
	Maat_rule_EX_new_func_t *new_func,
	Maat_rule_EX_free_func_t *free_func,
	Maat_rule_EX_dup_func_t *dup_func,
	long argl,
	void *argp);

// The returned data is duplicated by the dup_func given to
// Maat_rule_get_ex_new_index(); the caller is responsible for freeing it.
MAAT_RULE_EX_DATA Maat_rule_get_ex_data(
	Maat_feather_t feather,
	const struct Maat_rule_t *rule,
	int idx);

// Sorts rules by their evaluation order.
// rule_array is modified in place and holds the sorted rules afterwards.
// Returns the number of sorted rules, which may be less than n_rule if some
// rules are invalid.
size_t Maat_rule_sort_by_evaluation_order(
	Maat_feather_t feather,
	struct Maat_rule_t *rule_array,
	size_t n_rule);


// Helper for parsing a line whose columns are separated by spaces or tabs.
// Nth_column: columns are numbered from 1.
// Returns 0 on success.
int Maat_helper_read_column(
	const char *line,
	int Nth_column,
	size_t *column_offset,
	size_t *column_len);


// The following types and functions are similar to Maat_rule_get_ex_data,
// except that they work on plugin tables.
typedef void *MAAT_PLUGIN_EX_DATA;

typedef void Maat_plugin_EX_new_func_t(
	int table_id,
	const char *key,
	const char *table_line,
	MAAT_PLUGIN_EX_DATA *ad,
	long argl,
	void *argp);

typedef void Maat_plugin_EX_free_func_t(
	int table_id,
	MAAT_PLUGIN_EX_DATA *ad,
	long argl,
	void *argp);

typedef void Maat_plugin_EX_dup_func_t(
	int table_id,
	MAAT_PLUGIN_EX_DATA *to,
	MAAT_PLUGIN_EX_DATA *from,
	long argl,
	void *argp);

typedef int Maat_plugin_EX_key2index_func_t(const char *key);

// Registers extension data callbacks for a plugin table.
// NOTE(review): the return convention is not stated in this header - confirm.
int Maat_plugin_EX_register(
	Maat_feather_t feather,
	int table_id,
	Maat_plugin_EX_new_func_t *new_func,
	Maat_plugin_EX_free_func_t *free_func,
	Maat_plugin_EX_dup_func_t *dup_func,
	Maat_plugin_EX_key2index_func_t *key2index_func,
	long argl,
	void *argp);

// The data is duplicated by the dup_func given to Maat_plugin_EX_register();
// the caller is responsible for FREEING it.
MAAT_PLUGIN_EX_DATA Maat_plugin_get_EX_data(
	Maat_feather_t feather,
	int table_id,
	const char *key);


// Registers extension data callbacks for an IP plugin table.
// NOTE(review): the return convention is not stated in this header - confirm.
int Maat_ip_plugin_EX_register(
	Maat_feather_t feather,
	int table_id,
	Maat_plugin_EX_new_func_t *new_func,
	Maat_plugin_EX_free_func_t *free_func,
	Maat_plugin_EX_dup_func_t *dup_func,
	long argl,
	void *argp);

// IP address key passed to Maat_ip_plugin_get_EX_data().
struct ip_address
{
	int ip_type;	// 4: IPv4, 6: IPv6
	union
	{
		unsigned int ipv4;	// network order
		// NOTE(review): byte order is not stated for ipv6; presumably network
		// order as well - confirm.
		unsigned int ipv6[4];
	};
};

// NOTE(review): presumably writes up to n_ex_data entries into ex_data_array
// and returns how many were written - confirm against the implementation.
int Maat_ip_plugin_get_EX_data(
	Maat_feather_t feather,
	int table_id,
	const struct ip_address *ip,
	MAAT_PLUGIN_EX_DATA *ex_data_array,
	size_t n_ex_data);

// Registers extension data callbacks for an FQDN plugin table.
// NOTE(review): the return convention is not stated in this header - confirm.
int Maat_fqdn_plugin_EX_register(
	Maat_feather_t feather,
	int table_id,
	Maat_plugin_EX_new_func_t *new_func,
	Maat_plugin_EX_free_func_t *free_func,
	Maat_plugin_EX_dup_func_t *dup_func,
	long argl,
	void *argp);

// Return order: longer suffix first, then fqdn with bigger index first.
int Maat_fqdn_plugin_get_EX_data(
	Maat_feather_t feather,
	int table_id,
	const char *fqdn,
	MAAT_PLUGIN_EX_DATA *ex_data_array,
	size_t n_ex_data);

// Registers extension data callbacks for a bool plugin table.
// NOTE(review): the return convention is not stated in this header - confirm.
int Maat_bool_plugin_EX_register(
	Maat_feather_t feather,
	int table_id,
	Maat_plugin_EX_new_func_t *new_func,
	Maat_plugin_EX_free_func_t *free_func,
	Maat_plugin_EX_dup_func_t *dup_func,
	long argl,
	void *argp);

// item_ids holds n_item ids; ex_data_array has room for n_ex_data entries.
int Maat_bool_plugin_get_EX_data(
	Maat_feather_t feather,
	int table_id,
	unsigned long long item_ids[],
	size_t n_item,
	MAAT_PLUGIN_EX_DATA *ex_data_array,
	size_t n_ex_data);

// Items readable through Maat_read_rule().
enum MAAT_RULE_OPT
{
	// VALUE is a char* buffer, SIZE=buffer size.
	MAAT_RULE_SERV_DEFINE = 0
};
// Reads the item selected by type from rule into value (size bytes available).
// NOTE(review): the return convention is not stated in this header - confirm.
int Maat_read_rule(
	Maat_feather_t feather,
	const struct Maat_rule_t *rule,
	enum MAAT_RULE_OPT type,
	void *value,
	int size);

#ifdef __cplusplus 
}//end extern "C"
#endif


#endif	//	H_MAAT_RULE_H_INCLUDE