/*
 * Pattern replace / script insert helpers.
 *
 * NOTE(review): this copy of the file looks damaged and should be checked
 * against the upstream source before any code change:
 *   - several include directives carry no header name;
 *   - many string literals handed to strstr(), and the js_style / css_style
 *     format strings in insert_string(), are empty (presumably markup text
 *     was stripped);
 *   - the loop header in remove_string_with_tags() is syntactically
 *     incomplete (unbalanced parentheses, `end` is never assigned);
 *   - simple_replace() is cut off in the middle of its final for statement.
 * The notes below describe only what the visible text does.
 *
 * zone_name_to_id(name)
 *   Compares name case-insensitively against a six-entry table and returns
 *   the index of the first match cast to enum replace_zone. When nothing
 *   matches, the table length (6) is returned.
 *   NOTE(review): the sixth entry repeats "http_resp_body", so it can never
 *   be the first match; format_replace_rule() tests the result against
 *   kZoneMax - confirm kZoneMax equals the table length.
 *
 * strchr_esc(s, delim)
 *   Returns a pointer to the first occurrence of delim in s that is not
 *   preceded by a backslash, or NULL when s is NULL or no such occurrence
 *   exists. A backslash causes the following character to be skipped.
 *   NOTE(review): for a backslash that is the last character, the skip plus
 *   the loop increment step past the terminator before it is tested.
 *
 * strtok_r_esc(s, delim, save_ptr)
 *   Tokenizer built on strchr_esc(). With s == NULL it resumes from
 *   *save_ptr. If no unescaped delimiter remains, *save_ptr is set to NULL
 *   and the rest of the string (s, possibly NULL) is returned. Otherwise the
 *   delimiter is overwritten with a terminator, *save_ptr is set to the
 *   character after it, and s is returned. Backslashes are left in the
 *   token. Leading delimiters are not skipped, despite the inline comment.
 *
 * format_replace_rule(exec_para, replace, n_replace)
 *   Parses text of the form "zone=NAME;substitute=/FIND/REPLACE" (the format
 *   simple_replace() builds) into replace[]. Works on a scratch copy that is
 *   split on ';' with strtok_r(). Per iteration: a token starting with
 *   "zone=" sets replace[idx].zone via zone_name_to_id() and stops parsing
 *   if that yields kZoneMax; the next token, if it starts with
 *   "substitute=", has the prefix plus one further character skipped and is
 *   split on unescaped '/' into find and replace_with, both duplicated with
 *   tfe_strdup(). Parsing stops once idx reaches n_replace. Returns the
 *   number of rules filled.
 *   NOTE(review): the scratch copy is filled with memcpy of strlen bytes, so
 *   termination relies on ALLOC zero-filling - TODO confirm.
 *   NOTE(review): the second strtok_r() result is passed to strncasecmp()
 *   without a NULL check.
 *   NOTE(review): strtok_r_esc() may return NULL for replace_with; how
 *   tfe_strdup() treats NULL is not visible here.
 *
 * select_replace_rule(zone, replace, n_replace, selected, n_selected)
 *   Stores pointers to the rules whose zone equals zone into selected[], in
 *   order, up to n_selected entries. Returns how many were stored.
 *
 * replace_string(in, in_sz, zone, out, options)
 *   Compiles zone->find as a PCRE2 pattern (PCRE2_UTF when options is
 *   non-zero) and runs pcre2_substitute() over in with the GLOBAL, EXTENDED
 *   and OVERFLOW_LENGTH flags, using zone->replace_with as the replacement.
 *   If the reported length exceeds the buffer, the buffer is freed,
 *   re-allocated at the reported length, and the call is retried. Returns 0
 *   when compilation fails or the substitute result is <= 0 (*out is left
 *   untouched); otherwise stores the malloc'ed buffer in *out and returns
 *   the output length.
 *   NOTE(review): the initial size uses sizeof(replacement), i.e. the size
 *   of the variable, not the length of the replacement text - presumably
 *   unintended, though the retry path compensates.
 *   NOTE(review): the malloc() result is not checked.
 *
 * execute_replace_rule(in, in_sz, zone, rules, n_rule, out, options)
 *   Returns 0 for NULL or empty input. Otherwise selects the rules for zone
 *   and applies them in sequence, each one consuming the previous output.
 *   A rule for which replace_string() returns 0 is skipped. Intermediate
 *   buffers are freed. If at least one rule produced output, *out receives
 *   the last buffer and its size is returned; otherwise 0 is returned and
 *   *out is left untouched.
 *   NOTE(review): todo[] is a variable-length array sized by n_rule;
 *   simple_replace() can pass a count of 0 - confirm this is acceptable.
 *
 * find_insert_position(in)
 *   Returns NULL for NULL input. Looks up one literal (head_in) and the text
 *   "jquery" in in. When both are found and head_in lies after the "jquery"
 *   hit, it searches for a second literal from the "jquery" hit and, if
 *   found, returns that position plus sizeof of the literal; if not found,
 *   the "jquery" position itself is returned. In every other case head_in
 *   is returned.
 *   NOTE(review): sizeof of a string literal counts the terminator, so the
 *   offset is one larger than the literal's text length - confirm intent.
 *
 * find_remove_position(start, end)
 *   Returns 0 when end is not after start. Otherwise copies [start, end)
 *   into a scratch buffer and returns 1 if either of two literals occurs in
 *   it, else 0. The scratch buffer is freed on both paths.
 *   NOTE(review): termination of the scratch copy relies on ALLOC
 *   zero-filling - TODO confirm.
 *
 * remove_string_with_tags(html)
 *   Edits html in place. In the visible text: when end is non-NULL it is
 *   advanced by 3; if find_remove_position(start, end) is true and a further
 *   strstr() from start succeeds, the tail beginning at end is moved down
 *   onto start with memmove(); otherwise scanning continues from end. The
 *   loop stops when end is NULL. The loop header itself is incomplete in
 *   this copy.
 *
 * insert_string(in, in_sz, insert_on, script, type, out)
 *   Returns 0 when script or in is NULL, or when no insertion point is
 *   found. The insertion point is a strstr() hit in in when insert_on equals
 *   "html_body" (case-insensitive), otherwise find_insert_position(in).
 *   type selects css_style ("css") or js_style ("js"), compared
 *   case-insensitively; any other value hits assert(0). The selected style
 *   string is used as the snprintf() format with script as its argument.
 *   The result is a new buffer of in_sz + formatted length bytes holding the
 *   input prefix, the formatted text, then the input suffix. It is stored in
 *   *out and its size is returned. The buffer has no room for a terminator.
 *
 * execute_insert_rule(in, in_sz, rules, out)
 *   Copies in into a scratch buffer of in_sz + 1 bytes, runs
 *   remove_string_with_tags() on the copy, then calls insert_string() with
 *   strlen() of the copy and rules->position, rules->script, rules->type.
 *   Frees the copy and returns the insert_string() result.
 *   NOTE(review): in_sz is a size_t, so the `in_sz < 0` test cannot be true.
 *
 * simple_insert(in, in_sz, insert_on, script, type, out)
 *   Fills a zeroed struct insert_rule (type, script, position, and
 *   inject_sz = strlen(script)) and forwards to execute_insert_rule().
 *
 * simple_replace(find, replacement, input, in_sz, output, output_sz, options)
 *   Builds "zone=http_resp_body;substitute=/FIND/REPLACEMENT" with
 *   asprintf(), parses it into at most 16 rules, and runs
 *   execute_replace_rule() for kZoneResponseBody, storing the result size in
 *   *output_sz. The input length passed on is strlen(input), not in_sz.
 *   The rest of the function is not visible in this copy.
 */
#include "pattern_replace.h" #include #define PCRE2_CODE_UNIT_WIDTH 8 #include #include #include #include #include #include #define MAX_EDIT_MATCHES 16 enum replace_zone zone_name_to_id(const char * name) { const char * std_name[] = {"http_req_uri", "http_req_header", "http_req_body", "http_resp_header", "http_resp_body", "http_resp_body"}; size_t i = 0; for (i = 0; i < sizeof(std_name) / sizeof(const char *); i++) { if (0 == strcasecmp(name, std_name[i])) { break; } } return (enum replace_zone) i; } static char *__attribute__((__unused__)) strchr_esc(char * s, const char delim) { char * token; if (s == NULL) return NULL; for (token = s; *token != '\0'; token++) { if (*token == '\\') { token++; continue; } if (*token == delim) break; } if (*token == '\0') { return NULL; } else { return token; } } static char *__attribute__((__unused__)) strtok_r_esc(char * s, const char delim, char ** save_ptr) { char * token; if (s == NULL) s = *save_ptr; /* Scan leading delimiters. */ token = strchr_esc(s, delim); if (token == NULL) { *save_ptr = token; return s; } /* Find the end of the token. 
*/ *token = '\0'; token++; *save_ptr = token; return s; } size_t __attribute__((__unused__)) format_replace_rule(const char * exec_para, struct replace_rule * replace, size_t n_replace) { char * tmp = ALLOC(char, strlen(exec_para) + 1); char * token = NULL, * sub_token = NULL, * saveptr = NULL, * saveptr2 = NULL; size_t idx = 0; const char * str_zone = "zone="; const char * str_subs = "substitute="; memcpy(tmp, exec_para, strlen(exec_para)); for (token = tmp;; token = NULL) { sub_token = strtok_r(token, ";", &saveptr); if (sub_token == NULL) break; if (0 == strncasecmp(sub_token, str_zone, strlen(str_zone))) { replace[idx].zone = zone_name_to_id(sub_token + strlen(str_zone)); if (replace[idx].zone == kZoneMax) { break; } } sub_token = strtok_r(NULL, ";", &saveptr); if (0 == strncasecmp(sub_token, str_subs, strlen(str_subs))) { sub_token += strlen(str_subs) + 1; replace[idx].find = tfe_strdup(strtok_r_esc(sub_token, '/', &saveptr2)); replace[idx].replace_with = tfe_strdup(strtok_r_esc(NULL, '/', &saveptr2)); idx++; if (idx == n_replace) { break; } } } free(tmp); tmp = NULL; return idx; } size_t select_replace_rule(enum replace_zone zone, const struct replace_rule * replace, size_t n_replace, const struct replace_rule ** selected, size_t n_selected) { size_t i = 0, j = 0; for (i = 0; i < n_replace && j < n_selected; i++) { if (replace[i].zone == zone) { selected[j] = replace + i; j++; } } return j; } size_t replace_string(const char * in, size_t in_sz, const struct replace_rule * zone, char** out, int options) { assert(strlen(zone->find) != 0); int error=0; PCRE2_SIZE erroffset=0; const PCRE2_SPTR pattern = (PCRE2_SPTR)zone->find; const PCRE2_SPTR subject = (PCRE2_SPTR)in; const PCRE2_SPTR replacement = (PCRE2_SPTR)zone->replace_with; uint32_t pcre2_options = options ? 
PCRE2_UTF : 0; pcre2_code *re = pcre2_compile(pattern, strlen(zone->find), pcre2_options, &error, &erroffset, 0); if (!re) return 0; pcre2_jit_compile(re, PCRE2_JIT_COMPLETE); PCRE2_SIZE outbuff_size = in_sz+sizeof(replacement)*MAX_EDIT_MATCHES; PCRE2_SIZE outlen = 0; PCRE2_UCHAR* out_buffer = NULL; not_enough_mem_retry: out_buffer = (PCRE2_UCHAR*)malloc(sizeof(PCRE2_UCHAR)*outbuff_size); outlen = outbuff_size; int rc = pcre2_substitute(re, subject, in_sz, 0, PCRE2_SUBSTITUTE_GLOBAL | PCRE2_SUBSTITUTE_EXTENDED | PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, 0, 0, //pcre2_match_data *match_data, pcre2_match_context replacement, strlen(zone->replace_with), out_buffer, &outlen); if(outlen>outbuff_size) { outbuff_size=outlen; free(out_buffer); out_buffer=NULL; goto not_enough_mem_retry; } if(rc<=0) { free(out_buffer); outlen=0; } else { *out=(char*)out_buffer; } pcre2_code_free(re); return outlen; } size_t execute_replace_rule(const char * in, size_t in_sz, enum replace_zone zone, const struct replace_rule * rules, size_t n_rule, char** out, int options) { const struct replace_rule * todo[n_rule]; size_t n_todo = 0, i = 0, interator_sz=0, pre_out_sz=0; const char * interator = NULL; char* new_out = NULL, * pre_out = NULL; size_t output_size=0; if (in_sz == 0 || in==NULL) { return 0; } n_todo = select_replace_rule(zone, rules, n_rule, todo, n_rule); interator = in; interator_sz = in_sz; for (i = 0; i < n_todo; i++) { output_size = replace_string(interator, interator_sz, todo[i], &new_out, options); if (output_size == 0) { continue; } if (pre_out != NULL) { free(pre_out); pre_out = NULL; } pre_out = new_out; pre_out_sz = output_size; interator = new_out; interator_sz = output_size; new_out=NULL; output_size=0; } if(pre_out_sz>0) { *out=pre_out; return pre_out_sz; } else { return 0; } } static char *find_insert_position(char * in) { char *insert_from = NULL; char *script_local = NULL; char *head_in = NULL; if (in == NULL) { return NULL; } head_in = strstr(in, ""); insert_from = 
strstr(in, "jquery"); if (insert_from != NULL && head_in != NULL && (head_in - insert_from) > 0) { script_local=strstr(insert_from, ""); if (script_local) insert_from=script_local + sizeof(""); } else { insert_from=head_in; } return insert_from; } int find_remove_position(char *start, char *end) { if(end - start <=0) { return 0; } char *tags = ALLOC(char, (end - start)+1); memcpy(tags, start, end - start); if(strstr(tags, "") != NULL || strstr(tags, "") != NULL) { free(tags); return 1; } free(tags); return 0; } void remove_string_with_tags(char *html) { char *start = NULL, *end = NULL; while ((start = strstr(html, ""); if (end != NULL) { end += 3; if (find_remove_position(start, end) &&(strstr(start, "") != NULL || strstr(start, "") != NULL)) { memmove(start, end, strlen(end) + 1); } else { html = end; } } else { break; } } return; } size_t insert_string(char * in, size_t in_sz, const char *insert_on, const char *script, const char *type, char** out) { char *target=NULL; size_t target_size=0; const char* js_style=""; const char* css_style= ""; const char* this_style=NULL; size_t concat_len=0, concat_size=0; char* concat_style=NULL; char* insert_from=NULL; size_t offset=0; if (script == NULL || in == NULL) { return 0; } if (insert_on != NULL && 0==strcasecmp(insert_on, "html_body")) { insert_from=strstr(in, ""); } else { insert_from=find_insert_position(in); } if(!insert_from) { return 0; } if (0==strcasecmp(type, "css")) { this_style=css_style; } else if (0==strcasecmp(type, "js")) { this_style=js_style; } else { assert(0); } concat_size = strlen(script)+1+strlen(this_style); concat_style = ALLOC(char, concat_size); concat_len=snprintf(concat_style, concat_size, this_style, script); target_size = in_sz+concat_len; target = ALLOC(char, target_size); assert((unsigned int)(insert_from-in) <= target_size); offset=0; memcpy(target+offset, in, insert_from-in); offset+=insert_from-in; memcpy(target+offset, concat_style, concat_len); offset+=concat_len; 
memcpy(target+offset, insert_from, in_sz-(insert_from-in)); offset+=in_sz-(insert_from-in); assert(target_size==offset); free(concat_style); concat_style = NULL; *out=target; return target_size; } size_t execute_insert_rule(char * in, size_t in_sz, const struct insert_rule * rules, char** out) { size_t out_size=0; if (in == NULL || in_sz < 0) { return 0; } char*new_in = ALLOC(char, in_sz+1); memcpy(new_in, in, in_sz); remove_string_with_tags(new_in); out_size = insert_string(new_in, strlen(new_in), rules->position, rules->script, rules->type, out); free(new_in); new_in=NULL; return out_size; } size_t simple_insert(char * in, size_t in_sz, const char *insert_on, const char *script, const char *type, char** out) { struct insert_rule rules; memset(&rules, 0, sizeof(rules)); rules.type=(char *)type; rules.script=(char *)script; rules.position=(char *)insert_on; rules.inject_sz=strlen(script); return execute_insert_rule(in, in_sz, &rules, out); } void simple_replace(const char* find, const char* replacement, const char* input, size_t in_sz, char** output, size_t *output_sz, int options) { char* exec_para=NULL; asprintf(&exec_para,"zone=http_resp_body;substitute=/%s/%s", find, replacement); size_t n_got_rule=0, i=0; struct replace_rule rules[16]; n_got_rule=format_replace_rule(exec_para, rules, sizeof(rules)/sizeof(rules[0])); *output_sz=execute_replace_rule(input, strlen(input), kZoneResponseBody, rules, n_got_rule, output, options); for(i=0; i