From 84ce78c1ce9036c80e5b2447993fce786c8d670e Mon Sep 17 00:00:00 2001 From: fengweihao Date: Fri, 17 Dec 2021 16:39:49 +0800 Subject: TSG-8935 修复JSON数组格式MARK标记问题 增加元素编辑自测试用例,文件名和变量变更 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plugin/business/pangu-http/CMakeLists.txt | 5 +- plugin/business/pangu-http/src/edit_element.cpp | 875 +++++++++++++++++++++ plugin/business/pangu-http/src/edit_element.h | 24 + .../business/pangu-http/src/pangu_element_edit.cpp | 764 ------------------ .../business/pangu-http/src/pangu_element_edit.h | 22 - plugin/business/pangu-http/src/pangu_http.cpp | 8 +- .../business/pangu-http/src/test_edit_element.cpp | 142 ++++ 7 files changed, 1049 insertions(+), 791 deletions(-) create mode 100644 plugin/business/pangu-http/src/edit_element.cpp create mode 100644 plugin/business/pangu-http/src/edit_element.h delete mode 100644 plugin/business/pangu-http/src/pangu_element_edit.cpp delete mode 100644 plugin/business/pangu-http/src/pangu_element_edit.h create mode 100644 plugin/business/pangu-http/src/test_edit_element.cpp (limited to 'plugin') diff --git a/plugin/business/pangu-http/CMakeLists.txt b/plugin/business/pangu-http/CMakeLists.txt index 8bee69f..ea99523 100644 --- a/plugin/business/pangu-http/CMakeLists.txt +++ b/plugin/business/pangu-http/CMakeLists.txt @@ -1,4 +1,4 @@ -add_library(pangu-http src/pangu_logger.cpp src/pangu_http.cpp src/pattern_replace.cpp src/pangu_web_cache.cpp src/pangu_element_edit.cpp) +add_library(pangu-http src/pangu_logger.cpp src/pangu_http.cpp src/pattern_replace.cpp src/pangu_web_cache.cpp src/edit_element.cpp) target_link_libraries(pangu-http PUBLIC common http tango-cache-client) target_link_libraries(pangu-http PUBLIC rdkafka ctemplate-static cjson pcre2-static ratelimiter-static libdablooms pthread) target_link_libraries(pangu-http PUBLIC maatframe) @@ -8,5 +8,8 @@ add_executable(test_pattern_replace src/test_pattern_replace.cpp src/pattern_rep target_link_libraries(test_pattern_replace common gtest pcre2-static) file(COPY test_data DESTINATION ./) +add_executable(test_edit_element src/test_edit_element.cpp src/edit_element.cpp) +target_link_libraries(test_edit_element common gtest pcre2-static libxml2-static z) + add_executable(replace_tool src/replace_tool.cpp src/pattern_replace.cpp) target_link_libraries(replace_tool common pcre2-static) diff --git a/plugin/business/pangu-http/src/edit_element.cpp b/plugin/business/pangu-http/src/edit_element.cpp new file mode 100644 index 0000000..182b88b --- /dev/null +++ b/plugin/business/pangu-http/src/edit_element.cpp @@ -0,0 +1,875 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "edit_element.h" + +int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct edit_element_rule * rules, int *match_num, int loop); +static void html_node_list(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match); +size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out); + +enum search_scope scope_name_to_id(const char * name) +{ + const char * std_name[] = {"inside_element","whole_file"}; + size_t i = 0; + for (i = 0; i < sizeof(std_name) / sizeof(const char *); i++) + { + if (0 == strcasecmp(name, std_name[i])) + { + break; + } + } + return (enum search_scope) i; +} + +int match_start_indicator(xmlNodePtr parent, char * start_indicator) +{ + if(parent->properties == NULL) + { + return 0; + } + + struct _xmlAttr *properties = parent->properties; + if(properties->children == NULL || properties->children->content == NULL) + { + return 0; + } + + xmlNodePtr children = properties->children; + if(!strcasecmp((char *)children->content, start_indicator)) + { + return 1; + } + + return 0; +} + +int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, int *step_array_level, const struct edit_element_rule * rules) +{ + const char *element_treatment=rules->element_treatment; + char * start_indicator = rules->start_indicator; + + if(element_treatment != NULL && !strcasecmp(element_treatment, "mark")) + { + if (rules->scope == kScopeInside) + { + if(a_element->type == cJSON_Object) + { + if(*node != NULL && strcasecmp(*node, start_indicator) != 0) + { + return 0; + } + } + if(a_element->type == cJSON_Array) + { + if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator)) + { + return 0; + } + } + } + + if(a_element->type==cJSON_Object) + { + cJSON_AddBoolToObject(a_element, "need_filter", true); + } + if(a_element->type == cJSON_Array) + { + cJSON *object = NULL; + object = cJSON_GetArrayItem(a_element, step_array_level[*step]); + if(object != NULL) + { + cJSON_AddBoolToObject(object, "need_filter", true); + } + } + } + + if(element_treatment != NULL && !strcasecmp(element_treatment, "remove")) + { + if (rules->scope == kScopeInside) + { + if(a_element->type == cJSON_Object) + { + if(*node != NULL && strcasecmp(*node, start_indicator) != 0) + { + return 0; + } + } + if(a_element->type == cJSON_Array) + { + if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator)) + { + return 0; + } + } + } + + if(a_element->type == cJSON_Object && *node != NULL) + { + cJSON_DeleteItemFromObject(a_element, *node); + } + if(a_element->type == cJSON_Array) + { + cJSON_DeleteItemFromArray(a_element, step_array_level[*step]); + } + } + + return 0; +} + +int construct_html_by_treatment(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match) +{ + int k=0; + char *new_out=NULL; + size_t output_size=0; + char * token = NULL, * saveptr = NULL; + + if(strcasestr((char *)node->content, "var ytInitialData")) + { + token = strtok_r((char *)node->content, "=", &saveptr); + if(token != NULL && ((saveptr[0] == '{') || (saveptr[1] == '{'))) + { + output_size = parse_json_output_unformatted(saveptr, strlen(saveptr), rules, &new_out); + if(output_size != 0 && new_out != NULL) + { + new_out[output_size] = ';'; + + FREE(&node->content); + + node->content = (xmlChar*)new_out; + *match =1; + return 0; + } + } + } + + if(strcasestr((char *)node->content, rules->contained_keyword) == NULL) + { + return 0; + } + + char * start_indicator = rules->start_indicator; + const char *element_treatment=rules->element_treatment; + int distane_from_matching = (rules->distane_from_matching + 1); + + if(element_treatment != NULL && !strcasecmp(element_treatment, "mark")) + { + xmlNodePtr parent = node->parent; + k++; + while (parent != NULL) + { + if(k == distane_from_matching) + { + if (rules->scope == kScopeInside && match_start_indicator(parent, start_indicator) == 0) + { + break; + } + + xmlNewProp(parent, (const xmlChar *)"need_filter", (const xmlChar *)"true"); + *match =1; + break; + } + k++; + parent = parent->parent; + } + } + + if(element_treatment != NULL && !strcasecmp(element_treatment, "remove")) + { + xmlNodePtr parent = node->parent; + k++; + while (parent != NULL) + { + if(k == distane_from_matching) + { + if (rules->scope == kScopeInside && match_start_indicator(parent, start_indicator) == 0) + { + break; + } + + /*This is the top floor, Don't deal with**/ + if(parent->parent == NULL) + { + break; + } + + if(*n_parent < 16) + { + parent_array[*n_parent] = parent; + *n_parent = *n_parent+1; + } + + *match =1; + break; + } + + k++; + parent = parent->parent; + } + } + + return 0; +} + +int cjson_dump_array(cJSON *a, int *depth, int *step, int *step_array_level, char **node, const struct edit_element_rule * rules, int *match_num, int loop) +{ + int xret=0, array_cnt=0; + + cJSON *a_element = a->child; + *step= *step + 1; + for (; (a_element != NULL);) + { + xret = cjson_element_foreach(a_element, depth, step, step_array_level, node, rules, match_num, loop); + if(xret == -1) + { + return -1; + } + if(*depth == 0) + { + construct_cjson_by_treatment(a_element, node, step, step_array_level, rules); + } + if(xret == 1) + { + *step = (*step >= 2047) ? 2047 : *step; + step_array_level[*step] = array_cnt; + *node = a_element->string; + *depth = *depth -1; + return 1; + } + array_cnt++; + a_element = a_element->next; + } + + return xret; +} + +int cjson_dump_object(cJSON *a, int *depth, int *step, int *step_array_level, char **node, const struct edit_element_rule * rules, int *match_num, int loop) +{ + int xret=0; + cJSON *a_element=NULL; + + cJSON_ArrayForEach(a_element, a) + { + xret = cjson_element_foreach(a_element, depth, step, step_array_level, node, rules, match_num, loop); + if(xret == -1) + { + return -1; + } + if(*depth == 0) + { + construct_cjson_by_treatment(a_element, node, step, step_array_level, rules); + } + if(xret == 1) + { + *node = a_element->string; + *depth = *depth -1; + return 1; + } + } + return xret; +} + +int cjson_dump_string(cJSON *a, int *depth, const struct edit_element_rule * rules, int *match_num, int loop) +{ + int xret=0; + + if((a->valuestring != NULL) && strcasestr(a->valuestring, rules->contained_keyword)) + { + if(*depth != -1) + { + if(!strcasecmp(rules->element_treatment, "mark")) + { + if(*match_num == loop) + { + xret = 1; + goto finish; + } + } + else + { + xret = 1; + goto finish; + + } + } + *match_num = *match_num + 1; + } +finish: + return xret; +} + +int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_array_level, char **node, const struct edit_element_rule * rules, int *match_num, int loop) +{ + if ((a == NULL) || cJSON_IsInvalid(a)) + { + return -1; + } + + switch (a->type & 0xFF) + { + case cJSON_String: + case cJSON_Raw: + return cjson_dump_string(a, depth, rules, match_num, loop); + + case cJSON_Array: + return cjson_dump_array(a, depth, step, step_array_level, node, rules, match_num, loop); + + case cJSON_Object: + return cjson_dump_object(a, depth, step, step_array_level, node, rules, match_num, loop); + + case cJSON_Number: + case cJSON_False: + case cJSON_True: + case cJSON_NULL: + return 0; + default: + return -1; + } + + return 0; +} + +static void html_namespace_list(xmlNsPtr ns) +{ + while (ns != NULL) + { + ns = ns->next; + } +} + +static void html_attr_list(const struct edit_element_rule * rules, xmlAttrPtr attr, xmlNodePtr *parent_array, size_t *n_parent, int *match) +{ + while (attr != NULL) + { + if (attr->children != NULL) + { + html_node_list(rules, attr->children, parent_array, n_parent, match); + } + + attr = attr->next; + } +} + +static void html_dump_one_node(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match) +{ + switch (node->type) + { + case XML_ELEMENT_NODE: + case XML_ELEMENT_DECL: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + case XML_NOTATION_NODE: + case XML_TEXT_NODE: + break; + + default: + return; + } + if ((node->type == XML_ELEMENT_NODE) && (node->nsDef != NULL)) + { + html_namespace_list(node->nsDef); + } + + if ((node->type == XML_ELEMENT_NODE) && (node->properties != NULL)) + { + html_attr_list(rules, node->properties, parent_array, n_parent, match); + } + + if (node->type != XML_ENTITY_REF_NODE) + { + if ((node->type != XML_ELEMENT_NODE) && (node->content != NULL)) + { + construct_html_by_treatment(rules, node, parent_array, n_parent, match); + } + } +} + +static void html_dump_node(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match) +{ + html_dump_one_node(rules, node, parent_array, n_parent, match); + if ((node->type != XML_NAMESPACE_DECL) && (node->children != NULL) && (node->type != XML_ENTITY_REF_NODE)) + { + html_node_list(rules, node->children, parent_array, n_parent, match); + } +} + +static void html_node_list(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match) +{ + while (node != NULL) + { + html_dump_node(rules, node, parent_array, n_parent, match); + node = node->next; + } +} + +static void html_element_foreach(const struct edit_element_rule * rules, xmlDocPtr doc, xmlNodePtr *parent_array, size_t *n_parent, int *match) +{ + if (((doc->type == XML_DOCUMENT_NODE) || (doc->type == XML_HTML_DOCUMENT_NODE)) && (doc->children != NULL)) + { + html_node_list(rules, doc->children, parent_array, n_parent, match); + } +} + +size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out) +{ + int match_num_peer=0; + int step=0, depth=0, match_num=0,i=0, match=0; + cJSON* interator=NULL; + char* new_out = NULL, *node=NULL; + size_t outlen=0; + char *element_treatment=NULL; + + int step_array_level[2048] = {0}; + + interator = cJSON_Parse(in); + if(interator==NULL) + { + goto finish; + } + + depth = -1; + element_treatment=rules->element_treatment; + + cjson_element_foreach(interator, &depth, &step, step_array_level, &node, rules, &match_num, 0); + match_num_peer = match_num; + for(i=0; i< match_num_peer; i++) + { + depth = (rules->distane_from_matching + 1); + step=0; node=NULL; match_num=0; + memset(step_array_level, 0, sizeof(step_array_level)); + + match |= cjson_element_foreach(interator, &depth, &step, step_array_level, &node, rules, &match_num, i); + + if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0) + { + cJSON_DeleteItemFromObject(interator, node); + } + } + + if(match==1 && element_treatment != NULL && !strcasecmp(element_treatment, "mark")) + { + if(interator->type==cJSON_Object) + { + cJSON_AddBoolToObject(interator, "need_check", true); + } + if(interator->type==cJSON_Array) + { + cJSON *child = interator->child; + for (; (child != NULL);) + { + cJSON_AddBoolToObject(child, "need_check", true); + child = child->next; + } + } + } + + new_out = cJSON_PrintUnformatted(interator); + if(new_out!=NULL) + { + *out = new_out; + outlen = strlen(new_out); + } + +finish: + if(interator != NULL) + cJSON_Delete(interator); + return outlen; +} + +size_t format_json_file_type(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out) +{ + int match_num_peer=0; + int step=0, depth=0, match=0, i=0; + cJSON* interator=NULL; + char* new_out = NULL, *node=NULL; + size_t outlen=0; int match_num=0; + char *element_treatment=NULL; + + int step_array_level[2048] = {0}; + + char*new_in = ALLOC(char, in_sz+1); + memcpy(new_in, in, in_sz); + + interator = cJSON_Parse(new_in); + if(interator==NULL) + { + goto finish; + } + + depth = -1; + element_treatment=rules->element_treatment; + + /*When the node has inclusion relation, cjson is not null when deleted + So multiple loops delete **/ + cjson_element_foreach(interator, &depth, &step, step_array_level, &node, rules, &match_num, 0); + match_num_peer = match_num; + for(i=0; i< match_num_peer; i++) + { + depth = (rules->distane_from_matching + 1); + step=0; node=NULL; match_num=0; + memset(step_array_level, 0, sizeof(step_array_level)); + + match |= cjson_element_foreach(interator, &depth, &step, step_array_level, &node, rules, &match_num, i); + + if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0) + { + cJSON_DeleteItemFromObject(interator, node); + } + + match_num--; + } + + if(match == 0) + { + goto finish; + } + + if(element_treatment != NULL && !strcasecmp(element_treatment, "mark")) + { + if(interator->type==cJSON_Object) + { + cJSON_AddBoolToObject(interator, "need_check", true); + } + + if(interator->type==cJSON_Array) + { + cJSON *child = interator->child; + for (; (child != NULL);) + { + cJSON_AddBoolToObject(child, "need_check", true); + child = child->next; + } + } + } + + new_out = cJSON_PrintUnformatted(interator); + if(new_out!=NULL) + { + *out = new_out; + outlen = strlen(*out); + } + +finish: + if(interator != NULL) + cJSON_Delete(interator); + FREE(&new_in); + return outlen; +} + +size_t format_multidelete_json_type(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out) +{ + char *new_out=NULL, *pre_out=NULL; + char * tmp = ALLOC(char, in_sz+1); + char * token = NULL, * sub_token = NULL, * saveptr = NULL; + size_t output_size = 0; + + size_t new_out_len=0; + /**Follow-up optimization */ + new_out = ALLOC(char, in_sz+in_sz/3); + + memcpy(tmp, in, in_sz); + + for (token = tmp;; token = NULL) + { + sub_token = strtok_r(token, "\n", &saveptr); + if (sub_token == NULL) + { + new_out[new_out_len-2]='\0'; + break; + } + output_size = parse_json_output_unformatted(sub_token, strlen(sub_token), rules, &pre_out); + if(output_size>0 && pre_out!=NULL) + { + memcpy(new_out+new_out_len, pre_out, strlen(pre_out)); + new_out_len += strlen(pre_out); + memcpy(new_out+new_out_len, "\r\n", 2); + new_out_len +=2; + FREE(&pre_out); + } + } + + if(new_out) + { + *out = new_out; + output_size = strlen(new_out); + } + + free(tmp); + tmp = NULL; + return output_size; +} + +size_t construct_format_html(htmlDocPtr doc, char**out) +{ + size_t outlen=0; + xmlBufferPtr out_buffer; + const xmlChar *xmlCharBuffer; + xmlSaveCtxtPtr saveCtxtPtr; + + out_buffer = xmlBufferCreate(); + if (out_buffer == NULL) + { + goto finish; + } + + saveCtxtPtr = xmlSaveToBuffer(out_buffer, "UTF-8", XML_SAVE_NO_DECL | XML_SAVE_AS_HTML); + if (xmlSaveDoc(saveCtxtPtr, doc) < 0) + { + goto finish; + } + xmlSaveClose(saveCtxtPtr); + + xmlCharBuffer = xmlBufferContent(out_buffer); + if(xmlCharBuffer != NULL) + { + char*new_out = ALLOC(char, strlen((char *)xmlCharBuffer)+1); + memcpy(new_out, (char *)xmlCharBuffer, strlen((char *)xmlCharBuffer)); + + *out = new_out; + outlen = strlen((char *)xmlCharBuffer); + } + +finish: + if(out_buffer != NULL) + { + xmlBufferFree(out_buffer); + } + + return outlen; +} + +size_t format_input_html(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out) +{ + size_t outlen=0, n_parent=0, n_parent_peer=0; + int match=0, i=0; + htmlDocPtr doc = NULL; + const char *element_treatment=NULL; + xmlNodePtr parent_array[16]; + int options = XML_PARSE_NOERROR | HTML_PARSE_NODEFDTD; + + doc = htmlReadMemory(in, in_sz, NULL, NULL, options); + if (doc == NULL) + { + goto finish; + } + + /*When the node has inclusion relation, libxml2 is not null when deleted + So multiple loops delete **/ + html_element_foreach(rules, doc, parent_array, &n_parent, &match); + if(match != 1) + { + goto finish; + } + + n_parent_peer = n_parent; + element_treatment=rules->element_treatment; + + if(element_treatment != NULL && !strcasecmp(element_treatment, "remove")) + { + for(i=0; i < (int)n_parent_peer; i++) + { + match =0; n_parent = 0; + html_element_foreach(rules, doc, parent_array, &n_parent, &match); + if(match == 1) + { + xmlUnlinkNode(parent_array[0]); + xmlFreeNode(parent_array[0]); + } + } + } + + if(element_treatment != NULL && !strcasecmp(element_treatment, "mark")) + { + if(doc->children != NULL && doc->children->next != NULL) + { + xmlNewProp(doc->children->next, (const xmlChar *)"need_check", (const xmlChar *)"true"); + } + else if(doc->children != NULL) + { + xmlNewProp(doc->children, (const xmlChar *)"need_check", (const xmlChar *)"true"); + } + } + + outlen = construct_format_html(doc, out); + if(outlen<=0) + { + outlen=0; + } + +finish: + if(doc!=NULL) + { + xmlFreeDoc(doc); + } + return outlen; +} + +size_t format_html_file_type(const char * interator, size_t interator_sz, const struct edit_element_rule *rule, char **new_out) +{ + size_t output_size=0; + + if(interator[0] == '{') + { + output_size = format_multidelete_json_type(interator, interator_sz, rule, new_out); + } + else + { + output_size = format_input_html(interator, interator_sz, rule, new_out); + } + + return output_size; +} + +size_t parse_string(const char * interator, size_t interator_sz, const struct edit_element_rule *rule, char **new_out, int options) +{ + size_t output_size=0; + + if(options) + { + output_size = format_json_file_type(interator, interator_sz, rule, new_out); + } + else + { + output_size = format_html_file_type(interator, interator_sz, rule, new_out); + } + + return output_size; +} + +size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct edit_element_rule *rules, size_t n_rule, char** out, int options) +{ + const struct edit_element_rule * todo = rules; + size_t i = 0, interator_sz=0, pre_out_sz=0; + const char * interator = NULL; + char* new_out = NULL, * pre_out = NULL; + size_t output_size=0; + if (in_sz == 0 || in==NULL) + { + return 0; + } + interator = in; + interator_sz = in_sz; + for (i = 0; i < n_rule; i++) + { + output_size = parse_string(interator, interator_sz, &(todo[i]), &new_out, options); + if (output_size == 0) + { + continue; + } + if (pre_out != NULL) + { + free(pre_out); + pre_out = NULL; + } + pre_out = new_out; + pre_out_sz = output_size; + + interator = new_out; + interator_sz = output_size; + + new_out=NULL; + output_size=0; + } + if(pre_out_sz>0) + { + *out=pre_out; + return pre_out_sz; + } + else + { + return 0; + } +} + +size_t __attribute__((__unused__)) +format_edit_element_rule(struct edit_element_rule *edit_element, const char *user_region, size_t n_edit_element) +{ + size_t idx=0; + cJSON *json=NULL, *rules=NULL, *item=NULL, *sub_item=NULL; + + json=cJSON_Parse(user_region); + if(json !=NULL ) + { + rules = cJSON_GetObjectItem(json, "rules"); + if(rules == NULL) + { + goto finish; + } + + idx = 0; + for (item = rules->child; item != NULL; item = item->next) + { + sub_item=cJSON_GetObjectItem(item,"anchor_element"); + if(sub_item != NULL && sub_item->type ==cJSON_Object) + { + char * search_scope = cJSON_GetObjectItem(sub_item , "search_scope")->valuestring; + if (search_scope == NULL) break; + + edit_element[idx].scope = scope_name_to_id(search_scope); + if (edit_element[idx].scope == KScopeMax) + { + break; + } + if(edit_element[idx].scope == kScopeInside) + { + edit_element[idx].start_indicator = tfe_strdup(cJSON_GetObjectItem(sub_item , "start_indicator")->valuestring); + } + edit_element[idx].contained_keyword = tfe_strdup(cJSON_GetObjectItem(sub_item,"contained_keyword")->valuestring); + } + + sub_item=cJSON_GetObjectItem(item,"target_element"); + if(sub_item != NULL && sub_item->type ==cJSON_Object) + { + edit_element[idx].distane_from_matching = cJSON_GetObjectItem(sub_item , "target_distance_from_matching")->valueint; + edit_element[idx].element_treatment = tfe_strdup(cJSON_GetObjectItem(sub_item,"element_treatment")->valuestring); + } + + if (idx == n_edit_element) + { + break; + } + + idx++; + } + } + +finish: + if (json) cJSON_Delete(json); + return idx; +} + +void simple_edit_element(const char *user_region, const char* input, size_t in_sz, char** output, size_t *output_sz, int options) +{ + size_t n_got_rule=0, i=0; + struct edit_element_rule rules[16]; + memset(rules, 0, sizeof(struct edit_element_rule)*16); + + n_got_rule=format_edit_element_rule(rules, user_region, sizeof(rules)/sizeof(rules[0])); + *output_sz=execute_edit_element_rule(input, strlen(input), rules, n_got_rule, output, options); + for(i=0; i + +enum search_scope +{ + kScopeInside = 0, + kScopeWhole, + KScopeMax +}; + +struct edit_element_rule +{ + enum search_scope scope; + int distane_from_matching; + char * start_indicator; + char *element_treatment; + char * contained_keyword; +}; + +size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct edit_element_rule *rules, size_t n_rule, char** out, int options); +enum search_scope scope_name_to_id(const char * name); + +void simple_edit_element(const char *user_region, const char* input, size_t in_sz, char** output, size_t *output_sz, int options); + diff --git a/plugin/business/pangu-http/src/pangu_element_edit.cpp b/plugin/business/pangu-http/src/pangu_element_edit.cpp deleted file mode 100644 index 264186d..0000000 --- a/plugin/business/pangu-http/src/pangu_element_edit.cpp +++ /dev/null @@ -1,764 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "pangu_element_edit.h" - -int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop); -static void html_node_list(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match); -size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct element_rule * rules, char** out); - -enum search_scope scope_name_to_id(const char * name) -{ - const char * std_name[] = {"inside_element","whole_file"}; - size_t i = 0; - for (i = 0; i < sizeof(std_name) / sizeof(const char *); i++) - { - if (0 == strcasecmp(name, std_name[i])) - { - break; - } - } - return (enum search_scope) i; -} - -int match_start_indicator(xmlNodePtr parent, char * start_indicator) -{ - if(parent->properties == NULL) - { - return 0; - } - - struct _xmlAttr *properties = parent->properties; - if(properties->children == NULL || properties->children->content == NULL) - { - return 0; - } - - xmlNodePtr children = properties->children; - if(!strcasecmp((char *)children->content, start_indicator)) - { - return 1; - } - - return 0; -} - -int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, int *step_level, const struct element_rule * rules) -{ - const char *element_treatment=rules->element_treatment; - char * start_indicator = rules->start_indicator; - - if(element_treatment != NULL && !strcasecmp(element_treatment, "mark")) - { - if (rules->scope == kScopeInside) - { - if(a_element->type == cJSON_Object) - { - if(*node != NULL && strcasecmp(*node, start_indicator) != 0) - { - return 0; - } - } - if(a_element->type == cJSON_Array) - { - if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator)) - { - return 0; - } - } - } - - if(a_element->type==cJSON_Object) - { - cJSON_AddBoolToObject(a_element, "need_filter", true); - } - } - - if(element_treatment != NULL && !strcasecmp(element_treatment, "remove")) - { - if (rules->scope == kScopeInside) - { - if(a_element->type == cJSON_Object) - { - if(*node != NULL && strcasecmp(*node, start_indicator) != 0) - { - return 0; - } - } - if(a_element->type == cJSON_Array) - { - if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator)) - { - return 0; - } - } - } - - if(a_element->type == cJSON_Object && *node != NULL) - { - cJSON_DeleteItemFromObject(a_element, *node); - } - if(a_element->type == cJSON_Array) - { - cJSON_DeleteItemFromArray(a_element, step_level[*step]); - } - } - - return 0; -} - -int construct_html_by_treatment(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match) -{ - int k=0; - char *new_out=NULL; - size_t output_size=0; - char * token = NULL, * saveptr = NULL; - - if(strcasestr((char *)node->content, "var ytInitialData")) - { - token = strtok_r((char *)node->content, "=", &saveptr); - if(token != NULL && ((saveptr[0] == '{') || (saveptr[1] == '{'))) - { - output_size = parse_json_output_unformatted(saveptr, strlen(saveptr), rules, &new_out); - if(output_size != 0 && new_out != NULL) - { - new_out[output_size] = ';'; - - FREE(&node->content); - - node->content = (xmlChar*)new_out; - *match =1; - return 0; - } - } - } - - if(strcasestr((char *)node->content, rules->contained_keyword) == NULL) - { - return 0; - } - - char * start_indicator = rules->start_indicator; - const char *element_treatment=rules->element_treatment; - int distane_from_matching = (rules->distane_from_matching + 1); - - if(element_treatment != NULL && !strcasecmp(element_treatment, "mark")) - { - xmlNodePtr parent = node->parent; - k++; - while (parent != NULL) - { - if(k == distane_from_matching) - { - if (rules->scope == kScopeInside && match_start_indicator(parent, start_indicator) == 0) - { - break; - } - - xmlNewProp(parent, (const xmlChar *)"need_filter", (const xmlChar *)"true"); - *match =1; - break; - } - k++; - parent = parent->parent; - } - } - - if(element_treatment != NULL && !strcasecmp(element_treatment, "remove")) - { - xmlNodePtr parent = node->parent; - k++; - while (parent != NULL) - { - if(k == distane_from_matching) - { - if (rules->scope == kScopeInside && match_start_indicator(parent, start_indicator) == 0) - { - break; - } - - /*This is the top floor, Don't deal with**/ - if(parent->parent == NULL) - { - break; - } - - if(*n_parent < 16) - { - parent_array[*n_parent] = parent; - *n_parent = *n_parent+1; - } - - *match =1; - break; - } - - k++; - parent = parent->parent; - } - } - - return 0; -} - -int cjson_dump_array(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop) -{ - int xret=0, array_cnt=0; - - cJSON *a_element = a->child; - *step= *step + 1; - for (; (a_element != NULL);) - { - xret = cjson_element_foreach(a_element, depth, step, step_level, node, rules, match_num, loop); - if(xret == -1) - { - return -1; - } - if(*depth == 0) - { - construct_cjson_by_treatment(a_element, node, step, step_level, rules); - } - if(xret == 1) - { - step_level[*step] = array_cnt; - *node = a_element->string; - *depth = *depth -1; - return 1; - } - array_cnt++; - a_element = a_element->next; - } - - return xret; -} - -int cjson_dump_object(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop) -{ - int xret=0; - cJSON *a_element=NULL; - - cJSON_ArrayForEach(a_element, a) - { - xret = cjson_element_foreach(a_element, depth, step, step_level, node, rules, match_num, loop); - if(xret == -1) - { - return -1; - } - if(*depth == 0) - { - construct_cjson_by_treatment(a_element, node, step, step_level, rules); - } - if(xret == 1) - { - *node = a_element->string; - *depth = *depth -1; - return 1; - } - } - return xret; -} - -int cjson_dump_string(cJSON *a, int *depth, const struct element_rule * rules, int *match_num, int loop) -{ - int xret=0; - - if((a->valuestring != NULL) && strcasestr(a->valuestring, rules->contained_keyword)) - { - if(*depth != -1) - { - if(!strcasecmp(rules->element_treatment, "mark")) - { - if(*match_num == loop) - { - xret = 1; - goto finish; - } - } - else - { - xret = 1; - goto finish; - - } - } - *match_num = *match_num + 1; - } -finish: - return xret; -} - -int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop) -{ - if ((a == NULL) || cJSON_IsInvalid(a)) - { - return -1; - } - - switch (a->type & 0xFF) - { - case cJSON_String: - case cJSON_Raw: - return cjson_dump_string(a, depth, rules, match_num, loop); - - case cJSON_Array: - return cjson_dump_array(a, depth, step, step_level, node, rules, match_num, loop); - - case cJSON_Object: - return cjson_dump_object(a, depth, step, step_level, node, rules, match_num, loop); - - case cJSON_Number: - case cJSON_False: - case cJSON_True: - case cJSON_NULL: - return 0; - default: - return -1; - } - - return 0; -} - -static void html_namespace_list(xmlNsPtr ns) -{ - while (ns != NULL) - { - ns = ns->next; - } -} - -static void html_attr_list(const struct element_rule * rules, xmlAttrPtr attr, xmlNodePtr *parent_array, size_t *n_parent, int *match) -{ - while (attr != NULL) - { - if (attr->children != NULL) - { - html_node_list(rules, attr->children, parent_array, n_parent, match); - } - - attr = attr->next; - } -} - -static void html_dump_one_node(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match) -{ - switch (node->type) - { - case XML_ELEMENT_NODE: - case XML_ELEMENT_DECL: - case XML_CDATA_SECTION_NODE: - case XML_ENTITY_REF_NODE: - case XML_ENTITY_NODE: - case XML_PI_NODE: - case XML_COMMENT_NODE: - case XML_DOCUMENT_TYPE_NODE: - case XML_DOCUMENT_FRAG_NODE: - case XML_NOTATION_NODE: - case XML_TEXT_NODE: - break; - - default: - return; - } - if ((node->type == XML_ELEMENT_NODE) && (node->nsDef != NULL)) - { - html_namespace_list(node->nsDef); - } - - if ((node->type == XML_ELEMENT_NODE) && (node->properties != NULL)) - { - html_attr_list(rules, node->properties, parent_array, n_parent, match); - } - - if (node->type != XML_ENTITY_REF_NODE) - { - if ((node->type != XML_ELEMENT_NODE) && (node->content != NULL)) - { - construct_html_by_treatment(rules, node, parent_array, n_parent, match); - } - } -} - -static void html_dump_node(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match) -{ - html_dump_one_node(rules, node, parent_array, n_parent, match); - if ((node->type != XML_NAMESPACE_DECL) && (node->children != NULL) && (node->type != XML_ENTITY_REF_NODE)) - { - html_node_list(rules, node->children, parent_array, n_parent, match); - } -} - -static void html_node_list(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match) -{ - while (node != NULL) - { - html_dump_node(rules, node, parent_array, n_parent, match); - node = node->next; - } -} - -static void html_element_foreach(const struct element_rule * rules, xmlDocPtr doc, xmlNodePtr *parent_array, size_t *n_parent, int *match) -{ - if (((doc->type == XML_DOCUMENT_NODE) || (doc->type == XML_HTML_DOCUMENT_NODE)) && (doc->children != NULL)) - { - html_node_list(rules, doc->children, parent_array, n_parent, match); - } -} - -size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct element_rule * rules, char** out) -{ - int match_num_peer=0; - int step=0, depth=0, match_num=0,i=0, match=0; - cJSON* interator=NULL; - char* new_out = NULL, *node=NULL; - size_t outlen=0; - char *element_treatment=NULL; - - int step_level[2048] = {0}; - - interator = cJSON_Parse(in); - if(interator==NULL) - { - goto finish; - } - - depth = -1; - element_treatment=rules->element_treatment; - - cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, 0); - match_num_peer = match_num; - for(i=0; i< match_num_peer; i++) - { - depth = (rules->distane_from_matching + 1); - step=0; node=NULL; match_num=0; - memset(step_level, 0, sizeof(step_level)); - - match |= cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, i); - - if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0) - { - cJSON_DeleteItemFromObject(interator, node); - } - } - - if(match==1 && element_treatment != NULL && !strcasecmp(element_treatment, "mark")) - { - if(interator->type==cJSON_Object) - { - cJSON_AddBoolToObject(interator, "need_check", true); - } - } - - new_out = cJSON_PrintUnformatted(interator); - if(new_out!=NULL) - { - *out = new_out; - outlen = strlen(new_out); - } - -finish: - if(interator != NULL) - cJSON_Delete(interator); - return outlen; -} - -size_t format_json_file_type(const char * in, size_t in_sz, const struct element_rule * rules, char** out) -{ - int match_num_peer=0; - int step=0, depth=0, match=0, i=0; - cJSON* interator=NULL; - char* new_out = NULL, *node=NULL; - size_t outlen=0; int match_num=0; - char *element_treatment=NULL; - - int step_level[2048] = {0}; - - char*new_in = ALLOC(char, in_sz+1); - memcpy(new_in, in, in_sz); - - interator = cJSON_Parse(new_in); - if(interator==NULL) - { - goto finish; - } - - depth = -1; - element_treatment=rules->element_treatment; - - /*When the node has inclusion relation, cjson is not null when deleted - So multiple loops delete **/ - cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, 0); - match_num_peer = match_num; - for(i=0; i< match_num_peer; i++) - { - depth = (rules->distane_from_matching + 1); - step=0; node=NULL; match_num=0; - memset(step_level, 0, sizeof(step_level)); - - match |= cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, i); - - if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0) - { - cJSON_DeleteItemFromObject(interator, node); - } - - match_num--; - } - - if(match == 0) - { - goto finish; - } - - if(element_treatment != NULL && !strcasecmp(element_treatment, "mark")) - { - if(interator->type==cJSON_Object) - { - cJSON_AddBoolToObject(interator, "need_check", true); - } - } - - new_out = cJSON_Print(interator); - if(new_out!=NULL) - { - *out = new_out; - outlen = strlen(*out); - } - -finish: - if(interator != NULL) - cJSON_Delete(interator); - FREE(&new_in); - return outlen; -} - -size_t format_multidelete_json_type(const char * in, size_t in_sz, const struct element_rule * rules, char** out) -{ - char *new_out=NULL, *pre_out=NULL; - char * tmp = ALLOC(char, in_sz+1); - char * token = NULL, * sub_token = NULL, * saveptr = NULL; - size_t output_size = 0; - - size_t new_out_len=0; - /**Follow-up optimization */ - new_out = ALLOC(char, in_sz+in_sz/3); - - memcpy(tmp, in, in_sz); - - for (token = tmp;; token = NULL) - { - sub_token = strtok_r(token, "\n", &saveptr); - if (sub_token == NULL) - { - new_out[new_out_len-2]='\0'; - break; - } - output_size = parse_json_output_unformatted(sub_token, strlen(sub_token), rules, &pre_out); - if(output_size>0 && pre_out!=NULL) - { - memcpy(new_out+new_out_len, pre_out, strlen(pre_out)); - new_out_len += strlen(pre_out); - memcpy(new_out+new_out_len, "\r\n", 2); - new_out_len +=2; - FREE(&pre_out); - } - } - - if(new_out) - { - *out = new_out; - output_size = strlen(new_out); - } - - free(tmp); - tmp = NULL; - return output_size; -} - -size_t construct_format_html(htmlDocPtr doc, char**out) -{ - size_t outlen=0; - xmlBufferPtr out_buffer; - const xmlChar *xmlCharBuffer; - xmlSaveCtxtPtr saveCtxtPtr; - - out_buffer = xmlBufferCreate(); - if (out_buffer == NULL) - { - goto finish; - } - - saveCtxtPtr = xmlSaveToBuffer(out_buffer, "UTF-8", XML_SAVE_NO_DECL); - if (xmlSaveDoc(saveCtxtPtr, doc) < 0) - { - goto finish; - } - xmlSaveClose(saveCtxtPtr); - - xmlCharBuffer = xmlBufferContent(out_buffer); - if(xmlCharBuffer != NULL) - { - char*new_out = ALLOC(char, strlen((char *)xmlCharBuffer)+1); - memcpy(new_out, (char *)xmlCharBuffer, strlen((char *)xmlCharBuffer)); - - *out = new_out; - outlen = strlen((char *)xmlCharBuffer); - } - -finish: - if(out_buffer != NULL) - { - xmlBufferFree(out_buffer); - } - - return outlen; -} - -size_t format_input_html(const char * in, size_t in_sz, const struct element_rule * rules, char** out) -{ - size_t outlen=0, n_parent=0, n_parent_peer=0; - int match=0, i=0; - htmlDocPtr doc = NULL; - const char *element_treatment=NULL; - xmlNodePtr parent_array[16]; - - int options = XML_PARSE_NOERROR; - - doc = htmlReadMemory(in, in_sz, NULL, NULL, options); - if (doc == NULL) - { - goto finish; - } - - /*When the node has inclusion relation, libxml2 is not null when deleted - So multiple loops delete **/ - html_element_foreach(rules, doc, parent_array, &n_parent, &match); - if(match != 1) - { - goto finish; - } - - n_parent_peer = n_parent; - element_treatment=rules->element_treatment; - - if(element_treatment != NULL && !strcasecmp(element_treatment, "remove")) - { - for(i=0; i < (int)n_parent_peer; i++) - { - match =0; n_parent = 0; - html_element_foreach(rules, doc, parent_array, &n_parent, &match); - if(match == 1) - { - xmlUnlinkNode(parent_array[0]); - xmlFreeNode(parent_array[0]); - } - } - } - - if(element_treatment != NULL && !strcasecmp(element_treatment, "mark")) - { - if(doc->children != NULL && doc->children->next != NULL) - { - xmlNewProp(doc->children->next, (const xmlChar *)"need_check", (const xmlChar *)"true"); - } - } - - outlen = construct_format_html(doc, out); - if(outlen<=0) - { - outlen=0; - } - -finish: - if(doc!=NULL) - { - xmlFreeDoc(doc); - } - return outlen; -} - -size_t format_html_file_type(const char * interator, size_t interator_sz, const struct element_rule *rule, char **new_out) -{ - size_t output_size=0; - - if(interator[0] == '{') - { - output_size = format_multidelete_json_type(interator, interator_sz, rule, new_out); - } - else - { - output_size = format_input_html(interator, interator_sz, rule, new_out); - } - - return output_size; -} - -size_t parse_string(const char * interator, size_t interator_sz, const struct element_rule *rule, char **new_out, int options) -{ - size_t output_size=0; - - if(options) - { - output_size = format_json_file_type(interator, interator_sz, rule, new_out); - } - else - { - output_size = format_html_file_type(interator, interator_sz, rule, new_out); - } - - return output_size; -} - -size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct element_rule *rules, size_t n_rule, char** out, int options) -{ - const struct element_rule * todo = rules; - size_t i = 0, interator_sz=0, pre_out_sz=0; - const char * interator = NULL; - char* new_out = NULL, * pre_out = NULL; - size_t output_size=0; - if (in_sz == 0 || in==NULL) - { - return 0; - } - interator = in; - interator_sz = in_sz; - for (i = 0; i < n_rule; i++) - { - output_size = parse_string(interator, interator_sz, &(todo[i]), &new_out, options); - if (output_size == 0) - { - continue; - } - if (pre_out != NULL) - { - free(pre_out); - pre_out = NULL; - } - pre_out = new_out; - pre_out_sz = output_size; - - interator = new_out; - interator_sz = output_size; - - new_out=NULL; - output_size=0; - } - if(pre_out_sz>0) - { - *out=pre_out; - return pre_out_sz; - } - else - { - return 0; - } -} - diff --git a/plugin/business/pangu-http/src/pangu_element_edit.h b/plugin/business/pangu-http/src/pangu_element_edit.h deleted file mode 100644 index d431f5b..0000000 --- a/plugin/business/pangu-http/src/pangu_element_edit.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once -#include - -enum search_scope -{ - kScopeInside = 0, - kScopeWhole, - KScopeMax -}; - -struct element_rule -{ - enum search_scope scope; - int distane_from_matching; - char * start_indicator; - char *element_treatment; - char * contained_keyword; -}; - -size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct element_rule *rules, size_t n_rule, char** out, int options); -enum search_scope scope_name_to_id(const char * name); - diff --git a/plugin/business/pangu-http/src/pangu_http.cpp b/plugin/business/pangu-http/src/pangu_http.cpp index 8ce3ccc..37ead64 100644 --- a/plugin/business/pangu-http/src/pangu_http.cpp +++ b/plugin/business/pangu-http/src/pangu_http.cpp @@ -1,5 +1,5 @@ #include "pangu_logger.h" -#include "pangu_element_edit.h" +#include "edit_element.h" #include "pattern_replace.h" #include "pangu_web_cache.h" @@ -126,7 +126,7 @@ struct policy_action_param struct replace_rule *repl_rule; size_t e_rule; - struct element_rule *elem_rule; + struct edit_element_rule *elem_rule; pthread_mutex_t lock; }; @@ -486,7 +486,7 @@ void policy_action_param_new(int idx, const struct Maat_rule_t* rule, const char break; } rule_id = 0; - param->elem_rule = ALLOC(struct element_rule, MAX_EDIT_ZONE_NUM); + param->elem_rule = ALLOC(struct edit_element_rule, MAX_EDIT_ZONE_NUM); for (item = rules->child; item != NULL; item = item->next) { sub_item=cJSON_GetObjectItem(item,"anchor_element"); @@ -995,7 +995,7 @@ struct insert_ctx struct edit_element_ctx { - struct element_rule *item; + struct edit_element_rule *item; size_t n_item; struct tfe_http_half * editing; struct evbuffer *http_body; diff --git a/plugin/business/pangu-http/src/test_edit_element.cpp b/plugin/business/pangu-http/src/test_edit_element.cpp new file mode 100644 index 0000000..a16fc4b --- /dev/null +++ b/plugin/business/pangu-http/src/test_edit_element.cpp @@ -0,0 +1,142 @@ +#include +#include "edit_element.h" + +#include +#include +#include +#include +#include + +TEST(EditElement, Libxml_Whole_Remove_Index01) +{ + char* output=NULL; + size_t output_sz=0; + + const char *input = "\n\n\nindex\n\n\n\n

body

\n

hello world

\n跳转\n\n\n\n\n"; + + const char *user_region = "{\"rules\":[{\"anchor_element\":{\"search_scope\":\"whole_file\",\"contained_keyword\":\"hello world\"},\ + \"target_element\":{\"target_distance_from_matching\":0,\"element_treatment\":\"remove\"}}]}"; + + simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0); + EXPECT_TRUE(output_sz>0); + + printf("output = %s\n", output); + EXPECT_TRUE(NULL==strstr(output, "hello world")); + free(output); +} + +TEST(EditElement, Libxml_Whole_Mark_Index01) +{ + char* output=NULL; + size_t output_sz=0; + + const char *input = "\n\n\nindex\n\n\n\n

body

\n

hello world

\n跳转\n\n\n\n\n"; + + const char *user_region = "{\"rules\":[{\"anchor_element\":{\"search_scope\":\"whole_file\",\"contained_keyword\":\"hello world\"},\ + \"target_element\":{\"target_distance_from_matching\":0,\"element_treatment\":\"mark\"}}]}"; + + simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0); + EXPECT_TRUE(output_sz>0); + + const char *expect_output = "\n\n\nindex\n\n\n\n

body

\n

hello world

\n跳转\n\n\n\n\n"; + + printf("output = %s\n", output); + EXPECT_TRUE(0==strcmp(output, expect_output)); + free(output); +} + +TEST(EditElement, Libxml_Inside_Remove_Index01) +{ + char* output=NULL; + size_t output_sz=0; + + const char *input = "\n\n\nindex\n\n\n\n

body

\n

hello world

\n\ + 跳转\n\n\n\n\n"; + + const char *user_region = "{\"rules\":[{\"anchor_element\":{\"search_scope\":\"inside_element\",\"start_indicator\":\"LC20lb\",\"contained_keyword\":\"hello world\"},\ + \"target_element\":{\"target_distance_from_matching\":0,\"element_treatment\":\"remove\"}}]}"; + + simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0); + EXPECT_TRUE(output_sz>0); + + printf("output = %s\n", output); + EXPECT_TRUE(NULL==strstr(output, "hello world")); + free(output); +} + +TEST(EditElement, Cjson_Whole_Remove_Simple) +{ + char* output=NULL; + size_t output_sz=0; + + const char *input = "{\"testkey\":\"value\",\"verifyList\":[{\"policyType\":\"tsg_security\",\"verifySession\":{\"attributes\":[{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"}},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"2.5.6.7\"}},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"}}]}}]}"; + + const char *user_region = "{\"rules\":[{\"anchor_element\":{\"search_scope\":\"whole_file\",\"contained_keyword\":\"192.168.50.37\"},\ + \"target_element\":{\"target_distance_from_matching\":2,\"element_treatment\":\"remove\"}}]}"; + + simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0); + EXPECT_TRUE(output_sz>0); + + printf("output = %s\n", output); + EXPECT_TRUE(NULL==strstr(output, "192.168.50.37")); + free(output); +} + +TEST(EditElement, Cjson_Whole_mark_Simple) +{ + char* output=NULL; + size_t output_sz=0; + + const char *input = "{\"testkey\":\"value\",\"verifyList\":[{\"policyType\":\"tsg_security\",\"verifySession\":{\"attributes\":[{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"}},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"2.5.6.7\"}},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"}}]}}]}"; + + const char *user_region = "{\"rules\":[{\"anchor_element\":{\"search_scope\":\"whole_file\",\"contained_keyword\":\"192.168.50.37\"},\ + \"target_element\":{\"target_distance_from_matching\":2,\"element_treatment\":\"mark\"}}]}"; + + simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0); + EXPECT_TRUE(output_sz>0); + + const char *expect_output = "{\"testkey\":\"value\",\"verifyList\":[{\"policyType\":\"tsg_security\",\"verifySession\":{\"attributes\":[{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"},\"need_filter\":true},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"2.5.6.7\"}},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"},\"need_filter\":true}]}}],\"need_check\":true}"; + printf("output = %s\n", output); + EXPECT_TRUE(0==strcmp(output, expect_output)); + free(output); +} + +TEST(EditElement, Libxml_Whole_Remove_Facebook) +{ + char* output=NULL; + size_t output_sz=0,input_len=0; + + const char* filename="./test_data/facebook_index.html"; + + FILE* fp=NULL; + struct stat file_info; + stat(filename, &file_info); + size_t input_sz=file_info.st_size; + + fp=fopen(filename,"r"); + ASSERT_FALSE(fp==NULL); + if(fp==NULL) + { + return; + } + char* input=(char*)malloc(input_sz); + fread(input,1,input_sz,fp); + fclose(fp); + + const char *user_region = "{\"rules\":[{\"anchor_element\":{\"search_scope\":\"inside_element\",\"start_indicator\":\"_2t-a _4pmj _2t-d\",\"contained_keyword\":\"Facebook\"},\ + \"target_element\":{\"target_distance_from_matching\":5,\"element_treatment\":\"remove\"}}]}"; + + simple_edit_element(user_region, input, input_len, &output, &output_sz, 0); + EXPECT_TRUE(output_sz>0); + + EXPECT_TRUE(NULL==strstr(output, "_2t-a _4pmj _2t-d")); + free(output); +} + +int main(int argc, char ** argv) +{ + + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + -- cgit v1.2.3