summary refs log tree commit diff
path: root/plugin/business/pangu-http/src/pangu_element_edit.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'plugin/business/pangu-http/src/pangu_element_edit.cpp')
-rw-r--r-- plugin/business/pangu-http/src/pangu_element_edit.cpp 667
1 files changed, 667 insertions, 0 deletions
diff --git a/plugin/business/pangu-http/src/pangu_element_edit.cpp b/plugin/business/pangu-http/src/pangu_element_edit.cpp
new file mode 100644
index 0000000..b14ee63
--- /dev/null
+++ b/plugin/business/pangu-http/src/pangu_element_edit.cpp
@@ -0,0 +1,667 @@
+#include <tfe_utils.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include <libxml/tree.h>
+#include <libxml/xmlsave.h>
+#include <libxml/HTMLparser.h>
+#include <cjson/cJSON.h>
+
+#include "pangu_element_edit.h"
+
+/* Forward declaration: the cJSON type dispatcher is defined further down in this file and is called recursively by the array/object walkers. */
+int cjson_element_foreach(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num);
+/* Forward declaration: the libxml2 sibling-list walker is defined further down in this file and is needed by html_attr_list. */
+static void html_node_list(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match);
+
+/*
+ * Map a scope name to its search_scope value.
+ *
+ * name is compared case-insensitively against "inside_element" (index 0)
+ * and "whole_file" (index 1). The matching index is returned cast to the
+ * enum. When nothing matches, or when name is NULL, the number of known
+ * names (2) is returned, which is the value the lookup loop ends on when
+ * it finds no match.
+ */
+enum search_scope scope_name_to_id(const char * name)
+{
+    static const char * const std_name[] = {"inside_element", "whole_file"};
+    const size_t n_name = sizeof(std_name) / sizeof(std_name[0]);
+    size_t i = 0;
+
+    /* strcasecmp() must not be handed a NULL pointer; report "not found". */
+    if (name == NULL)
+    {
+        return (enum search_scope) n_name;
+    }
+
+    for (i = 0; i < n_name; i++)
+    {
+        if (0 == strcasecmp(name, std_name[i]))
+        {
+            break;
+        }
+    }
+    return (enum search_scope) i;
+}
+
+/*
+ * Check whether the value of the FIRST attribute of parent equals
+ * start_indicator (case-insensitive).
+ *
+ * Returns 1 on a match and 0 otherwise. 0 is also returned when parent or
+ * start_indicator is NULL, when parent is not an element node, or when the
+ * first attribute has no text content.
+ */
+int match_start_indicator(xmlNodePtr parent, char * start_indicator)
+{
+    xmlAttrPtr first_attr = NULL;
+    xmlNodePtr value = NULL;
+
+    if (parent == NULL || start_indicator == NULL)
+    {
+        return 0;
+    }
+
+    /* Callers reach this node by following ->parent links, which can end on
+     * an attribute or document node; only element nodes have a valid
+     * properties list. */
+    if (parent->type != XML_ELEMENT_NODE)
+    {
+        return 0;
+    }
+
+    first_attr = parent->properties;
+    if (first_attr == NULL)
+    {
+        return 0;
+    }
+
+    value = first_attr->children;
+    if (value == NULL || value->content == NULL)
+    {
+        return 0;
+    }
+
+    return (strcasecmp((const char *)value->content, start_indicator) == 0) ? 1 : 0;
+}
+
+/*
+ * Apply the rule's treatment to one cJSON item.
+ *
+ * "mark":   adds the boolean member "need_filter": true to a_element.
+ * "remove": when the rule scope is kScopeInside and a_element has a key,
+ *           the key must equal rules->start_indicator (case-insensitive),
+ *           otherwise nothing is removed. Then, for an object, the member
+ *           named *node is deleted (if *node is not NULL); for an array,
+ *           the element at index *step is deleted.
+ *
+ * Any other (or NULL) treatment is a no-op. Always returns 0.
+ */
+int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, const struct element_rule * rules)
+{
+    const char *element_treatment = rules->element_treatment;
+    const char *start_indicator = rules->start_indicator;
+    int item_type = 0;
+
+    if (element_treatment == NULL)
+    {
+        return 0;
+    }
+
+    if (!strcasecmp(element_treatment, "mark"))
+    {
+        cJSON_AddBoolToObject(a_element, "need_filter", true);
+        return 0;
+    }
+
+    if (strcasecmp(element_treatment, "remove") != 0)
+    {
+        return 0;
+    }
+
+    if (rules->scope == kScopeInside && a_element->string != NULL)
+    {
+        /* A missing indicator can never match; do not pass NULL to strcasecmp(). */
+        if (start_indicator == NULL || strcasecmp(a_element->string, start_indicator) != 0)
+        {
+            return 0;
+        }
+    }
+
+    /* Mask off the cJSON flag bits, as cjson_element_foreach does, so the
+     * comparison only sees the base type. */
+    item_type = a_element->type & 0xFF;
+
+    if (item_type == cJSON_Object && *node != NULL)
+    {
+        cJSON_DeleteItemFromObject(a_element, *node);
+    }
+    if (item_type == cJSON_Array)
+    {
+        cJSON_DeleteItemFromArray(a_element, *step);
+    }
+
+    return 0;
+}
+
+/*
+ * Return the ancestor that sits `distance` levels above node (1 is the
+ * direct parent). Returns NULL when the parent chain is shorter than that
+ * or when distance is smaller than 1.
+ */
+static xmlNodePtr html_ancestor_at_distance(xmlNodePtr node, int distance)
+{
+    xmlNodePtr ancestor = node->parent;
+    int level = 1;
+
+    while (ancestor != NULL && level != distance)
+    {
+        level++;
+        ancestor = ancestor->parent;
+    }
+    return ancestor;
+}
+
+/*
+ * Apply the rule's treatment for one content-bearing node.
+ *
+ * Nothing happens unless node->content contains rules->contained_keyword
+ * (case-insensitive) and an ancestor exists exactly
+ * rules->distane_from_matching levels above node.
+ *
+ * "mark":   sets need_filter="true" on that ancestor and sets *match to 1.
+ * "remove": skips the ancestor when the scope is kScopeInside and
+ *           match_start_indicator() rejects it, or when it has no parent
+ *           of its own (top of the tree). Otherwise the ancestor is
+ *           appended to parent_array (at most 16 entries are stored, the
+ *           size of the array declared in format_input_html) and *match
+ *           is set to 1. The node is not unlinked here.
+ *
+ * Always returns 0.
+ */
+int construct_html_by_treatment(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
+{
+    const char *element_treatment = rules->element_treatment;
+    xmlNodePtr target = NULL;
+
+    /* strcasestr() must not be handed NULL pointers. */
+    if (node->content == NULL || rules->contained_keyword == NULL)
+    {
+        return 0;
+    }
+    if (strcasestr((char *)node->content, rules->contained_keyword) == NULL)
+    {
+        return 0;
+    }
+    if (element_treatment == NULL)
+    {
+        return 0;
+    }
+
+    target = html_ancestor_at_distance(node, rules->distane_from_matching);
+    if (target == NULL)
+    {
+        return 0;
+    }
+
+    if (!strcasecmp(element_treatment, "mark"))
+    {
+        /* xmlSetProp() updates an existing attribute instead of appending a
+         * second one, so several matches below the same ancestor do not
+         * produce duplicate need_filter attributes. */
+        xmlSetProp(target, (const xmlChar *)"need_filter", (const xmlChar *)"true");
+        *match = 1;
+        return 0;
+    }
+
+    if (!strcasecmp(element_treatment, "remove"))
+    {
+        if (rules->scope == kScopeInside && match_start_indicator(target, rules->start_indicator) == 0)
+        {
+            return 0;
+        }
+
+        /* This is the top of the tree; leave it alone. */
+        if (target->parent == NULL)
+        {
+            return 0;
+        }
+
+        if (*n_parent < 16)
+        {
+            parent_array[*n_parent] = target;
+            *n_parent = *n_parent + 1;
+        }
+        *match = 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Visit every element of the array a, in order, through
+ * cjson_element_foreach().
+ *
+ * For each element:
+ *   - a -1 result is propagated immediately;
+ *   - if *depth is 0 after the visit, construct_cjson_by_treatment() is
+ *     applied to the element;
+ *   - a result of 1 stores the element's key in *node, decrements *depth
+ *     and returns 1;
+ *   - otherwise *step is incremented and the walk continues.
+ *
+ * Returns the result of the last visited element (0 for an empty array).
+ */
+int cjson_dump_array(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num)
+{
+    int rc = 0;
+    cJSON *item = NULL;
+
+    for (item = a->child; item != NULL; item = item->next)
+    {
+        rc = cjson_element_foreach(item, depth, step, node, rules, match_num);
+        if (rc == -1)
+        {
+            return -1;
+        }
+
+        if (*depth == 0)
+        {
+            construct_cjson_by_treatment(item, node, step, rules);
+        }
+
+        if (rc == 1)
+        {
+            *node = item->string;
+            *depth -= 1;
+            return 1;
+        }
+
+        *step += 1;
+    }
+
+    return rc;
+}
+
+/*
+ * Visit every member of the object a, in order, through
+ * cjson_element_foreach().
+ *
+ * Same per-member handling as the array walker, except that *step is not
+ * advanced: -1 is propagated; when *depth is 0 the treatment is applied to
+ * the member; a result of 1 stores the member's key in *node, decrements
+ * *depth and returns 1.
+ *
+ * Returns the result of the last visited member (0 for an empty object).
+ */
+int cjson_dump_object(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num)
+{
+    int rc = 0;
+    cJSON *member = (a != NULL) ? a->child : NULL;
+
+    while (member != NULL)
+    {
+        rc = cjson_element_foreach(member, depth, step, node, rules, match_num);
+        if (rc == -1)
+        {
+            return -1;
+        }
+
+        if (*depth == 0)
+        {
+            construct_cjson_by_treatment(member, node, step, rules);
+        }
+
+        if (rc == 1)
+        {
+            *node = member->string;
+            *depth -= 1;
+            return 1;
+        }
+
+        member = member->next;
+    }
+
+    return rc;
+}
+
+/*
+ * Test one string/raw item against rules->contained_keyword
+ * (case-insensitive substring search).
+ *
+ * No match, or a NULL valuestring: returns 0.
+ * Match while *depth is -1: increments *match_num and returns 0.
+ * Match with any other *depth: returns 1.
+ */
+int cjson_dump_string(cJSON *a, int *depth, const struct element_rule * rules, int *match_num)
+{
+    if (a->valuestring == NULL || strcasestr(a->valuestring, rules->contained_keyword) == NULL)
+    {
+        return 0;
+    }
+
+    if (*depth != -1)
+    {
+        return 1;
+    }
+
+    *match_num += 1;
+    return 0;
+}
+
+/*
+ * Dispatch one cJSON item to the handler for its base type (flag bits are
+ * masked off with 0xFF).
+ *
+ * Strings and raw items go to cjson_dump_string(), arrays to
+ * cjson_dump_array(), objects to cjson_dump_object(). Numbers, booleans
+ * and null yield 0. A NULL or invalid item, or an unknown type, yields -1.
+ */
+int cjson_element_foreach(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num)
+{
+    int base_type = 0;
+
+    if (a == NULL || cJSON_IsInvalid(a))
+    {
+        return -1;
+    }
+
+    base_type = a->type & 0xFF;
+
+    if (base_type == cJSON_String || base_type == cJSON_Raw)
+    {
+        return cjson_dump_string(a, depth, rules, match_num);
+    }
+    if (base_type == cJSON_Array)
+    {
+        return cjson_dump_array(a, depth, step, node, rules, match_num);
+    }
+    if (base_type == cJSON_Object)
+    {
+        return cjson_dump_object(a, depth, step, node, rules, match_num);
+    }
+    if (base_type == cJSON_Number || base_type == cJSON_False ||
+        base_type == cJSON_True || base_type == cJSON_NULL)
+    {
+        return 0;
+    }
+
+    return -1;
+}
+
+/* Walk a namespace list to its end; no action is taken per entry. */
+static void html_namespace_list(xmlNsPtr ns)
+{
+    for (; ns != NULL; ns = ns->next)
+    {
+        /* nothing to do for a namespace definition */
+    }
+}
+
+/* For every attribute in the list that has children, run html_node_list() over those children. */
+static void html_attr_list(const struct element_rule * rules, xmlAttrPtr attr, xmlNodePtr *parent_array, size_t *n_parent, int *match)
+{
+    xmlAttrPtr cur = NULL;
+
+    for (cur = attr; cur != NULL; cur = cur->next)
+    {
+        if (cur->children == NULL)
+        {
+            continue;
+        }
+        html_node_list(rules, cur->children, parent_array, n_parent, match);
+    }
+}
+
+/*
+ * Process a single node, without descending into its children.
+ *
+ * Node types outside the list below are ignored. For element nodes the
+ * namespace definitions and the attribute list are walked. For every other
+ * accepted type except entity references, a non-NULL content is handed to
+ * construct_html_by_treatment().
+ */
+static void html_dump_one_node(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
+{
+    switch (node->type)
+    {
+        case XML_ELEMENT_NODE:
+        case XML_TEXT_NODE:
+        case XML_CDATA_SECTION_NODE:
+        case XML_ENTITY_REF_NODE:
+        case XML_ENTITY_NODE:
+        case XML_PI_NODE:
+        case XML_COMMENT_NODE:
+        case XML_DOCUMENT_TYPE_NODE:
+        case XML_DOCUMENT_FRAG_NODE:
+        case XML_NOTATION_NODE:
+        case XML_ELEMENT_DECL:
+            break;
+
+        default:
+            return;
+    }
+
+    if (node->type == XML_ELEMENT_NODE)
+    {
+        if (node->nsDef != NULL)
+        {
+            html_namespace_list(node->nsDef);
+        }
+        if (node->properties != NULL)
+        {
+            html_attr_list(rules, node->properties, parent_array, n_parent, match);
+        }
+        /* The content of an element node is never matched directly. */
+        return;
+    }
+
+    if (node->type == XML_ENTITY_REF_NODE || node->content == NULL)
+    {
+        return;
+    }
+
+    construct_html_by_treatment(rules, node, parent_array, n_parent, match);
+}
+
+/* Process node itself, then recurse into its children unless it is a namespace declaration or an entity reference. */
+static void html_dump_node(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
+{
+    html_dump_one_node(rules, node, parent_array, n_parent, match);
+
+    /* The type is tested before ->children is read, as in the original condition order. */
+    if (node->type == XML_NAMESPACE_DECL)
+    {
+        return;
+    }
+    if (node->children == NULL)
+    {
+        return;
+    }
+    if (node->type == XML_ENTITY_REF_NODE)
+    {
+        return;
+    }
+
+    html_node_list(rules, node->children, parent_array, n_parent, match);
+}
+
+/* Run html_dump_node() on node and on each of its following siblings. */
+static void html_node_list(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
+{
+    xmlNodePtr sibling = NULL;
+
+    for (sibling = node; sibling != NULL; sibling = sibling->next)
+    {
+        html_dump_node(rules, sibling, parent_array, n_parent, match);
+    }
+}
+
+/* Walk all top-level children of doc, provided doc is an XML or HTML document node and has children. */
+static void html_element_foreach(const struct element_rule * rules, xmlDocPtr doc, xmlNodePtr *parent_array, size_t *n_parent, int *match)
+{
+    if (doc->type != XML_DOCUMENT_NODE && doc->type != XML_HTML_DOCUMENT_NODE)
+    {
+        return;
+    }
+    if (doc->children == NULL)
+    {
+        return;
+    }
+
+    html_node_list(rules, doc->children, parent_array, n_parent, match);
+}
+
+/*
+ * Parse one NUL-terminated JSON text, apply the rule, and print the result
+ * without formatting.
+ *
+ * in     JSON text; it is handed to cJSON_Parse(), so it must be
+ *        NUL-terminated. in_sz is not used.
+ * rules  rule to apply (keyword, distance, treatment, scope).
+ * out    on success receives the string returned by
+ *        cJSON_PrintUnformatted(); the caller releases it.
+ *
+ * A first pass (depth -1) counts the keyword matches. One further pass is
+ * then run per counted match with depth set to rules->distane_from_matching.
+ * For "remove", when a pass unwinds to exactly depth 0 at the root, the
+ * member named by node is deleted from the root. For "mark", the root gets
+ * "need_check": true if any pass reported a match.
+ *
+ * Returns the length of *out, or 0 when parsing or printing failed (in
+ * which case *out is left untouched).
+ */
+size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
+{
+    int step = 0, depth = -1, match_num = 0, pass_matches = 0;
+    int i = 0, match = 0, ret = 0;
+    int is_remove = 0, is_mark = 0;
+    cJSON *root = NULL;
+    char *printed = NULL, *node = NULL;
+    size_t outlen = 0;
+    const char *element_treatment = rules->element_treatment;
+
+    (void)in_sz;
+
+    root = cJSON_Parse(in);
+    if (root == NULL)
+    {
+        return 0;
+    }
+
+    /* Resolve the treatment once; a NULL treatment selects neither action. */
+    if (element_treatment != NULL)
+    {
+        is_remove = (strcasecmp(element_treatment, "remove") == 0);
+        is_mark = (strcasecmp(element_treatment, "mark") == 0);
+    }
+
+    cjson_element_foreach(root, &depth, &step, &node, rules, &match_num);
+    for (i = 0; i < match_num; i++)
+    {
+        depth = rules->distane_from_matching;
+        step = 0;
+        node = NULL;
+
+        /* A separate counter keeps the loop bound fixed even when the
+         * configured distance is -1, which makes the walker count again. */
+        ret = cjson_element_foreach(root, &depth, &step, &node, rules, &pass_matches);
+        if (ret == 1)
+        {
+            /* Only a positive result is a match; an error result (-1) must
+             * not be merged into the flag. */
+            match = 1;
+        }
+
+        if (is_remove && match == 1 && node != NULL && depth == 0)
+        {
+            cJSON_DeleteItemFromObject(root, node);
+        }
+    }
+
+    if (match == 1 && is_mark)
+    {
+        cJSON_AddBoolToObject(root, "need_check", true);
+    }
+
+    printed = cJSON_PrintUnformatted(root);
+    if (printed != NULL)
+    {
+        *out = printed;
+        outlen = strlen(printed);
+    }
+
+    cJSON_Delete(root);
+    return outlen;
+}
+
+/*
+ * Apply one rule to a JSON buffer and print the result formatted.
+ *
+ * in/in_sz  input bytes; they are copied into a buffer one byte larger so
+ *           the parser sees a terminated string (assumes ALLOC returns
+ *           zero-filled memory -- TODO confirm against tfe_utils.h).
+ * rules     rule to apply.
+ * out       receives the string returned by cJSON_Print() when at least
+ *           one match was processed; the caller releases it.
+ *
+ * The matching passes work as in parse_json_output_unformatted(). Unlike
+ * that function, nothing is printed when no pass reported a match.
+ *
+ * Returns the length of *out, or 0 when allocation or parsing failed,
+ * nothing matched, or printing failed.
+ */
+size_t format_json_file_type(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
+{
+    int step = 0, depth = -1, match = 0, i = 0, ret = 0;
+    int match_num = 0, pass_matches = 0;
+    int is_remove = 0, is_mark = 0;
+    cJSON *root = NULL;
+    char *printed = NULL, *node = NULL;
+    size_t outlen = 0;
+    const char *element_treatment = rules->element_treatment;
+    char *new_in = NULL;
+
+    new_in = ALLOC(char, in_sz + 1);
+    if (new_in == NULL)
+    {
+        return 0;
+    }
+    memcpy(new_in, in, in_sz);
+
+    root = cJSON_Parse(new_in);
+    if (root == NULL)
+    {
+        goto finish;
+    }
+
+    /* Resolve the treatment once; a NULL treatment selects neither action. */
+    if (element_treatment != NULL)
+    {
+        is_remove = (strcasecmp(element_treatment, "remove") == 0);
+        is_mark = (strcasecmp(element_treatment, "mark") == 0);
+    }
+
+    cjson_element_foreach(root, &depth, &step, &node, rules, &match_num);
+    for (i = 0; i < match_num; i++)
+    {
+        depth = rules->distane_from_matching;
+        step = 0;
+        node = NULL;
+
+        /* A separate counter keeps the loop bound fixed even when the
+         * configured distance is -1. */
+        ret = cjson_element_foreach(root, &depth, &step, &node, rules, &pass_matches);
+        if (ret == 1)
+        {
+            /* An error result (-1) must not be merged into the flag. */
+            match = 1;
+        }
+
+        if (is_remove && match == 1 && node != NULL && depth == 0)
+        {
+            cJSON_DeleteItemFromObject(root, node);
+        }
+    }
+
+    if (match == 0)
+    {
+        goto finish;
+    }
+
+    if (is_mark)
+    {
+        cJSON_AddBoolToObject(root, "need_check", true);
+    }
+
+    printed = cJSON_Print(root);
+    if (printed != NULL)
+    {
+        *out = printed;
+        outlen = strlen(printed);
+    }
+
+finish:
+    if (root != NULL)
+    {
+        cJSON_Delete(root);
+    }
+    FREE(&new_in);
+    return outlen;
+}
+
+/*
+ * Apply one rule to newline-separated JSON documents.
+ *
+ * The input is copied and split on '\n'. Each line is run through
+ * parse_json_output_unformatted(); lines that produce no output are
+ * dropped. The surviving lines are joined with "\r\n", with no separator
+ * after the last one.
+ *
+ * out  receives the joined, NUL-terminated text when at least one line
+ *      produced output; the caller releases it with free().
+ *
+ * Returns the length of *out, or 0 when no line produced output or an
+ * allocation failed (*out is then left untouched).
+ */
+size_t format_multidelete_json_type(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
+{
+    char *line_out = NULL, *result = NULL, *grown = NULL;
+    char *scratch = NULL, *cursor = NULL, *line = NULL, *saveptr = NULL;
+    size_t result_len = 0, result_cap = 0, new_cap = 0;
+    size_t line_len = 0, need = 0, produced = 0;
+
+    scratch = ALLOC(char, in_sz + 1);
+    if (scratch == NULL)
+    {
+        return 0;
+    }
+    memcpy(scratch, in, in_sz);
+
+    /* Starting estimate only; the buffer is grown below, because marking
+     * can make a line longer than its input. */
+    result_cap = in_sz + in_sz / 3 + 3;
+    result = ALLOC(char, result_cap);
+    if (result == NULL)
+    {
+        free(scratch);
+        return 0;
+    }
+
+    for (cursor = scratch;; cursor = NULL)
+    {
+        line = strtok_r(cursor, "\n", &saveptr);
+        if (line == NULL)
+        {
+            break;
+        }
+
+        line_out = NULL;
+        produced = parse_json_output_unformatted(line, strlen(line), rules, &line_out);
+        if (line_out == NULL)
+        {
+            continue;
+        }
+        if (produced == 0)
+        {
+            FREE(&line_out);
+            continue;
+        }
+
+        line_len = strlen(line_out);
+        /* Room for the line, the "\r\n" separator and a terminator. */
+        need = result_len + line_len + 2 + 1;
+        if (need > result_cap)
+        {
+            new_cap = result_cap * 2;
+            if (new_cap < need)
+            {
+                new_cap = need;
+            }
+            grown = (char *)realloc(result, new_cap);
+            if (grown == NULL)
+            {
+                FREE(&line_out);
+                free(result);
+                free(scratch);
+                return 0;
+            }
+            result = grown;
+            result_cap = new_cap;
+        }
+
+        memcpy(result + result_len, line_out, line_len);
+        result_len += line_len;
+        memcpy(result + result_len, "\r\n", 2);
+        result_len += 2;
+        FREE(&line_out);
+    }
+
+    free(scratch);
+    scratch = NULL;
+
+    /* Nothing was appended: there is no trailing separator to strip and
+     * nothing to hand to the caller. */
+    if (result_len < 2)
+    {
+        free(result);
+        return 0;
+    }
+
+    /* Drop the separator that follows the last line. */
+    result_len -= 2;
+    result[result_len] = '\0';
+
+    *out = result;
+    return result_len;
+}
+
+/*
+ * Serialize doc to a newly allocated, NUL-terminated UTF-8 string without
+ * an XML declaration.
+ *
+ * out  receives the copy when serialization produced at least one byte;
+ *      the caller releases it.
+ *
+ * Returns the length of *out, or 0 on any failure or empty output (*out is
+ * then left untouched).
+ */
+size_t construct_format_html(htmlDocPtr doc, char**out)
+{
+    size_t outlen = 0, content_len = 0;
+    xmlBufferPtr out_buffer = NULL;
+    const xmlChar *content = NULL;
+    xmlSaveCtxtPtr save_ctxt = NULL;
+    char *copy = NULL;
+    int save_failed = 0;
+
+    out_buffer = xmlBufferCreate();
+    if (out_buffer == NULL)
+    {
+        return 0;
+    }
+
+    save_ctxt = xmlSaveToBuffer(out_buffer, "UTF-8", XML_SAVE_NO_DECL);
+    if (save_ctxt == NULL)
+    {
+        goto finish;
+    }
+
+    save_failed = (xmlSaveDoc(save_ctxt, doc) < 0);
+    /* Close the context on the failure path too, so it is always released. */
+    xmlSaveClose(save_ctxt);
+    if (save_failed)
+    {
+        goto finish;
+    }
+
+    content = xmlBufferContent(out_buffer);
+    if (content != NULL)
+    {
+        content_len = strlen((const char *)content);
+        if (content_len > 0)
+        {
+            copy = ALLOC(char, content_len + 1);
+            if (copy != NULL)
+            {
+                memcpy(copy, (const char *)content, content_len);
+                copy[content_len] = '\0';
+                *out = copy;
+                outlen = content_len;
+            }
+        }
+    }
+
+finish:
+    xmlBufferFree(out_buffer);
+    return outlen;
+}
+
+/*
+ * Apply one rule to an HTML buffer and serialize the result.
+ *
+ * The document is parsed and walked once. If nothing matches, 0 is
+ * returned and no output is produced.
+ *
+ * "remove": the first walk collects candidate nodes. The document is then
+ *           walked again once per collected candidate, and each time the
+ *           first node found is unlinked and freed. Re-walking is needed
+ *           because removing one node can remove other candidates nested
+ *           inside it.
+ * "mark":   the walk has already set need_filter on the matching
+ *           ancestors; here the root element additionally gets
+ *           need_check="true".
+ *
+ * out  receives the serialized document; the caller releases it.
+ *
+ * Returns the length of *out, or 0 on parse failure, no match, or
+ * serialization failure.
+ */
+size_t format_input_html(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
+{
+    size_t outlen = 0, n_parent = 0, n_parent_peer = 0, i = 0;
+    int match = 0;
+    htmlDocPtr doc = NULL;
+    const char *element_treatment = rules->element_treatment;
+    xmlNodePtr parent_array[16];
+    xmlNodePtr root = NULL;
+
+    doc = htmlReadMemory(in, (int)in_sz, NULL, NULL, HTML_PARSE_NOERROR);
+    if (doc == NULL)
+    {
+        return 0;
+    }
+
+    html_element_foreach(rules, doc, parent_array, &n_parent, &match);
+    if (match != 1)
+    {
+        goto finish;
+    }
+
+    n_parent_peer = n_parent;
+
+    if (element_treatment != NULL && !strcasecmp(element_treatment, "remove"))
+    {
+        for (i = 0; i < n_parent_peer; i++)
+        {
+            match = 0;
+            n_parent = 0;
+            html_element_foreach(rules, doc, parent_array, &n_parent, &match);
+            if (match != 1)
+            {
+                /* The document is unchanged, so later walks find nothing either. */
+                break;
+            }
+            xmlUnlinkNode(parent_array[0]);
+            xmlFreeNode(parent_array[0]);
+        }
+    }
+
+    if (element_treatment != NULL && !strcasecmp(element_treatment, "mark"))
+    {
+        /* Look up the real root element instead of assuming it is the
+         * second child of the document. xmlSetProp() keeps the attribute
+         * unique when chained rules mark the same document again. */
+        root = xmlDocGetRootElement(doc);
+        if (root != NULL)
+        {
+            xmlSetProp(root, (const xmlChar *)"need_check", (const xmlChar *)"true");
+        }
+    }
+
+    outlen = construct_format_html(doc, out);
+
+finish:
+    xmlFreeDoc(doc);
+    return outlen;
+}
+
+/*
+ * Choose the handler by the first input byte: input that starts with '{'
+ * goes to format_multidelete_json_type(), everything else goes to
+ * format_input_html(). Returns whatever the chosen handler returns.
+ */
+size_t format_html_file_type(const char * interator, size_t interator_sz, const struct element_rule *rule, char **new_out)
+{
+    const int starts_like_json = (interator[0] == '{');
+
+    if (starts_like_json)
+    {
+        return format_multidelete_json_type(interator, interator_sz, rule, new_out);
+    }
+    return format_input_html(interator, interator_sz, rule, new_out);
+}
+
+/*
+ * Apply one rule to the input. A non-zero options value selects
+ * format_json_file_type(); zero selects format_html_file_type().
+ * Returns whatever the selected handler returns.
+ */
+size_t parse_string(const char * interator, size_t interator_sz, const struct element_rule *rule, char **new_out, int options)
+{
+    if (options == 0)
+    {
+        return format_html_file_type(interator, interator_sz, rule, new_out);
+    }
+    return format_json_file_type(interator, interator_sz, rule, new_out);
+}
+
+/*
+ * Apply n_rule rules to the input, in order.
+ *
+ * Each rule that produces output replaces the working buffer, and the next
+ * rule runs on that output. A rule that produces nothing leaves the
+ * working buffer unchanged.
+ *
+ * in/in_sz  original input; never modified or freed here.
+ * out       receives the output of the last rule that produced any; the
+ *           caller releases it with free().
+ * options   passed through to parse_string() for every rule.
+ *
+ * Returns the length of *out, or 0 when the input is empty or NULL or no
+ * rule produced output (*out is then left untouched).
+ */
+size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct element_rule *rules, size_t n_rule, char** out, int options)
+{
+    const struct element_rule * todo = rules;
+    size_t i = 0, interator_sz = 0, pre_out_sz = 0;
+    const char * interator = NULL;
+    char *new_out = NULL, *pre_out = NULL;
+    size_t output_size = 0;
+
+    if (in_sz == 0 || in == NULL)
+    {
+        return 0;
+    }
+
+    interator = in;
+    interator_sz = in_sz;
+    for (i = 0; i < n_rule; i++)
+    {
+        new_out = NULL;
+        output_size = parse_string(interator, interator_sz, &(todo[i]), &new_out, options);
+        if (output_size == 0)
+        {
+            /* A handler can store a buffer and still report no output;
+             * release it here so it is not lost on the next iteration. */
+            free(new_out);
+            new_out = NULL;
+            continue;
+        }
+
+        /* The previous result was the input of this rule and is only
+         * released now that the rule has finished with it. */
+        free(pre_out);
+        pre_out = new_out;
+        pre_out_sz = output_size;
+
+        interator = pre_out;
+        interator_sz = pre_out_sz;
+    }
+
+    if (pre_out_sz > 0)
+    {
+        *out = pre_out;
+        return pre_out_sz;
+    }
+    return 0;
+}
+