summaryrefslogtreecommitdiff
path: root/src/get_td_mistake_lost/get_TD_SFH.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/get_td_mistake_lost/get_TD_SFH.c')
-rw-r--r--src/get_td_mistake_lost/get_TD_SFH.c162
1 files changed, 162 insertions, 0 deletions
diff --git a/src/get_td_mistake_lost/get_TD_SFH.c b/src/get_td_mistake_lost/get_TD_SFH.c
new file mode 100644
index 0000000..2ed3ecd
--- /dev/null
+++ b/src/get_td_mistake_lost/get_TD_SFH.c
@@ -0,0 +1,162 @@
+/*
+Build: gcc -g get_TD_SFH.c -o get_TD_SFH -lmaatframe -lMESA_htable -I../include
+Run:   ./get_TD_SFH   (takes no arguments; the input and output paths are hard-coded in main)
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "gram_index_engine.h"
+#include <MESA/MESA_htable.h>
+#include <assert.h>
+#include <ctype.h>
+#define BUFFER_LEN (15*1024)
+#define SFH_LEN (10*1024)
+#define TD_LEN 33
+#define THREAD_SAFE 0
+#define SLOT_SIZE (1024*1024*16)
+#define TD_STR_LEN (10*1024)
+#define TIME_STR_LEN 128
+
+/*
+ * One input record stored under a TD key; records sharing a key form a
+ * singly linked list in arrival order.
+ */
+typedef struct sfh_link sfh_link;
+struct sfh_link
+{
+    char *sfh_str;      /* heap copy of this record's SFH string */
+    char *td_ori;       /* heap copy of the record's first input field (the original TD text) */
+    int similiar;       /* only ever set to 0 by main */
+    int all_similiar;   /* running sum of similarity scores between this SFH and the other records of the key */
+    sfh_link *next;     /* following record with the same key, NULL at the tail */
+};
+
+/*
+ * Hash-table value: everything collected for one TD key.
+ */
+typedef struct sfh sfh;
+struct sfh
+{
+    int all_num;                /* number of records stored for this key */
+    int all_similiar;           /* similarity total of the SFH currently held in sfh_str */
+    char *sfh_str;              /* heap copy of the representative SFH (the one with the highest total so far) */
+    sfh_link *sfh_link_items;   /* head of the per-record list; the first node is the first record seen */
+};
+
+/*
+ * Hash-table iteration callback: writes one entry as
+ * "<key>;<td_ori of the first record>;<representative sfh>".
+ *
+ * key  - the entry's key, printed as a C string
+ * size - key length, not used here
+ * data - the entry's value, an sfh record
+ * arg  - the FILE* to write to
+ *
+ * No line terminator is written by this function.
+ */
+void print_td_sfh(const uchar *key,uint size,void *data,void *arg)
+{
+    const sfh *entry = data;
+    FILE *out = arg;
+    const char *first_td = entry->sfh_link_items->td_ori;
+
+    (void)size;
+    fprintf(out, "%s;%s;%s", key, first_td, entry->sfh_str);
+}
+
+/*
+ * Allocate one list node holding private copies of a record's SFH string and
+ * of its original TD text.  `similiar` and `next` start out as 0 / NULL.
+ * Returns NULL when any allocation fails; nothing is leaked in that case.
+ */
+static sfh_link *new_sfh_link(const char *sfh_str, const char *td_str, int all_similiar)
+{
+    sfh_link *node = calloc(1, sizeof *node);
+
+    if (node == NULL)
+    {
+        return NULL;
+    }
+    node->sfh_str = strdup(sfh_str);
+    node->td_ori = strdup(td_str);
+    if (node->sfh_str == NULL || node->td_ori == NULL)
+    {
+        free(node->sfh_str);
+        free(node->td_ori);
+        free(node);
+        return NULL;
+    }
+    node->all_similiar = all_similiar;
+    return node;
+}
+
+/*
+ * Reads "<td_str>;<td>;<sfh>" records from the hard-coded input file, groups
+ * them by <td> in a MESA hash table, keeps for every key the SFH whose summed
+ * similarity to the key's other records is highest, and finally writes one
+ * "<td>;<first td_str>;<representative sfh>" entry per key to the output file.
+ *
+ * Malformed lines (fewer than three fields, or a <td> longer than TD_LEN-1
+ * characters) are reported on stderr and skipped.  A line longer than
+ * BUFFER_LEN-1 bytes is split by fgets; fragments that do not parse are
+ * skipped the same way.
+ *
+ * Returns 0 on success and -1 on any I/O, allocation or table error.
+ */
+int main()
+{
+    const char *raw_file_dir = "../../data/ripe_data/td_data_20171207/new_TD.txt";
+    const char *ripe_file_dir = "../../data/ripe_data/td_data_20171207/TD_SFH_3";
+    FILE *raw_file = NULL;
+    FILE *ripe_file = NULL;
+    MESA_htable_handle htable = NULL;
+    char *buffer = NULL, *td_str = NULL, *td = NULL, *sfh_str = NULL;
+    char scan_fmt[64];
+    int thread_safe = THREAD_SAFE;
+    unsigned int slot_size = SLOT_SIZE;
+    int line_no = 0, skipped = 0;
+    int rc = -1;
+
+    /* The input is only read, so it is not opened for update. */
+    raw_file = fopen(raw_file_dir, "r");
+    if (raw_file == NULL)
+    {
+        fprintf(stderr, "open %s error\n", raw_file_dir);
+        goto out;
+    }
+    ripe_file = fopen(ripe_file_dir, "w");
+    if (ripe_file == NULL)
+    {
+        fprintf(stderr, "open %s error\n", ripe_file_dir);
+        goto out;
+    }
+
+    buffer = calloc(BUFFER_LEN, sizeof *buffer);
+    td_str = calloc(TD_STR_LEN, sizeof *td_str);
+    td = calloc(TD_LEN, sizeof *td);
+    sfh_str = calloc(SFH_LEN, sizeof *sfh_str);
+    if (buffer == NULL || td_str == NULL || td == NULL || sfh_str == NULL)
+    {
+        fprintf(stderr, "out of memory\n");
+        goto out;
+    }
+
+    /* Every %[ conversion gets an explicit width so that no field can
+     * overrun its destination buffer (td in particular is only TD_LEN bytes). */
+    snprintf(scan_fmt, sizeof scan_fmt, "%%%d[^;];%%%d[^;];%%%d[^;]",
+             TD_STR_LEN - 1, TD_LEN - 1, SFH_LEN - 1);
+
+    htable = MESA_htable_born();
+    if (htable == NULL)
+    {
+        fprintf(stderr, "MESA_htable_born error\n");
+        goto out;
+    }
+    /* NOTE(review): the THREAD_SAFE value is passed as the screen-print
+     * option; possibly a thread-safety option was intended - confirm against
+     * the MESA_htable documentation.  Kept as in the original. */
+    MESA_htable_set_opt(htable, MHO_SCREEN_PRINT_CTRL, &thread_safe, sizeof(unsigned int));
+    MESA_htable_set_opt(htable, MHO_HASH_SLOT_SIZE, &slot_size, sizeof(unsigned int));
+    MESA_htable_mature(htable);
+
+    /* Loop on the result of fgets itself: testing feof() before reading
+     * would process the last line a second time once the read fails. */
+    while (fgets(buffer, BUFFER_LEN, raw_file) != NULL)
+    {
+        sfh *entry;
+        sfh_link *node;
+        int sfh_len, score, new_total;
+
+        line_no++;
+        if (line_no % 100000 == 0)
+        {
+            printf("%d\n", line_no);
+        }
+
+        /* The table is handed all TD_LEN bytes of td as the key, so clear
+         * it first: bytes left over from a longer previous td must not end
+         * up in the key of a shorter one. */
+        memset(td, 0, TD_LEN);
+        /* The third field keeps the line's trailing newline (if any);
+         * print_td_sfh relies on it to terminate each output entry. */
+        if (sscanf(buffer, scan_fmt, td_str, td, sfh_str) != 3)
+        {
+            fprintf(stderr, "line %d: malformed record, skipped\n", line_no);
+            skipped++;
+            continue;
+        }
+
+        entry = MESA_htable_search(htable, (const uchar *)td, TD_LEN);
+        if (entry == NULL)
+        {
+            /* First record for this key: it is its own representative. */
+            entry = calloc(1, sizeof *entry);
+            if (entry == NULL)
+            {
+                fprintf(stderr, "out of memory\n");
+                goto out;
+            }
+            entry->all_num = 1;
+            entry->sfh_str = strdup(sfh_str);
+            entry->sfh_link_items = new_sfh_link(sfh_str, td_str, 0);
+            if (entry->sfh_str == NULL || entry->sfh_link_items == NULL)
+            {
+                fprintf(stderr, "out of memory\n");
+                goto out;
+            }
+            if (MESA_htable_add(htable, (const uchar *)td, TD_LEN, (void *)entry) <= 0)
+            {
+                fprintf(stderr, "line %d: MESA_htable_add error\n", line_no);
+                goto out;
+            }
+            continue;
+        }
+
+        /* Known key: score the new SFH against the current representative
+         * and against every stored record, updating their running totals. */
+        sfh_len = (int)strlen(sfh_str);
+        entry->all_similiar += GIE_sfh_similiarity(entry->sfh_str, (int)strlen(entry->sfh_str), sfh_str, sfh_len);
+        new_total = 0;
+        node = entry->sfh_link_items;
+        for (;;)
+        {
+            score = GIE_sfh_similiarity(node->sfh_str, (int)strlen(node->sfh_str), sfh_str, sfh_len);
+            node->all_similiar += score;
+            new_total += score;
+            if (node->all_similiar > entry->all_similiar)
+            {
+                /* This record now beats the representative: promote it. */
+                char *best = strdup(node->sfh_str);
+                if (best == NULL)
+                {
+                    fprintf(stderr, "out of memory\n");
+                    goto out;
+                }
+                free(entry->sfh_str);
+                entry->sfh_str = best;
+                entry->all_similiar = node->all_similiar;
+            }
+            if (node->next == NULL)
+            {
+                break;
+            }
+            node = node->next;
+        }
+        /* Append the new record; its total is the sum of its scores
+         * against all records that were already stored. */
+        node->next = new_sfh_link(sfh_str, td_str, new_total);
+        if (node->next == NULL)
+        {
+            fprintf(stderr, "out of memory\n");
+            goto out;
+        }
+        entry->all_num += 1;
+    }
+    if (ferror(raw_file))
+    {
+        fprintf(stderr, "read %s error\n", raw_file_dir);
+        goto out;
+    }
+    if (skipped > 0)
+    {
+        fprintf(stderr, "%d malformed line(s) skipped\n", skipped);
+    }
+
+    MESA_htable_iterate(htable, print_td_sfh, ripe_file);
+    rc = 0;
+
+out:
+    if (htable != NULL)
+    {
+        /* As in the original, NULL is passed as the second argument; the
+         * sfh records and their lists are not freed here and go away with
+         * the process. */
+        MESA_htable_destroy(htable, NULL);
+    }
+    free(sfh_str);
+    free(td);
+    free(td_str);
+    free(buffer);
+    if (raw_file != NULL)
+    {
+        fclose(raw_file);
+    }
+    /* fclose flushes the output, so a failure here means lost data. */
+    if (ripe_file != NULL && fclose(ripe_file) != 0)
+    {
+        fprintf(stderr, "write %s error\n", ripe_file_dir);
+        rc = -1;
+    }
+    return rc;
+} \ No newline at end of file