diff options
Diffstat (limited to 'src/get_td_mistake_lost/get_TD_SFH.c')
| -rw-r--r-- | src/get_td_mistake_lost/get_TD_SFH.c | 162 |
1 files changed, 162 insertions, 0 deletions
diff --git a/src/get_td_mistake_lost/get_TD_SFH.c b/src/get_td_mistake_lost/get_TD_SFH.c new file mode 100644 index 0000000..2ed3ecd --- /dev/null +++ b/src/get_td_mistake_lost/get_TD_SFH.c @@ -0,0 +1,162 @@ +/* +gcc -g get_TD_SFH.c -o get_TD_SFH -lmaatframe -lMESA_htable -I../include +./get_mistake_level ../data/ripe_data/td_data_20171207/all_av_digest_mistake_level +*/ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include "gram_index_engine.h" +#include <MESA/MESA_htable.h> +#include <assert.h> +#include <ctype.h> +#define BUFFER_LEN (15*1024) +#define SFH_LEN (10*1024) +#define TD_LEN 33 +#define THREAD_SAFE 0 +#define SLOT_SIZE (1024*1024*16) +#define TD_STR_LEN (10*1024) +#define TIME_STR_LEN 128 + +typedef struct sfh_link +{ + // char *time_str; + char *sfh_str; + char *td_ori; + // char *md5_32k; + int similiar; + int all_similiar; + // long hash_len; + struct sfh_link *next; +}sfh_link; + +typedef struct sfh +{ + int all_num; + int all_similiar; + char *sfh_str; + // long hash_len; + sfh_link *sfh_link_items; +}sfh; + +void print_td_sfh(const uchar *key,uint size,void *data,void *arg) +{ + FILE *ripe_file=(FILE*)arg; + sfh *temp_sfh=(sfh*)data; + fprintf(ripe_file,"%s;%s;%s",key,temp_sfh->sfh_link_items->td_ori,temp_sfh->sfh_str); +} + +int main() +{ + FILE *raw_file; + FILE *ripe_file; + char *raw_file_dir="../../data/ripe_data/td_data_20171207/new_TD.txt"; + char *ripe_file_dir="../../data/ripe_data/td_data_20171207/TD_SFH_3"; + raw_file = fopen(raw_file_dir,"r+"); + ripe_file = fopen(ripe_file_dir,"w+"); + if(raw_file==NULL) + { + printf("open all_av_digest error\n"); + return -1; + } + if(ripe_file==NULL) + { + printf("open all_av_digest_mistake_level error"); + return -1; + } + MESA_htable_handle htable=NULL; + char *sfh_str=NULL,*td=NULL,*buffer=NULL,*td_str=NULL,*md5_32k_str=NULL,*time_str=NULL; + int i=0,thread_safe=THREAD_SAFE,ret=0,temp_mistake=0,temp_similiar=0,temp_all_similiar=0; + unsigned int slot_size=SLOT_SIZE; + sfh *temp_sfh=NULL; + sfh_link *temp_sfh_link=NULL; + buffer = (char*)calloc(BUFFER_LEN,sizeof(char)); + sfh_str = (char*)calloc(SFH_LEN,sizeof(char)); + td = (char*)calloc(TD_LEN,sizeof(char)); + td[32]='\0'; + td_str = (char*)calloc(TD_STR_LEN,sizeof(char)); + // md5_32k_str = (char*)calloc(TD_LEN,sizeof(char)); + // time_str = (char*)calloc(TIME_STR_LEN,sizeof(char)); + // time_str[TIME_STR_LEN-1]='\0'; + // md5_32k_str[32]='\0'; + htable=MESA_htable_born(); + MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int)); + MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(unsigned int)); + MESA_htable_mature(htable); + while(feof(raw_file)==0) + { + i++; + if(i%100000==0) + { + printf("%d\n",i); + } + fgets(buffer,BUFFER_LEN-1,raw_file); + buffer[BUFFER_LEN-1]='\0'; + // ret=sscanf(buffer,"%[^;];%*[^;];%*[^;];%*[^;];%[^;];%*[^;];%*[^;];\ + // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ + // %[^;];%[^;];%*[^;];%[^;];%*[^;]",time_str,td_str,md5_32k_str,td,sfh_str); + // assert(ret==5); + ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str); + assert(ret==3); + td[32]='\0'; + // md5_32k_str[32]='\0'; + if((temp_sfh=MESA_htable_search(htable,td,TD_LEN))==NULL) + { + temp_sfh=(sfh*)calloc(1,sizeof(sfh)); + temp_sfh->all_num=1; + temp_sfh->all_similiar=0; + temp_sfh->sfh_str=strdup(sfh_str); + temp_sfh->sfh_link_items=(sfh_link*)calloc(1,sizeof(sfh_link)); + temp_sfh->sfh_link_items->sfh_str=strdup(sfh_str); + temp_sfh->sfh_link_items->td_ori=strdup(td_str); + // temp_sfh->sfh_link_items->md5_32k=strdup(md5_32k_str); + // temp_sfh->sfh_link_items->time_str=strdup(time_str); + temp_sfh->sfh_link_items->similiar=0; + temp_sfh->sfh_link_items->all_similiar=0; + temp_sfh->sfh_link_items->next=NULL; + ret=MESA_htable_add(htable,td,TD_LEN,(void *)temp_sfh); + assert(ret>0); + } + else + { + temp_similiar=GIE_sfh_similiarity(temp_sfh->sfh_str,(int)strlen(temp_sfh->sfh_str),sfh_str,(int)strlen(sfh_str)); + temp_sfh->all_similiar+=temp_similiar; + temp_sfh_link=temp_sfh->sfh_link_items; + for(temp_all_similiar=0;;temp_sfh_link=temp_sfh_link->next) + { + temp_similiar=GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str)); + temp_sfh_link->all_similiar+=temp_similiar; + temp_all_similiar+=temp_similiar; + if(temp_sfh_link->all_similiar>temp_sfh->all_similiar) + { + free(temp_sfh->sfh_str); + temp_sfh->sfh_str=strdup(temp_sfh_link->sfh_str); + temp_sfh->all_similiar=temp_sfh_link->all_similiar; + } + if(temp_sfh_link->next==NULL) + { + break; + } + } + temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link)); + temp_sfh_link->next->sfh_str=strdup(sfh_str); + temp_sfh_link->next->td_ori=strdup(td_str); + // temp_sfh_link->next->md5_32k=strdup(md5_32k_str); + // temp_sfh_link->next->time_str=strdup(time_str); + temp_sfh_link->next->similiar=0; + temp_sfh_link->next->all_similiar=temp_all_similiar; + temp_sfh_link->next->next=NULL; + temp_sfh->all_num+=1; + } + } + fclose(raw_file); + MESA_htable_iterate(htable,print_td_sfh,ripe_file); + free(sfh_str); + free(td); + free(td_str); + // free(md5_32k_str); + MESA_htable_destroy(htable,NULL); + // fclose(raw_file); + fclose(ripe_file); + return 0; +}
\ No newline at end of file |
