diff options
| author | 陈冠林 <[email protected]> | 2019-06-18 10:44:20 +0800 |
|---|---|---|
| committer | 陈冠林 <[email protected]> | 2019-06-18 10:44:20 +0800 |
| commit | b2a2f39d89b3bd154da10eb619f8a40c7c6b15d2 (patch) | |
| tree | b7a7d489030cfcc3b2fa878520d8c5d42dc5fce6 /src/dataset_build/get_lost.c | |
| parent | b026525362d7f3b0ad58fb74362bf7f95ab515e8 (diff) | |
Diffstat (limited to 'src/dataset_build/get_lost.c')
| -rw-r--r-- | src/dataset_build/get_lost.c | 116 |
1 files changed, 116 insertions, 0 deletions
diff --git a/src/dataset_build/get_lost.c b/src/dataset_build/get_lost.c new file mode 100644 index 0000000..0e6c452 --- /dev/null +++ b/src/dataset_build/get_lost.c @@ -0,0 +1,116 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <MESA/MESA_htable.h> +#include <assert.h> +#include <ctype.h> +#define HTABLE_SIZE 8*64*1024*1024 +#define SFH_PASS_RATE 0.8 +#define SIMILIAR 80 + +typedef struct td +{ + char * tdstr; + unsigned int lost; +}td; + +typedef struct file_sfh_data +{ + long id; + char * sfh; + td * td_value; + char * td_ori; +}file_sfh_data; + +int main(int argc,char *argv[]) +{ + FILE *fpread;//文件 + FILE *fpwrite;//write file handle + int array_size = 1024; + file_sfh_data **file_data=(file_sfh_data **)malloc(sizeof(file_sfh_data)*array_size); + char* dirstr = "../../data/td_data_set/td_data_20171207/td_sfh_lost"; + //char* dirstr = *++argv; + char* writestr = "../../data/td_data_set/td_data_20171207/td.txt"; + int total_len = 0; + char TD_tmp[256], SFH_tmp[1024*300], TD_ORI[1024*10]; + char buffer[1024*300+1]; + int ret = 0; + int line = 0; + int thread_safe = 0; + int i; + int id; + int similiarity; + MESA_htable_handle htable = NULL; + fpread=fopen(dirstr,"rb"); + fpwrite=fopen(writestr,"w"); + printf("file str is %s\n",dirstr); + if(fpread==NULL) + { + printf("open file error\n"); + return -1; + } + buffer[sizeof(buffer)]='\0'; + while(feof(fpread)==0) + { + fgets(buffer,sizeof(buffer)-1,fpread); + ret=sscanf(buffer,"%d;%[^;];%[^;];%s",&total_len,TD_ORI,TD_tmp,SFH_tmp); + if(ret!=4) + { + continue; + } + file_data[line]=(file_sfh_data*)calloc(1,sizeof(file_sfh_data)); + file_data[line]->id=line; + file_data[line]->sfh=strdup(SFH_tmp); + file_data[line]->td_value=(td*)calloc(1,sizeof(td)); + file_data[line]->td_value->tdstr=strdup(TD_tmp); + file_data[line]->td_value->lost=0; + file_data[line]->td_ori=strdup(TD_ORI); + line++; + if(line==array_size) + { + array_size*=2; + file_data=realloc(file_data,sizeof(file_sfh_data)*array_size); + } + } + printf("read file success!\n"); + htable = NULL; + htable=MESA_htable_born(); + thread_safe = 0; + MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int)); + unsigned int slot_size=1024*1024*16; + MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(slot_size)); + MESA_htable_mature(htable); + for(i=0;i<line;i++) + { + if(MESA_htable_add(htable,(char*)(file_data[i]->td_value->tdstr),32,(void *)file_data[i]->id)<0) + { + id=(long)MESA_htable_search(htable,(char*)file_data[i]->td_value->tdstr,32); + similiarity=GIE_sfh_similiarity(file_data[id]->sfh,(int)strlen(file_data[id]->sfh),file_data[i]->sfh,(int)strlen(file_data[i]->sfh)); + if(similiarity<SIMILIAR) + { + file_data[id]->td_value->lost = 1; + file_data[i]->td_value->lost = 1; + } + } + } + for(i=0;i<line;i++) + { + fprintf(fpwrite,"%s;%s;%s;%d\n",file_data[i]->td_value->tdstr,file_data[i]->sfh,file_data[i]->td_ori,file_data[i]->td_value->lost); + } + for(i=0;i<line;i++) + { + free(file_data[i]->sfh); + file_data[i]->sfh=NULL; + free(file_data[i]->td_value->tdstr); + file_data[i]->td_value->tdstr=NULL; + free(file_data[i]->td_value); + file_data[i]->td_value=NULL; + free(file_data[i]->td_ori); + file_data[i]->td_ori=NULL; + free(file_data[i]); + file_data[i]=NULL; + } + fclose(fpread); + fclose(fpwrite); + return 0; +}
\ No newline at end of file |
