summary | refs | log | tree | commit | diff
path: root/src/dataset_build/get_lost.c
diff options
context:
space:
mode:
author: 陈冠林 <[email protected]>, 2019-06-18 10:44:20 +0800
committer: 陈冠林 <[email protected]>, 2019-06-18 10:44:20 +0800
commit: b2a2f39d89b3bd154da10eb619f8a40c7c6b15d2 (patch)
tree: b7a7d489030cfcc3b2fa878520d8c5d42dc5fce6 /src/dataset_build/get_lost.c
parent: b026525362d7f3b0ad58fb74362bf7f95ab515e8 (diff)
添加inc和src (HEAD, master)
Diffstat (limited to 'src/dataset_build/get_lost.c')
-rw-r--r-- src/dataset_build/get_lost.c 116
1 files changed, 116 insertions, 0 deletions
diff --git a/src/dataset_build/get_lost.c b/src/dataset_build/get_lost.c
new file mode 100644
index 0000000..0e6c452
--- /dev/null
+++ b/src/dataset_build/get_lost.c
@@ -0,0 +1,116 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <MESA/MESA_htable.h>
+#include <assert.h>
+#include <ctype.h>
+/* Parenthesised so the product stays one operand wherever the macro expands. */
+#define HTABLE_SIZE (8*64*1024*1024)
+#define SFH_PASS_RATE 0.8
+/* Similarity scores below this value mark a pair of records as "lost". */
+#define SIMILIAR 80
+
+/* A TD string paired with the flag that records whether it was marked lost. */
+typedef struct td {
+    char *tdstr;        /* heap-allocated copy of the TD field of one input line */
+    unsigned int lost;  /* starts at 0; set to 1 for records flagged as lost */
+} td;
+
+/* One parsed input line: its index plus heap copies of its three text fields. */
+typedef struct file_sfh_data {
+    long id;        /* zero-based position of the record in the loaded array */
+    char *sfh;      /* heap-allocated copy of the SFH field */
+    td *td_value;   /* heap-allocated TD string and its lost flag */
+    char *td_ori;   /* heap-allocated copy of the TD_ORI field */
+} file_sfh_data;
+
+/* Number of leading bytes of each TD string used as the hash-table key. */
+enum { TD_KEY_LEN = 32 };
+
+/*
+ * Copy a TD string into zero-filled heap storage of at least TD_KEY_LEN + 1
+ * bytes, so the fixed-length key handed to the hash table never reads past
+ * the end of the allocation when the string is shorter than TD_KEY_LEN.
+ * Returns NULL if the allocation fails.
+ */
+static char *dup_td_key(const char *src)
+{
+    size_t len = strlen(src);
+    size_t cap = (len > (size_t)TD_KEY_LEN ? len : (size_t)TD_KEY_LEN) + 1;
+    char *dst = calloc(cap, 1);
+
+    if (dst != NULL) {
+        memcpy(dst, src, len);
+    }
+    return dst;
+}
+
+/* Release one record and everything it owns; accepts NULL. */
+static void free_record(file_sfh_data *rec)
+{
+    if (rec == NULL) {
+        return;
+    }
+    if (rec->td_value != NULL) {
+        free(rec->td_value->tdstr);
+        free(rec->td_value);
+    }
+    free(rec->sfh);
+    free(rec->td_ori);
+    free(rec);
+}
+
+/*
+ * Build a record holding private copies of the three text fields.
+ * Returns NULL (with nothing leaked) if any allocation fails.
+ */
+static file_sfh_data *new_record(long id, const char *td_ori,
+                                 const char *td_str, const char *sfh)
+{
+    file_sfh_data *rec = calloc(1, sizeof *rec);
+
+    if (rec == NULL) {
+        return NULL;
+    }
+    rec->id = id;
+    rec->sfh = strdup(sfh);
+    rec->td_ori = strdup(td_ori);
+    rec->td_value = calloc(1, sizeof *rec->td_value);
+    if (rec->td_value != NULL) {
+        rec->td_value->tdstr = dup_td_key(td_str);
+        rec->td_value->lost = 0;
+    }
+    if (rec->sfh == NULL || rec->td_ori == NULL ||
+        rec->td_value == NULL || rec->td_value->tdstr == NULL) {
+        free_record(rec);
+        return NULL;
+    }
+    return rec;
+}
+
+/*
+ * Load "total_len;TD_ORI;TD;SFH" lines from the input file, find records
+ * whose first TD_KEY_LEN TD bytes collide in the hash table, and flag both
+ * records of a colliding pair as lost when GIE_sfh_similiarity() scores their
+ * SFH strings below SIMILIAR. Every record is then written to the output
+ * file as "TD;SFH;TD_ORI;lost".
+ *
+ * Lines that do not parse into all four fields, or whose fields exceed the
+ * scratch buffers, are skipped.
+ *
+ * Returns 0 on success and -1 on any I/O or allocation failure.
+ *
+ * NOTE(review): GIE_sfh_similiarity has no prototype in the headers this file
+ * includes; the header that declares it should be added to the include list.
+ */
+int main(int argc,char *argv[])
+{
+    const char *dirstr = "../../data/td_data_set/td_data_20171207/td_sfh_lost";
+    const char *writestr = "../../data/td_data_set/td_data_20171207/td.txt";
+    /* static: keeps roughly 600 KiB of scratch space off the stack */
+    static char TD_tmp[256], SFH_tmp[1024*300], TD_ORI[1024*10];
+    static char buffer[1024*300+1];
+    FILE *fpread = NULL;
+    FILE *fpwrite = NULL;
+    file_sfh_data **file_data = NULL;
+    MESA_htable_handle htable = NULL;
+    unsigned int screen_print = 0;
+    unsigned int slot_size = 1024*1024*16;
+    int array_size = 1024;
+    int total_len = 0;
+    int line = 0;
+    int rc = -1;
+    int i;
+
+    (void)argc;
+    (void)argv;
+
+    printf("file str is %s\n",dirstr);
+    /* Check the input first so a missing input does not truncate the output. */
+    fpread = fopen(dirstr,"rb");
+    if (fpread == NULL) {
+        printf("open file error\n");
+        goto out;
+    }
+    fpwrite = fopen(writestr,"w");
+    if (fpwrite == NULL) {
+        fprintf(stderr,"cannot open output file %s\n",writestr);
+        goto out;
+    }
+
+    /* An array of record pointers, so size it by the pointer type. */
+    file_data = malloc(sizeof *file_data * (size_t)array_size);
+    if (file_data == NULL) {
+        fprintf(stderr,"out of memory\n");
+        goto out;
+    }
+
+    /* Loop on fgets itself: testing feof() first re-parses the last line. */
+    while (fgets(buffer,sizeof buffer,fpread) != NULL) {
+        file_sfh_data *rec;
+
+        /* Field widths are one less than the destination buffer sizes. */
+        if (sscanf(buffer,"%d;%10239[^;];%255[^;];%307199s",
+                   &total_len,TD_ORI,TD_tmp,SFH_tmp) != 4) {
+            continue;
+        }
+        if (line == array_size) {
+            int grown = array_size * 2;
+            file_sfh_data **tmp = realloc(file_data,sizeof *tmp * (size_t)grown);
+
+            if (tmp == NULL) {
+                fprintf(stderr,"out of memory\n");
+                goto out;
+            }
+            file_data = tmp;
+            array_size = grown;
+        }
+        rec = new_record(line,TD_ORI,TD_tmp,SFH_tmp);
+        if (rec == NULL) {
+            fprintf(stderr,"out of memory\n");
+            goto out;
+        }
+        file_data[line++] = rec;
+    }
+    printf("read file success!\n");
+
+    htable = MESA_htable_born();
+    if (htable == NULL) {
+        fprintf(stderr,"cannot create hash table\n");
+        goto out;
+    }
+    MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&screen_print,sizeof(screen_print));
+    MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(slot_size));
+    MESA_htable_mature(htable);
+
+    for (i = 0; i < line; i++) {
+        char *key = file_data[i]->td_value->tdstr;
+        long id;
+        int similiarity;
+
+        /*
+         * A negative return is treated as "key already present".
+         * NOTE(review): record 0 is stored as a NULL value, which the search
+         * below cannot tell apart from "not found" -- confirm against the
+         * MESA_htable contract.
+         */
+        if (MESA_htable_add(htable,key,TD_KEY_LEN,(void *)file_data[i]->id) >= 0) {
+            continue;
+        }
+        id = (long)MESA_htable_search(htable,key,TD_KEY_LEN);
+        if (id < 0 || id >= line) {
+            continue;   /* never index outside the loaded records */
+        }
+        similiarity = GIE_sfh_similiarity(file_data[id]->sfh,(int)strlen(file_data[id]->sfh),
+                                          file_data[i]->sfh,(int)strlen(file_data[i]->sfh));
+        if (similiarity < SIMILIAR) {
+            file_data[id]->td_value->lost = 1;
+            file_data[i]->td_value->lost = 1;
+        }
+    }
+
+    for (i = 0; i < line; i++) {
+        fprintf(fpwrite,"%s;%s;%s;%u\n",file_data[i]->td_value->tdstr,file_data[i]->sfh,
+                file_data[i]->td_ori,file_data[i]->td_value->lost);
+    }
+    rc = 0;
+
+out:
+    /* NOTE(review): the hash table handle is never destroyed, as in the original. */
+    if (file_data != NULL) {
+        for (i = 0; i < line; i++) {
+            free_record(file_data[i]);
+            file_data[i] = NULL;
+        }
+        free(file_data);
+    }
+    if (fpread != NULL) {
+        fclose(fpread);
+    }
+    /* fclose flushes buffered output, so a failure here means lost data. */
+    if (fpwrite != NULL && fclose(fpwrite) != 0) {
+        fprintf(stderr,"error writing %s\n",writestr);
+        rc = -1;
+    }
+    return rc;
+}
\ No newline at end of file