summaryrefslogtreecommitdiff
path: root/src/dataset_build/get_lost.c
blob: 0e6c452d3a2e752e197e1c248e0d6dd87d6017a7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <MESA/MESA_htable.h>
#include <assert.h>
#include <ctype.h>
#define HTABLE_SIZE 8*64*1024*1024
#define SFH_PASS_RATE 0.8
#define SIMILIAR 80

/*
 * TD key of one input record plus its "lost" verdict.
 */
struct td
{
	/* Heap copy of the TD string parsed from the third ';'-separated field of an
	 * input line; main() passes it to the hash table as the lookup key. */
	char *tdstr;
	/* Initialised to 0 by main(); set to 1 when another record with the same key
	 * has an SFH similarity below SIMILIAR. */
	unsigned int lost;
};
typedef struct td td;

/*
 * One parsed line of the input file, as stored in main()'s record array.
 */
struct file_sfh_data
{
	/* Index of this record in main()'s record array; also used as the value
	 * associated with the TD key in the hash table. */
	long id;
	/* Heap copy of the SFH string (fourth ';'-separated field of the line). */
	char *sfh;
	/* Heap-allocated TD key and "lost" flag for this record. */
	td *td_value;
	/* Heap copy of the original TD text (second ';'-separated field of the line). */
	char *td_ori;
};
typedef struct file_sfh_data file_sfh_data;

/* Number of key bytes handed to the hash table for every TD string. */
enum { TD_KEY_LEN = 32 };

/*
 * Copy src into a zero-filled heap buffer that is at least TD_KEY_LEN bytes long,
 * so that passing TD_KEY_LEN key bytes to the hash table never reads past the
 * allocation when the parsed TD string is shorter than the key length.
 * Returns NULL if the allocation fails; the caller frees the result.
 */
static char *dup_td_key(const char *src)
{
	size_t len = strlen(src);
	size_t cap = (len < (size_t)TD_KEY_LEN ? (size_t)TD_KEY_LEN : len) + 1;
	char *key = calloc(cap, 1);

	if(key != NULL)
	{
		memcpy(key, src, len);
	}
	return key;
}

/* Release one record and every string it owns; a NULL record is accepted. */
static void free_record(file_sfh_data *rec)
{
	if(rec == NULL)
	{
		return;
	}
	free(rec->sfh);
	if(rec->td_value != NULL)
	{
		free(rec->td_value->tdstr);
		free(rec->td_value);
	}
	free(rec->td_ori);
	free(rec);
}

/* Build a heap record from the parsed fields; returns NULL if any allocation fails. */
static file_sfh_data *new_record(long id, const char *td_ori, const char *td_key, const char *sfh)
{
	file_sfh_data *rec = calloc(1, sizeof *rec);

	if(rec == NULL)
	{
		return NULL;
	}
	rec->id = id;
	rec->sfh = strdup(sfh);
	rec->td_ori = strdup(td_ori);
	rec->td_value = calloc(1, sizeof *rec->td_value);	/* lost starts at 0 */
	if(rec->td_value != NULL)
	{
		rec->td_value->tdstr = dup_td_key(td_key);
	}
	if(rec->sfh == NULL || rec->td_ori == NULL || rec->td_value == NULL || rec->td_value->tdstr == NULL)
	{
		free_record(rec);
		return NULL;
	}
	return rec;
}

/*
 * Reads "len;td_ori;td;sfh" lines from the hard-coded input file, finds records
 * that share the same TD key, and flags both records of a pair as "lost" when the
 * similarity of their SFH strings is below SIMILIAR. Every record is then written
 * to the hard-coded output file as "td;sfh;td_ori;lost".
 *
 * Lines that do not yield all four fields (including lines whose fields exceed
 * the parse buffers) are skipped.
 *
 * Returns 0 on success and -1 if a file cannot be opened, written or closed, or
 * if memory runs out. The command-line arguments are not used.
 */
int main(int argc,char *argv[])
{
	const char *dirstr = "../../data/td_data_set/td_data_20171207/td_sfh_lost";
	const char *writestr = "../../data/td_data_set/td_data_20171207/td.txt";
	/* static: about 600 KiB of parse buffers kept off the stack */
	static char TD_tmp[256], SFH_tmp[1024*300], TD_ORI[1024*10];
	static char buffer[1024*300+1];
	FILE *fpread = NULL;	/* input file */
	FILE *fpwrite = NULL;	/* output file */
	file_sfh_data **file_data = NULL;
	size_t array_size = 1024;
	size_t line = 0;
	size_t i;
	int total_len = 0;
	int similiarity;
	int screen_print = 0;
	unsigned int slot_size = 1024*1024*16;
	MESA_htable_handle htable = NULL;
	int rc = -1;

	(void)argc;
	(void)argv;

	printf("file str is %s\n",dirstr);
	fpread=fopen(dirstr,"rb");
	if(fpread==NULL)
	{
		printf("open file error\n");
		return -1;
	}
	/* Open (and truncate) the output only once the input is known to exist. */
	fpwrite=fopen(writestr,"w");
	if(fpwrite==NULL)
	{
		printf("open file error\n");
		goto cleanup;
	}
	file_data=malloc(array_size*sizeof *file_data);
	if(file_data==NULL)
	{
		printf("out of memory\n");
		goto cleanup;
	}

	/* Loop on fgets itself: testing feof() first re-parses the last line once more. */
	while(fgets(buffer,sizeof buffer,fpread)!=NULL)
	{
		/* Field widths are one less than the destination sizes; an over-long
		 * field stops the match before the next ';' and the line is skipped. */
		if(sscanf(buffer,"%d;%10239[^;];%255[^;];%307199s",&total_len,TD_ORI,TD_tmp,SFH_tmp)!=4)
		{
			continue;
		}
		if(line==array_size)
		{
			file_sfh_data **grown=realloc(file_data,2*array_size*sizeof *file_data);
			if(grown==NULL)
			{
				printf("out of memory\n");
				goto cleanup;
			}
			file_data=grown;
			array_size*=2;
		}
		file_data[line]=new_record((long)line,TD_ORI,TD_tmp,SFH_tmp);
		if(file_data[line]==NULL)
		{
			printf("out of memory\n");
			goto cleanup;
		}
		line++;
	}
	printf("read file success!\n");

	htable=MESA_htable_born();
	if(htable==NULL)
	{
		printf("create hash table error\n");
		goto cleanup;
	}
	MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&screen_print,sizeof(screen_print));
	MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(slot_size));
	MESA_htable_mature(htable);
	/* NOTE(review): the table is never released; no destroy call is visible in this file - confirm the API. */

	for(i=0;i<line;i++)
	{
		/* Store index+1 so that record 0 is not stored as a NULL data pointer. */
		if(MESA_htable_add(htable,(char*)(file_data[i]->td_value->tdstr),TD_KEY_LEN,(void *)(file_data[i]->id+1))<0)
		{
			long first=(long)MESA_htable_search(htable,(char*)file_data[i]->td_value->tdstr,TD_KEY_LEN)-1;
			/* The add was rejected but no usable earlier record came back: nothing to compare. */
			if(first<0||(size_t)first>=line)
			{
				continue;
			}
			/* NOTE(review): GIE_sfh_similiarity has no visible declaration in this file - presumably its header must be included; confirm. */
			similiarity=GIE_sfh_similiarity(file_data[first]->sfh,(int)strlen(file_data[first]->sfh),file_data[i]->sfh,(int)strlen(file_data[i]->sfh));
			if(similiarity<SIMILIAR)
			{
				file_data[first]->td_value->lost = 1;
				file_data[i]->td_value->lost = 1;
			}
		}
	}

	for(i=0;i<line;i++)
	{
		if(fprintf(fpwrite,"%s;%s;%s;%u\n",file_data[i]->td_value->tdstr,file_data[i]->sfh,file_data[i]->td_ori,file_data[i]->td_value->lost)<0)
		{
			printf("write file error\n");
			goto cleanup;
		}
	}
	rc=0;

cleanup:
	if(file_data!=NULL)
	{
		for(i=0;i<line;i++)
		{
			free_record(file_data[i]);
		}
		free(file_data);
	}
	fclose(fpread);
	/* fclose flushes buffered output, so a failure here means lost data. */
	if(fpwrite!=NULL&&fclose(fpwrite)!=0)
	{
		printf("close file error\n");
		rc=-1;
	}
	return rc;
}