summaryrefslogtreecommitdiff
path: root/main.cpp
blob: 707cf7e235ab061d76df6c5fd3ae5c7a1dcd4336 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <algorithm>
#include <string>
#include <cstring>
#include <map>
#include <fstream>
#include "init/params.h"
#include "init/BOBHash32.h"
#include "init/BOBHash64.h"
#include "init/ssummary.h"
#include "include/cmsketch.h"
#include "include/CSS.h"
#include "include/heavykeeper.h"
#include "include/LossyCounting.h"
#include "include/spacesaving.h"

using namespace std;
map <string, int> B, C;
struct node { string x; int y; } p[10000005];
ifstream fin("/home/zhang/桌面/heavykeeper/2016.dat", ios::in | ios::binary);
char a[105];
string Read()
{
    fin.read(a, 8);
    a[8] = '\0';
    string tmp = a;
    // for (int i=0; i < 13; i++)
     //{
     //  tmp += a[i];
    // }
    // tmp += '\0';
    return tmp;
}
int cmp(node i, node j) { return i.y > j.y; }
int main()
{
    int MEM, K;
    cin >> MEM >> K;
    cout << "MEM=" << MEM << "KB" << endl << "Find top" << K << endl << endl;
    cout << "preparing all algorithms" << endl;
    int m = 100000;  // the number of flows
    // preparing heavykeeper
    int hk_M;
    for (hk_M = 1; 32 * hk_M * HK_d + 432 * K <= MEM * 1024 * 8; hk_M++); if (hk_M % 2 == 0) hk_M--;
    heavykeeper* hk; hk = new heavykeeper(hk_M, K); hk->clear();
    //preparing cmsketch
    int cm_M;
    for (cm_M = 1; 32 * cm_M * cm_d + 432 * K <= MEM * 1024 * 8; cm_M++); if (cm_M % 2 == 0) cm_M--;
    cmsketch* cm; cm = new cmsketch(cm_M, K); cm->clear();
    // preparing spacesaving
    int ss_M;
    for (ss_M = 1; 432 * ss_M <= MEM * 1024 * 8; ss_M++);
    spacesaving* ss; ss = new spacesaving(ss_M, K);

    // preparing LossyCounting
    int LC_M;
    for (LC_M = 1; 227 * LC_M <= MEM * 1024 * 8; LC_M++);
    LossyCounting* LC; LC = new LossyCounting(K);

    // preparing CSS
    int css_M;
    for (css_M = 1; 179 * css_M + 4 * css_M * log(css_M) / log(2) <= MEM * 1024 * 8; css_M++);
    CSS* css; css = new CSS(css_M, K); css->clear();
    // Inserting
    for (int i = 1; i <= m; i++)
    {
        if (i % (m / 10) == 0) cout << "Insert " << i << endl;
        string s = Read();
        B[s]++;
        hk->Insert(s);
        ss->Insert(s);
        LC->Insert(s, i / LC_M); if (i % LC_M == 0) LC->clear(i / LC_M);
        css->Insert(s);
        cm->Insert(s);
    }
    hk->work();
    ss->work();
    LC->work();
    css->work();
    cm->work();
    cout << "preparing true flow" << endl;
    // preparing true flow
    int cnt = 0;
    for (map <string, int>::iterator sit = B.begin(); sit != B.end(); sit++)
    {
        p[++cnt].x = sit->first;
        p[cnt].y = sit->second;
    }
    sort(p + 1, p + cnt + 1, cmp);
    for (int i = 1; i <= K; i++) C[p[i].x] = p[i].y;

    // Calculating PRE, ARE, AAE
    cout << "Calculating" << endl;
    int hk_sum = 0, hk_AAE = 0; double hk_ARE = 0;
    string hk_string; int hk_num;
    for (int i = 0; i < K; i++)
    {
        hk_string = (hk->Query(i)).first; hk_num = (hk->Query(i)).second;
        hk_AAE += abs(B[hk_string] - hk_num); hk_ARE += abs(B[hk_string] - hk_num) / (B[hk_string] + 0.0);
        if (C[hk_string]) hk_sum++;
    }

    int cm_sum = 0, cm_AAE = 0; double cm_ARE = 0;
    string cm_string; int cm_num;
    for (int i = 0; i < K; i++)
    {
        cm_string = (cm->Query(i)).first; cm_num = (cm->Query(i)).second;
        cm_AAE += abs(B[cm_string] - cm_num); cm_ARE += abs(B[cm_string] - cm_num) / (B[cm_string] + 0.0);
        if (C[cm_string]) cm_sum++;
    }

    int LC_sum = 0, LC_AAE = 0; double LC_ARE = 0;
    string LC_string; int LC_num;
    for (int i = 0; i < K; i++)
    {
        LC_string = (LC->Query(i)).first; LC_num = (LC->Query(i)).second;
        LC_AAE += abs(B[LC_string] - LC_num); LC_ARE += abs(B[LC_string] - LC_num) / (B[LC_string] + 0.0);
        if (C[LC_string]) LC_sum++;
    }

    int ss_sum = 0, ss_AAE = 0; double ss_ARE = 0;
    string ss_string; int ss_num;
    for (int i = 0; i < K; i++)
    {
        ss_string = (ss->Query(i)).first; ss_num = (ss->Query(i)).second;
        ss_AAE += abs(B[ss_string] - ss_num); ss_ARE += abs(B[ss_string] - ss_num) / (B[ss_string] + 0.0);
        if (C[ss_string]) ss_sum++;
    }

    int css_sum = 0, css_AAE = 0; double css_ARE = 0;
    string css_string; int css_num;
    for (int i = 0; i < K; i++)
    {
        css_string = (css->Query(i)).first; css_num = (css->Query(i)).second;
        css_AAE += abs(B[css_string] - css_num); css_ARE += abs(B[css_string] - css_num) / (B[css_string] + 0.0);
        if (C[css_string]) css_sum++;
    }
    printf("heavkeeper:\nAccepted: %d/%d  %.10f\nARE: %.10f\nAAE: %.10f\n\n", hk_sum, K, (hk_sum / (K + 0.0)), hk_ARE / K, hk_AAE / (K + 0.0));
    printf("LossyCounting:\nAccepted: %d/%d  %.10f\nARE: %.10f\nAAE: %.10f\n\n", LC_sum, K, (LC_sum / (K + 0.0)), LC_ARE / K, LC_AAE / (K + 0.0));
    printf("spacesaving:\nAccepted: %d/%d  %.10f\nARE: %.10f\nAAE: %.10f\n\n", ss_sum, K, (ss_sum / (K + 0.0)), ss_ARE / K, ss_AAE / (K + 0.0));
    printf("CSS:\nAccepted: %d/%d  %.10f\nARE: %.10f\nAAE: %.10f\n\n", css_sum, K, (css_sum / (K + 0.0)), css_ARE / K, css_AAE / (K + 0.0));
    printf("cmsketch:\nAccepted: %d/%d  %.10f\nARE: %.10f\nAAE: %.10f\n\n", cm_sum, K, (cm_sum / (K + 0.0)), cm_ARE / K, cm_AAE / (K + 0.0));
    return 0;
}