diff options
Diffstat (limited to 'test')
| -rw-r--r-- | test/utils.cpp | 147 | ||||
| -rw-r--r-- | test/utils.hpp | 23 |
2 files changed, 168 insertions, 2 deletions
diff --git a/test/utils.cpp b/test/utils.cpp index b53e4e4..4015dd4 100644 --- a/test/utils.cpp +++ b/test/utils.cpp @@ -8,7 +8,10 @@ #include <random> #include <string.h> #include <algorithm> - +#include <fstream> +#include <math.h> +#include <unistd.h> +#include <sstream> #include "fieldstat.h" #include "utils.hpp" @@ -293,3 +296,145 @@ double test_cal_topk_accuracy(vector<struct Fieldstat_tag_list_wrapper *> &test_ double accuracy = (double)correct / test_result.size(); return accuracy; } + + +//=========================================================================== +//= Function to generate Zipf (power law) distributed random variables = +//= - Input: alpha and N = +//= - Output: Returns with Zipf distributed random variable = +//=========================================================================== +int zipf(double alpha, int n) +{ + static bool first = true; // Static first time flag + static double c = 0; // Normalization constant + double z; // Uniform random number (0 < z < 1) + double sum_prob; // Sum of probabilities + double zipf_value; // Computed exponential value to be returned + int i; // Loop counter + + // Compute normalization constant on first call only + if (first) + { + for (i=1; i<=n; i++) + c = c + (1.0 / pow((double) i, alpha)); + c = 1.0 / c; + first = false; + } + + // Pull a uniform random number (0 < z < 1) + do + { + z = (double)rand() / (double)RAND_MAX; + } + while ((z == 0.0) || (z == 1.0)); + + // Map z to the value + sum_prob = 0; + for (i=1; i<=n; i++) + { + sum_prob = sum_prob + c / pow((double) i, alpha); + if (sum_prob >= z) + { + zipf_value = i; + break; + } + } + + return(zipf_value); +} + + +// class SpreadSketchZipfGenerator { +// private: +// const int MAX_DATA = 1000000; +// std::pair<std::string, std::string> *loadeds; +// unsigned cursor; + +// public: +// SpreadSketchZipfGenerator(double alpha, int n) { + +// } + +// struct Flow next() { +// int r_cursor = cursor % MAX_DATA; +// struct Flow flow; +// flow.src_ip = loadeds[r_cursor].first; +// flow.dst_ip = loadeds[r_cursor].second; + +// cursor++; + +// return flow; +// } + +// ~SpreadSketchZipfGenerator() { +// delete[] loadeds; +// } + +// double _alpha; +// int _n; +// }; + +SpreadSketchZipfGenerator::SpreadSketchZipfGenerator(double alpha, int n) { + _alpha = alpha; + _n = n; + + // generate data and write them to file + std::string filename = "zipf_" + std::to_string(alpha) + "_" + std::to_string(n) + ".txt"; + + std::unordered_map<int, int> fanout_map; // src_ip_id -> fanout being used + + if (access(filename.c_str(), F_OK) != 0) { + printf("file %s not found, generating data\n", filename.c_str()); + + std::ofstream file(filename); + if (!file.is_open()) { + printf("failed to open file %s\n", filename.c_str()); + return; + } + + for (int i = 0; i < MAX_DATA; i++) { + int src_id = zipf(alpha, n); + int fanout = fanout_map.find(src_id) == fanout_map.end() ? 0 : fanout_map[src_id]; + fanout_map[src_id] = fanout + 1; + + file << "s_" << src_id << " d_" << fanout << std::endl; + } + + file.close(); + printf("data generated and saved to file %s\n", filename.c_str()); + } + + // load data + std::ifstream file(filename); + if (!file.is_open()) { + printf("failed to open file %s\n", filename.c_str()); + return; + } + + loadeds = new std::pair<std::string, std::string>[MAX_DATA]; + std::string line; + int i = 0; + while (std::getline(file, line) && i < MAX_DATA) { + std::istringstream iss(line); + std::string src_ip, dst_ip; + iss >> src_ip >> dst_ip; + loadeds[i] = std::make_pair(src_ip, dst_ip); + i++; + } + file.close(); +} + +SpreadSketchZipfGenerator::~SpreadSketchZipfGenerator() { + delete[] loadeds; +} + +struct Flow SpreadSketchZipfGenerator::next() { + int r_cursor = cursor % MAX_DATA; + struct Flow flow; + flow.src_ip = loadeds[r_cursor].first; + flow.dst_ip = loadeds[r_cursor].second; + + cursor++; + + return flow; +}
\ No newline at end of file diff --git a/test/utils.hpp b/test/utils.hpp index 28dea2b..f758f1d 100644 --- a/test/utils.hpp +++ b/test/utils.hpp @@ -44,4 +44,25 @@ private: double test_cal_topk_accuracy(std::vector<struct Fieldstat_tag_list_wrapper *> &test_result, std::unordered_map<std::string, int> &expected_count); // after we change fieldstat_counter_get return a error code in, all the tests should change correspondingly, so just use a adapter aliasing the old function -long long my_fieldstat_counter_get(const struct fieldstat *instance, int cube_id, int metric_id, int cell_id);
\ No newline at end of file +long long my_fieldstat_counter_get(const struct fieldstat *instance, int cube_id, int metric_id, int cell_id); + + +struct Flow { + std::string src_ip; + std::string dst_ip; +}; + +class SpreadSketchZipfGenerator { +private: + const int MAX_DATA = 1000000; + std::pair<std::string, std::string> *loadeds; + unsigned cursor; + +public: + SpreadSketchZipfGenerator(double alpha, int n); + struct Flow next(); + ~SpreadSketchZipfGenerator(); + + double _alpha; + int _n; +};
\ No newline at end of file |
