summaryrefslogtreecommitdiff
path: root/evaluation/affiliation_bin/generics.py
diff options
context:
space:
mode:
authorZHENG Yanqin <[email protected]>2023-05-25 07:37:53 +0000
committerZHENG Yanqin <[email protected]>2023-05-25 07:37:53 +0000
commite9896bd62bb29da00ec00a121374167ad91bfe47 (patch)
treed94845574c8ef7473d0204d28b4efd4038035463 /evaluation/affiliation_bin/generics.py
parentfad9aa875c84b38cbb5a6010e104922b1eea7291 (diff)
parent4c5734c624705449c6b21c4b2bc5554e7259fdba (diff)
Merge branch 'master' into 'main'HEADmain
readme See merge request zyq/time_series_anomaly_detection!1
Diffstat (limited to 'evaluation/affiliation_bin/generics.py')
-rw-r--r--evaluation/affiliation_bin/generics.py143
1 files changed, 143 insertions, 0 deletions
diff --git a/evaluation/affiliation_bin/generics.py b/evaluation/affiliation_bin/generics.py
new file mode 100644
index 0000000..a2f84be
--- /dev/null
+++ b/evaluation/affiliation_bin/generics.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+from itertools import groupby
+from operator import itemgetter
+import math
+import gzip
+import glob
+import os
+
+
def convert_vector_to_events(vector=(0, 1, 1, 0, 0, 1, 0)):
    """
    Convert a binary vector (indicating 1 for the anomalous instances)
    to a list of events. The events are considered as durations,
    i.e. setting 1 at index i corresponds to an anomalous interval [i, i+1).

    Note: the default is a tuple (not a list) to avoid the mutable
    default-argument pitfall; any sequence of {0, 1} is accepted.

    :param vector: a sequence of elements belonging to {0, 1}
    :return: a list of couples, each couple representing the start and stop of
    each event
    """
    positive_indexes = [idx for idx, val in enumerate(vector) if val > 0]
    events = []
    # Consecutive positive indexes share the same (position - value) key,
    # so groupby splits them into runs of consecutive indexes.
    for k, g in groupby(enumerate(positive_indexes), lambda ix: ix[0] - ix[1]):
        cur_cut = list(map(itemgetter(1), g))
        events.append((cur_cut[0], cur_cut[-1]))

    # Consistent conversion in case of range anomalies (for indexes):
    # A positive index i is considered as the interval [i, i+1),
    # so the last index should be moved by 1
    events = [(x, y + 1) for (x, y) in events]

    return (events)
+
+
def infer_Trange(events_pred, events_gt):
    """
    Given the list of events events_pred and events_gt, get the
    smallest possible Trange corresponding to the start and stop indexes
    of the whole series.
    Trange will not influence the measure of distances, but will impact the
    measures of probabilities.

    :param events_pred: a list of couples corresponding to predicted events
    :param events_gt: a list of couples corresponding to ground truth events
    :return: a couple corresponding to the smallest range containing the events
    """
    if len(events_gt) == 0:
        raise ValueError('The gt events should contain at least one event')
    if len(events_pred) == 0:
        # No predictions: fall back to the ground-truth-only range
        return (infer_Trange(events_gt, events_gt))

    # The smallest enclosing range is simply the extreme bounds over
    # both collections of events taken together.
    all_events = list(events_pred) + list(events_gt)
    earliest_start = min(start for (start, _) in all_events)
    latest_stop = max(stop for (_, stop) in all_events)
    return ((earliest_start, latest_stop))
+
+
def has_point_anomalies(events):
    """
    Checking whether events contain point anomalies, i.e.
    events starting and stopping at the same time.

    :param events: a list of couples corresponding to predicted events
    :return: True is the events have any point anomalies, False otherwise
    """
    if not events:
        return (False)
    # A point anomaly is an event of zero duration, i.e. the shortest
    # duration found among all events is exactly 0.
    durations = [stop - start for (start, stop) in events]
    return (min(durations) == 0)
+
+
+def _sum_wo_nan(vec):
+ """
+ Sum of elements, ignoring math.isnan ones
+
+ :param vec: vector of floating numbers
+ :return: sum of the elements, ignoring math.isnan ones
+ """
+ vec_wo_nan = [e for e in vec if not math.isnan(e)]
+ return (sum(vec_wo_nan))
+
+
+def _len_wo_nan(vec):
+ """
+ Count of elements, ignoring math.isnan ones
+
+ :param vec: vector of floating numbers
+ :return: count of the elements, ignoring math.isnan ones
+ """
+ vec_wo_nan = [e for e in vec if not math.isnan(e)]
+ return (len(vec_wo_nan))
+
+
def read_gz_data(filename='data/machinetemp_groundtruth.gz'):
    """
    Load a file compressed with gz, such that each line of the
    file is either 0 (representing a normal instance) or 1 (representing
    an anomalous instance).
    :param filename: file path to the gz compressed file
    :return: list of integers with either 0 or 1
    """
    with gzip.open(filename, 'rb') as handle:
        raw_lines = handle.read().splitlines()
    # int() accepts the bytes lines directly (surrounding whitespace ignored)
    return ([int(line) for line in raw_lines])
+
+
def read_all_as_events():
    """
    Load the files contained in the folder `data/` and convert
    to events. The length of the series is kept.
    The convention for the file name is: `dataset_algorithm.gz`
    :return: two dictionaries:
    - the first containing the list of events for each dataset and algorithm,
    - the second containing the range of the series for each dataset
    """
    datasets = dict()
    Tranges = dict()
    for filepath in glob.glob('data/*.gz'):
        vector = read_gz_data(filepath)
        # Parse `dataset_algorithm.gz` from the basename (ad hoc for these files)
        name_parts = os.path.split(filepath)[1].split('_')
        data_name = name_parts[0]
        algo_name = name_parts[1].split('.')[0]
        if data_name not in datasets:
            datasets[data_name] = dict()
            # Series range is taken from the first file seen for this dataset
            Tranges[data_name] = (0, len(vector))
        datasets[data_name][algo_name] = convert_vector_to_events(vector)
    return (datasets, Tranges)
+
+
def f1_func(p, r):
    """
    Compute the f1 function (harmonic mean of precision and recall)
    :param p: precision numeric value
    :param r: recall numeric value
    :return: f1 numeric value; 0.0 when both precision and recall are zero
        (the conventional F1 value, avoiding a ZeroDivisionError)
    """
    if p + r == 0:
        return (0.0)
    return (2 * p * r / (p + r))