diff options
Diffstat (limited to 'analyzer/get_chain.py')
| -rw-r--r-- | analyzer/get_chain.py | 66 |
1 files changed, 48 insertions, 18 deletions
diff --git a/analyzer/get_chain.py b/analyzer/get_chain.py index d598086d..9e3226ed 100644 --- a/analyzer/get_chain.py +++ b/analyzer/get_chain.py @@ -1,6 +1,8 @@ import collections import pandas as pd import numpy as np +import tqdm +import time import threading from Tools.domain_extract import Extracter from DBopt.service2db import Service2DB @@ -120,7 +122,7 @@ class GetService: return denominator @staticmethod - def calScore(service, denominator): + def calScore(service, denominator, showProcess): """ calculate the score for each resource: for javascript: we use events' 14-dimention vector divide denominator and add them together @@ -134,23 +136,51 @@ class GetService: b's score = b's score + c'score c's score = c's score """ - for key, prop in service.items(): - # other resource - if key.startswith("r"): - offset = prop["resource_type"] - prop["score"] = 1 / denominator[offset - 7] - continue + if showProcess: + print("计算js评分:") + print("计算资源渲染行为比重:NodeCreation, NodeInsertion, " + "NodeRemoval, NodeAttachLater, AttrAddition, AttrModification, AttrRemoval, " + "AttrStyleTextAddition, NetworkScriptRequest, NetworkImageRequest, " + "NetworkIframeRequest, NetworkXMLHTTPRequest, NetworkLinkRequest, NetworkVideoRequest") + if showProcess: + for key, prop in tqdm.tqdm(service.items()): + time.sleep(0.05) + # other resource + if key.startswith("r"): + offset = prop["resource_type"] + prop["score"] = 1 / denominator[offset - 7] + continue - # script resource - event_vec = np.array(list(prop["events"].values())) - prop["score"] = sum(np.divide(event_vec, denominator)) + # script resource + event_vec = np.array(list(prop["events"].values())) + # print(event_vec) + prop["score"] = sum(np.divide(event_vec, denominator)) - # the chain solution - parent = prop["parent"] - if parent not in service: - continue - if parent != "0": - service[parent]["score"] += prop["score"] + # the chain solution + parent = prop["parent"] + if parent not in service: + continue + if parent != "0": + service[parent]["score"] += prop["score"] + else: + for key, prop in service.items(): + # other resource + if key.startswith("r"): + offset = prop["resource_type"] + prop["score"] = 1 / denominator[offset - 7] + continue + + # script resource + event_vec = np.array(list(prop["events"].values())) + # print(event_vec) + prop["score"] = sum(np.divide(event_vec, denominator)) + + # the chain solution + parent = prop["parent"] + if parent not in service: + continue + if parent != "0": + service[parent]["score"] += prop["score"] @staticmethod def event2Num(event_type): @@ -169,7 +199,7 @@ class GetService: } return e2n.get(event_type, 0) - def run(self, filename): + def run(self, filename, showProcess): print(threading.current_thread().name + ':' + filename) service = dict() # to find a script's parent, we need to map script_url to script_id @@ -243,7 +273,7 @@ class GetService: denominator = self.calEventsNum(data["timeline"]) - self.calScore(service, denominator) + self.calScore(service, denominator, showProcess) """ save to mysql |
