summary | refs | log | tree | commit | diff
path: root/analyzer/get_chain.py
diff options
context:
space:
mode:
Diffstat (limited to 'analyzer/get_chain.py')
-rw-r--r-- analyzer/get_chain.py 66
1 files changed, 48 insertions, 18 deletions
diff --git a/analyzer/get_chain.py b/analyzer/get_chain.py
index d598086d..9e3226ed 100644
--- a/analyzer/get_chain.py
+++ b/analyzer/get_chain.py
@@ -1,6 +1,8 @@
import collections
import pandas as pd
import numpy as np
+import tqdm
+import time
import threading
from Tools.domain_extract import Extracter
from DBopt.service2db import Service2DB
@@ -120,7 +122,7 @@ class GetService:
return denominator
@staticmethod
- def calScore(service, denominator):
+ def calScore(service, denominator, showProcess):
"""
calculate the score for each resource:
for javascript: we divide the events' 14-dimension vector element-wise by the denominator and sum the components
@@ -134,23 +136,51 @@ class GetService:
b's score = b's score + c'score
c's score = c's score
"""
- for key, prop in service.items():
- # other resource
- if key.startswith("r"):
- offset = prop["resource_type"]
- prop["score"] = 1 / denominator[offset - 7]
- continue
+ if showProcess:
+ print("计算js评分:")
+ print("计算资源渲染行为比重:NodeCreation, NodeInsertion, "
+ "NodeRemoval, NodeAttachLater, AttrAddition, AttrModification, AttrRemoval, "
+ "AttrStyleTextAddition, NetworkScriptRequest, NetworkImageRequest, "
+ "NetworkIframeRequest, NetworkXMLHTTPRequest, NetworkLinkRequest, NetworkVideoRequest")
+ if showProcess:
+ for key, prop in tqdm.tqdm(service.items()):
+ time.sleep(0.05)
+ # other resource
+ if key.startswith("r"):
+ offset = prop["resource_type"]
+ prop["score"] = 1 / denominator[offset - 7]
+ continue
- # script resource
- event_vec = np.array(list(prop["events"].values()))
- prop["score"] = sum(np.divide(event_vec, denominator))
+ # script resource
+ event_vec = np.array(list(prop["events"].values()))
+ # print(event_vec)
+ prop["score"] = sum(np.divide(event_vec, denominator))
- # the chain solution
- parent = prop["parent"]
- if parent not in service:
- continue
- if parent != "0":
- service[parent]["score"] += prop["score"]
+ # the chain solution
+ parent = prop["parent"]
+ if parent not in service:
+ continue
+ if parent != "0":
+ service[parent]["score"] += prop["score"]
+ else:
+ for key, prop in service.items():
+ # other resource
+ if key.startswith("r"):
+ offset = prop["resource_type"]
+ prop["score"] = 1 / denominator[offset - 7]
+ continue
+
+ # script resource
+ event_vec = np.array(list(prop["events"].values()))
+ # print(event_vec)
+ prop["score"] = sum(np.divide(event_vec, denominator))
+
+ # the chain solution
+ parent = prop["parent"]
+ if parent not in service:
+ continue
+ if parent != "0":
+ service[parent]["score"] += prop["score"]
@staticmethod
def event2Num(event_type):
@@ -169,7 +199,7 @@ class GetService:
}
return e2n.get(event_type, 0)
- def run(self, filename):
+ def run(self, filename, showProcess):
print(threading.current_thread().name + ':' + filename)
service = dict()
# to find a script's parent, we need to map script_url to script_id
@@ -243,7 +273,7 @@ class GetService:
denominator = self.calEventsNum(data["timeline"])
- self.calScore(service, denominator)
+ self.calScore(service, denominator, showProcess)
"""
save to mysql