Diffstat (limited to 'homepageAve.py')
-rw-r--r--  homepageAve.py  116
1 file changed, 116 insertions(+), 0 deletions(-)
diff --git a/homepageAve.py b/homepageAve.py
new file mode 100644
index 0000000..7ea4726
--- /dev/null
+++ b/homepageAve.py
@@ -0,0 +1,116 @@
+# Homepage IPv6 access success rate and latency: success rate can be measured
+# with requests; access latency with selenium
+import requests
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+import time
+from tqdm import tqdm
+import csv
+from multiprocessing import Process, Lock
+
+
+# Whether a single visit to a page succeeds; returns the HTTP status code
+def page_status(url):
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'}
+    try:
+        r = requests.get(url, headers=headers, verify=False, timeout=10)
+        status = r.status_code
+    except Exception:
+        status = 404  # treat any request failure (timeout, DNS, TLS) as unsuccessful
+    return status
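+
+# Note: verify=False above makes urllib3 emit an InsecureRequestWarning on
+# every request; if that noise matters, it can be silenced once at import
+# time (a minimal sketch, using urllib3 directly):
+#   import urllib3
+#   urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)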
+
+
+# Load time of a single visit to one page
+def page_time(url):
+    opts = Options()
+    opts.add_argument('--headless')  # run Chrome without a visible window
+    driver = webdriver.Chrome(options=opts)
+    driver.set_page_load_timeout(20)  # page-load timeout in seconds
+    try:
+        start = time.perf_counter()
+        driver.get(url)
+        end = time.perf_counter()
+        delay = end - start  # page load time
+    except Exception:
+        delay = 20.0  # load timed out or failed; clamp to the timeout value
+    driver.quit()  # quit() (not close()) also shuts down the chromedriver process
+    return delay
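+
+# A cheaper variant (sketch, assuming one Chrome instance may be reused):
+# starting the browser once amortizes its start-up cost across all timings,
+# though browser caching can then make repeat loads of the same page faster.
+#   driver = webdriver.Chrome(options=opts)
+#   delays = []
+#   for _ in range(10):
+#       start = time.perf_counter()
+#       driver.get(url)
+#       delays.append(time.perf_counter() - start)
+#   driver.quit()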
+
+# Success count and average latency over 10 visits to a site's homepage
+def deal_websites(websites, start, end, lock):
+    for count in tqdm(range(start, end)):
+        web = websites[count]
+        success_count = 0
+        total_delay = 0.0
+        if web.startswith('http'):
+            url = web
+        else:
+            url = 'http://' + web
+        for _ in range(10):
+            status = page_status(url)
+            if status < 400:  # 2xx/3xx responses count as a successful visit
+                success_count += 1
+        for _ in range(10):
+            total_delay += page_time(url)
+        # visit the homepage 10 times; record successes and mean load time
+        with lock:
+            with open('./result/websitesTimeInfo.csv', 'a', encoding='utf-8', newline='') as ff:
+                csv.writer(ff).writerow([web, success_count, total_delay / 10])
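+
+# Example output row (hypothetical values): ['http://example.com', 9, 3.42]
+# means 9 of 10 requests succeeded and the mean load time was 3.42 s.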
+
+# Batch-fetch the response status codes of website homepages
+def deal_urls(urls, start, end, lock):
+    for count in tqdm(range(start, end)):
+        url = urls[count]
+        status = page_status(url)
+        with lock:
+            with open('./result/urls_status.csv', 'a', encoding='utf-8', newline='') as ff:
+                csv.writer(ff).writerow([url, status])
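+
+# Usage sketch (hypothetical input): deal_urls(['http://example.com'], 0, 1, Lock())
+# appends one [url, status] row such as ['http://example.com', 200].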
+
+
+def get_webs():
+    # Build the list of pages to check; ./data/websites.txt holds one
+    # homepage URL or bare domain per line
+    webs = []
+    with open('./data/websites.txt', 'r') as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue  # skip blank lines
+            if not line.startswith('http'):
+                line = 'http://' + line
+            webs.append(line)
+    return webs
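+
+# Example ./data/websites.txt (hypothetical contents):
+#   example.com
+#   https://example.org
+# Bare domains get an 'http://' prefix; full URLs pass through unchanged.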
+
+
+if __name__ == '__main__':
+    websites = get_webs()
+    lock = Lock()
+    process_list = []
+    # spawn one worker process per slice of 10 sites
+    for start in range(0, 30, 10):
+        p = Process(target=deal_urls, args=(websites, start, start + 10, lock))
+        p.start()
+        process_list.append(p)
+
+    for p in process_list:
+        p.join()
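+
+# Usage sketch (assumes ./data/websites.txt has at least 30 entries and that
+# the ./result/ directory already exists):
+#   $ python homepageAve.py
+# Three workers each probe a slice of 10 sites and append [url, status] rows
+# to ./result/urls_status.csv under the shared lock.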