Diffstat (limited to 'homepageAve.py')
| -rw-r--r-- | homepageAve.py | 116 |
1 file changed, 116 insertions, 0 deletions
diff --git a/homepageAve.py b/homepageAve.py
new file mode 100644
index 0000000..7ea4726
--- /dev/null
+++ b/homepageAve.py
@@ -0,0 +1,116 @@
+# Homepage IPv6 access success rate and latency:
+# the success rate is measured with requests, the load latency with Selenium.
+import requests
+import urllib3
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+import time
+from tqdm import tqdm
+import csv
+import os
+from multiprocessing import Process, Lock
+
+# verify=False is used below, so silence urllib3's InsecureRequestWarning noise.
+urllib3.disable_warnings()
+
+
+# Check whether a single page request succeeds; returns the HTTP status code.
+def page_status(url):
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'}
+    try:
+        r = requests.get(url, headers=headers, verify=False, timeout=10)
+        status = r.status_code
+    except Exception:
+        status = 404  # treat connection errors and timeouts as failures
+    return status
+
+
+# Load time for a single visit to one page.
+def page_time(url):
+    opts = Options()
+    opts.add_argument('--headless')  # run Chrome without a window
+    driver = webdriver.Chrome(options=opts)
+    driver.set_page_load_timeout(20)  # page-load timeout in seconds
+    try:
+        start = time.perf_counter()
+        driver.get(url)
+        end = time.perf_counter()
+        delay = end - start  # page load time
+    except Exception:
+        delay = 20.0  # on timeout, record the full timeout value
+    driver.quit()  # quit() rather than close() so the browser process exits
+    return delay
+
+
+# Success count and average latency over 10 visits to each site's homepage.
+def deal_websites(websites, start, end, lock):
+    for count in tqdm(range(start, end)):
+        web = websites[count]
+        success_status = 0
+        ave_delay = 0.0
+        if web.startswith('http'):
+            url = web
+        else:
+            url = 'http://' + web
+        for i in range(10):
+            status = page_status(url)
+            if int(status) < 400:  # request succeeded
+                success_status += 1
+
+        for j in range(10):
+            delay = page_time(url)
+            ave_delay += delay
+
+        lock.acquire()
+        # Visit the homepage 10 times; record success count and average load latency.
+        with open('./result/websitesTimeInfo.csv', 'a', encoding='utf-8', newline='') as ff:
+            wr = csv.writer(ff)
+            row = [web, success_status, (ave_delay / 10)]
+            wr.writerow(row)
+        lock.release()
+
+
+# Batch-fetch the HTTP status codes of the sites' homepages.
+def deal_urls(urls, start, end, lock):
+    for count in tqdm(range(start, end)):
+        url = urls[count]
+        status = page_status(url)
+        lock.acquire()
+        with open('./result/urls_status.csv', 'a', encoding='utf-8', newline='') as ff:
+            wr = csv.writer(ff)
+            row = [url, status]
+            wr.writerow(row)
+        lock.release()
+
+
+def get_webs():
+    # List of homepages to test.
+    webs = []
+    # Input format: one homepage URL or domain name per line.
+    with open('./data/websites.txt', 'r') as f:
+        for line in f:
+            line = line.strip('\n')
+            if not line.startswith('http'):
+                line = 'http://' + line
+            webs.append(line)
+    return webs
+
+
+if __name__ == '__main__':
+    os.makedirs('./result', exist_ok=True)  # make sure the output directory exists
+    websites = get_webs()
+    lock = Lock()
+    process_list = []
+    p1 = Process(target=deal_urls, args=(websites, 0, 10, lock))  # spawn a worker process
+    p1.start()
+    process_list.append(p1)
+    p2 = Process(target=deal_urls, args=(websites, 10, 20, lock))  # spawn a worker process
+    p2.start()
+    process_list.append(p2)
+    p3 = Process(target=deal_urls, args=(websites, 20, 30, lock))  # spawn a worker process
+    p3.start()
+    process_list.append(p3)
+
+    for p in process_list:
+        p.join()
