From e385f45da6d5cc0b354110059451ca02128b1687 Mon Sep 17 00:00:00 2001 From: "Il'ya (Marshal)" Date: Fri, 23 Apr 2021 22:49:21 +0200 Subject: [PATCH] print lead time --- make_tracked_links_list.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/make_tracked_links_list.py b/make_tracked_links_list.py index 0f70f3fcb0..804d27462c 100644 --- a/make_tracked_links_list.py +++ b/make_tracked_links_list.py @@ -1,8 +1,9 @@ -import os import asyncio import logging +import os import re from html import unescape +from time import time from urllib.parse import unquote import aiohttp @@ -186,7 +187,7 @@ async def crawl(url: str, session: aiohttp.ClientSession): # TODO track hashes of image/svg/video content types logger.info(f'Unhandled type: {content_type}') except: - logger.error('Codec can\'t decode byte. So its was a tgs file') + logger.warning('Codec can\'t decode byte. So it was a tgs file') async def start(url_list: set[str]): @@ -197,7 +198,10 @@ async def start(url_list: set[str]): if __name__ == '__main__': HIDDEN_URLS.add(BASE_URL) + logger.info('Start crawling...') + start_time = time() asyncio.get_event_loop().run_until_complete(start(HIDDEN_URLS)) + logger.info(f'Stop crawling. {time() - start_time} sec.') with open(OUTPUT_FILENAME, 'w') as f: f.write('\n'.join(sorted(LINKS_TO_TRACK)))