print lead time

This commit is contained in:
Il'ya (Marshal) 2021-04-23 22:49:21 +02:00
parent 415a9e9b6c
commit e385f45da6

View file

@@ -1,8 +1,9 @@
-import os
 import asyncio
 import logging
+import os
 import re
 from html import unescape
+from time import time
 from urllib.parse import unquote
 import aiohttp
@@ -186,7 +187,7 @@ async def crawl(url: str, session: aiohttp.ClientSession):
             # TODO track hashes of image/svg/video content types
             logger.info(f'Unhandled type: {content_type}')
     except:
-        logger.error('Codec can\'t decode byte. So its was a tgs file')
+        logger.warning('Codec can\'t decode byte. So its was a tgs file')


 async def start(url_list: set[str]):
@@ -197,7 +198,10 @@ async def start(url_list: set[str]):
 if __name__ == '__main__':
     HIDDEN_URLS.add(BASE_URL)

+    logger.info('Start crawling...')
+    start_time = time()
     asyncio.get_event_loop().run_until_complete(start(HIDDEN_URLS))
+    logger.info(f'Stop crawling. {time() - start_time} sec.')

     with open(OUTPUT_FILENAME, 'w') as f:
         f.write('\n'.join(sorted(LINKS_TO_TRACK)))