fix resending of requests;

fix timeout settings;
fix handling of invalid URLs;
edit schedule of list generation for testing.
Il'ya (Marshal) 2021-04-25 08:17:58 +02:00
parent eb723304ce
commit 2dffddeaae
2 changed files with 6 additions and 3 deletions


@@ -2,7 +2,7 @@ name: Generate or update list of tracked links
 on:
   schedule:
-    - cron: '0 * * * *'
+    - cron: '* * * * *'
   push:
     branches:
       - main
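
For reference, the two schedules differ only in the minute field: '0 * * * *' fires at minute 0 of every hour, while '* * * * *' fires every minute, which matches the commit message's intent of running list generation frequently while testing. A quick way to compare cron expressions, using the third-party croniter package (an illustration only; it is not used by this repo):

from datetime import datetime
from croniter import croniter

base = datetime(2021, 4, 25, 8, 0)
print(croniter('0 * * * *', base).get_next(datetime))  # 2021-04-25 09:00:00 (hourly)
print(croniter('* * * * *', base).get_next(datetime))  # 2021-04-25 08:01:00 (every minute)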


@@ -179,7 +179,7 @@ async def crawl(url: str, session: aiohttp.ClientSession):
     try:
         logger.info(f'[{len(VISITED_LINKS)}] Process {url}')
-        async with session.get(f'{PROTOCOL}{url}', allow_redirects=False) as response:
+        async with session.get(f'{PROTOCOL}{url}', allow_redirects=False, timeout=TIMEOUT) as response:
             status_code = response.status
             content_type = response.headers.get('content-type')
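
TIMEOUT is referenced on the new line but defined outside this hunk, so its value is not visible in the diff. A minimal sketch of how such a constant is typically declared for aiohttp (the name matches the diff; the 10-second total is an assumption, not the repo's actual value):

import aiohttp

# Assumed definition; aiohttp's timeout= parameter accepts either a plain
# number of seconds or a ClientTimeout object, which can also set separate
# connect/read limits.
TIMEOUT = aiohttp.ClientTimeout(total=10)

Passing timeout= per request, as the new line does, overrides the session-wide default for that call only.
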
@@ -214,8 +214,11 @@ async def crawl(url: str, session: aiohttp.ClientSession):
                 LINKS_TO_TRACK.remove(f'{without_trailing_slash}/')
     except UnicodeDecodeError:
         logger.warning('Codec can\'t decode byte. So its was a tgs file')
-    except (TimeoutError, ClientConnectorError):
+    except ClientConnectorError:
         logger.warning(f'Wrong link: {url}')
+    except TimeoutError:
+        logger.warning(f'Retrying {url}')
+        VISITED_LINKS.remove(url)
+        await asyncio.gather(crawl(url, session))
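
The new TimeoutError branch implements the retry from the commit message: the timed-out URL is removed from VISITED_LINKS so the recursive call will not skip it as already visited, and asyncio.gather with a single coroutine is simply awaited (equivalent to awaiting crawl(url, session) directly). One caveat worth noting: aiohttp timeouts raise asyncio.TimeoutError, which became the builtin TimeoutError only in Python 3.11, so whether this bare except catches them depends on the runtime and on how TimeoutError is imported in this module. A self-contained sketch of the same pattern, with a retry cap added as a defensive assumption (the diff itself retries without one):

import asyncio
import aiohttp

VISITED_LINKS = set()
MAX_RETRIES = 3  # assumption: not present in the diff

async def crawl(url: str, session: aiohttp.ClientSession, attempt: int = 0):
    if url in VISITED_LINKS:
        return
    VISITED_LINKS.add(url)
    try:
        # A literal scheme stands in for the repo's PROTOCOL constant here.
        async with session.get(f'https://{url}', allow_redirects=False,
                               timeout=aiohttp.ClientTimeout(total=10)) as response:
            await response.read()
    except asyncio.TimeoutError:
        # Forget the URL so the retry is not treated as already visited.
        VISITED_LINKS.discard(url)
        if attempt < MAX_RETRIES:
            await crawl(url, session, attempt + 1)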