mirror of
https://github.com/MarshalX/telegram-crawler.git
synced 2025-01-20 16:15:08 +01:00
resend requests with 5XX status code
This commit is contained in:
parent
1aa74c325e
commit
7aee3658e3
2 changed files with 6 additions and 3 deletions
|
@@ -40,7 +40,8 @@ async def crawl(url: str, session: aiohttp.ClientSession):
|
|||
try:
|
||||
logger.info(f'Process {url}')
|
||||
async with session.get(f'{PROTOCOL}{url}', allow_redirects=False) as response:
|
||||
if response.status == 500:
|
||||
if response.status // 100 == 5:
|
||||
logger.warning(f'Error 5XX. Retrying {url}')
|
||||
return await asyncio.gather(crawl(url, session))
|
||||
|
||||
if response.status not in {200, 304}:
|
||||
|
@@ -68,6 +69,7 @@ async def crawl(url: str, session: aiohttp.ClientSession):
|
|||
logger.info(f'Write to {filename}')
|
||||
await f.write(content)
|
||||
except (TimeoutError, ClientConnectorError):
|
||||
logger.warning(f'Client or timeout error. Retrying {url}')
|
||||
await asyncio.gather(crawl(url, session))
|
||||
|
||||
|
||||
|
|
|
@@ -229,8 +229,9 @@ async def crawl(url: str, session: aiohttp.ClientSession):
|
|||
async with session.get(f'{PROTOCOL}{url}', allow_redirects=False, timeout=TIMEOUT) as response:
|
||||
content_type = response.headers.get('content-type')
|
||||
|
||||
if response.status == 500:
|
||||
if response.status // 100 == 5:
|
||||
VISITED_LINKS.remove(url)
|
||||
logger.warning(f'Error 5XX. Retrying {url}')
|
||||
return await asyncio.gather(crawl(url, session))
|
||||
|
||||
if response.status not in {200, 304}:
|
||||
|
@@ -272,7 +273,7 @@ async def crawl(url: str, session: aiohttp.ClientSession):
|
|||
except ClientConnectorError:
|
||||
logger.warning(f'Wrong link: {url}')
|
||||
except (ServerDisconnectedError, TimeoutError):
|
||||
logger.warning(f'Retrying {url}')
|
||||
logger.warning(f'Client or timeout error. Retrying {url}')
|
||||
VISITED_LINKS.remove(url)
|
||||
await asyncio.gather(crawl(url, session))
|
||||
|
||||
|
|
Loading…
Reference in a new issue