mirror of
https://github.com/MarshalX/telegram-crawler.git
synced 2025-01-07 11:15:42 +01:00
resend requests with server error response
This commit is contained in:
parent
0b86273326
commit
12672aba45
2 changed files with 10 additions and 8 deletions
|
@ -38,12 +38,13 @@ async def crawl(url: str, session: aiohttp.ClientSession):
|
|||
try:
|
||||
logger.info(f'Process {url}')
|
||||
async with session.get(f'{PROTOCOL}{url}', allow_redirects=False) as response:
|
||||
if response.status == 302:
|
||||
return
|
||||
if response.status == 500:
|
||||
return await asyncio.gather(crawl(url, session))
|
||||
|
||||
if response.status != 200:
|
||||
content = await response.text()
|
||||
logger.debug(f'Skip {url} because status code == {response.status}. Content: {content}')
|
||||
if response.status != 302:
|
||||
content = await response.text()
|
||||
logger.debug(f'Skip {url} because status code == {response.status}. Content: {content}')
|
||||
return
|
||||
|
||||
# bypass external slashes and so on
|
||||
|
|
|
@ -198,12 +198,13 @@ async def crawl(url: str, session: aiohttp.ClientSession):
|
|||
async with session.get(f'{PROTOCOL}{url}', allow_redirects=False, timeout=TIMEOUT) as response:
|
||||
content_type = response.headers.get('content-type')
|
||||
|
||||
if response.status == 302:
|
||||
return
|
||||
if response.status == 500:
|
||||
return await asyncio.gather(crawl(url, session))
|
||||
|
||||
if response.status != 200:
|
||||
content = await response.text()
|
||||
logger.debug(f'Skip {url} because status code == {response.status}. Content: {content}')
|
||||
if response.status != 302:
|
||||
content = await response.text()
|
||||
logger.debug(f'Skip {url} because status code == {response.status}. Content: {content}')
|
||||
return
|
||||
|
||||
if 'text/html' in content_type:
|
||||
|
|
Loading…
Reference in a new issue