add tracking of tdesktop updates

This commit is contained in:
Il'ya (Marshal) 2022-04-17 23:31:44 +02:00
parent 6a389f4635
commit 65628a2c23
2 changed files with 10 additions and 7 deletions

View file

@@ -72,7 +72,7 @@ async def main():
changes[file['status']].append(f'{status} <code>{changed_url}</code>')
alert_text = f'<b>New changes on Telegram sites</b>\n\n'
alert_text = f'<b>New changes of Telegram</b>\n\n'
for i, [status, text_list] in enumerate(changes.items()):
if not text_list:

View file

@@ -23,6 +23,8 @@ HIDDEN_URLS = {
'telegram.org/evolution',
'desktop.telegram.org/changelog',
'td.telegram.org/current',
'td.telegram.org/current2',
'osx.telegram.org/updates/versions.xml', # stable
'api.appcenter.ms/v0.1/public/sparkle/apps/6ed2ac30-49e1-4073-87c2-f1ffcb74e81f', # beta
@@ -36,9 +38,6 @@ HIDDEN_URLS = {
'core.telegram.org/video_stickers',
'core.telegram.org/stickers',
# temp
'telegram.org/blog/video-stickers-better-reactions',
'promote.telegram.org',
'contest.com',
@@ -112,6 +111,7 @@ CRAWL_RULES = {
'telegram.org': {
'deny': {
r'apps$',
r'img/StickerExample.psd$',
},
},
'webz.telegram.org': {
@@ -231,6 +231,7 @@ def cleanup_links(links: set[str]) -> set[str]:
link = link.replace('www.', '')
link = link.replace('http://', '').replace('https://', '')
link = link.replace('//', '/') # not a universal solution
link = link.replace('"', '') # regex fix hack
# skip anchor links
if '#' in link:
@@ -262,6 +263,7 @@ def is_trackable_content_type(content_type) -> bool:
'gif',
'mp4',
'webm',
'application/octet-stream', # td updates
)
for trackable_content_type in trackable_content_types:
@@ -321,11 +323,12 @@ async def crawl(url: str, session: aiohttp.ClientSession):
LINKS_TO_TRACK.remove(f'{without_trailing_slash}/')
except UnicodeDecodeError:
logger.warning(f'Codec can\'t decode bytes. So it was a tgs file or response with broken content type {url}')
except ClientConnectorError:
logger.warning(f'Wrong link: {url}')
except (ServerDisconnectedError, TimeoutError):
# except ClientConnectorError:
# logger.warning(f'Wrong link: {url}')
except (ServerDisconnectedError, TimeoutError, ClientConnectorError):
logger.warning(f'Client or timeout error. Retrying {url}')
VISITED_LINKS.remove(url)
# sleep + count of attempts?
await asyncio.gather(crawl(url, session))