From 14fb9bc6fc65a498b37a9ccc4dc992760093f59f Mon Sep 17 00:00:00 2001
From: "Il'ya (Marshal)"
Date: Mon, 18 Apr 2022 00:11:21 +0200
Subject: [PATCH] a little speedup

---
 make_files_tree.py         | 25 +++++++++++++------------
 make_tracked_links_list.py |  9 +++++++--
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/make_files_tree.py b/make_files_tree.py
index fb91f2a671..84effb9c5d 100644
--- a/make_files_tree.py
+++ b/make_files_tree.py
@@ -13,7 +13,7 @@ from typing import List
 
 import aiofiles
 import aiohttp
-from aiohttp import ClientConnectorError
+from aiohttp import ClientConnectorError, ServerDisconnectedError
 
 PROTOCOL = 'https://'
 ILLEGAL_PATH_CHARS = punctuation.replace('.', '') + whitespace
@@ -229,15 +229,15 @@ async def collect_translations_paginated_content(url: str, session: aiohttp.Clie
             ) as response:
                 if response.status != 200:
                     logger.debug(f'Resend cuz {response.status}')
-                    return await asyncio.gather(_get_page(offset))
+                    return await _get_page(offset)
 
                 json = await response.json(encoding='UTF-8')
                 if 'more_html' in json and json['more_html']:
                     content.append(json['more_html'])
-                    await asyncio.gather(_get_page(offset + 200))
+                    await _get_page(offset + 200)
         except (TimeoutError, ClientConnectorError):
             logger.warning(f'Client or timeout error. Retrying {url}; offset {offset}')
-            await asyncio.gather(_get_page(offset))
+            await _get_page(offset)
 
     await _get_page(0)
 
@@ -267,7 +267,7 @@ async def crawl(url: str, session: aiohttp.ClientSession):
         async with session.get(f'{PROTOCOL}{url}', allow_redirects=False, timeout=TIMEOUT) as response:
             if response.status // 100 == 5:
                 logger.warning(f'Error 5XX. Retrying {url}')
-                return await asyncio.gather(crawl(url, session))
+                return await crawl(url, session)
 
             if response.status not in {200, 304}:
                 if response.status != 302:
@@ -318,18 +318,19 @@ async def crawl(url: str, session: aiohttp.ClientSession):
                     logger.info(f'Write to {filename}')
                     await f.write(content)
 
-    except (TimeoutError, ClientConnectorError):
+    except (ServerDisconnectedError, TimeoutError, ClientConnectorError):
         logger.warning(f'Client or timeout error. Retrying {url}')
-        await asyncio.gather(crawl(url, session))
+        await crawl(url, session)
 
 
 async def start(url_list: set[str]):
     async with aiohttp.ClientSession(connector=CONNECTOR) as session:
-        await asyncio.gather(*[crawl(url, session) for url in url_list])
-
-        # yeap it will be called each run, and what? ;d
-        await download_telegram_android_beta_and_extract_resources(session)
-        await download_telegram_macos_beta_and_extract_resources(session)
+        await asyncio.gather(
+            *[crawl(url, session) for url in url_list],
+            # yeap it will be called each run, and what? ;d
+            download_telegram_android_beta_and_extract_resources(session),
+            download_telegram_macos_beta_and_extract_resources(session),
+        )
 
 
 if __name__ == '__main__':
diff --git a/make_tracked_links_list.py b/make_tracked_links_list.py
index d896c3f9dc..d73d091936 100644
--- a/make_tracked_links_list.py
+++ b/make_tracked_links_list.py
@@ -71,6 +71,11 @@ CRAWL_RULES = {
             '',  # all
         }
     },
+    'osx.telegram.org': {
+        'deny': {
+            'updates/Telegram'
+        }
+    },
     'bugs.telegram.org': {  # crawl first page of cards sorted by rating
         'deny': {
             # r'/c/[0-9]+/[0-9]+',  # disable comments
@@ -286,7 +291,7 @@ async def crawl(url: str, session: aiohttp.ClientSession):
             if response.status // 100 == 5:
                 VISITED_LINKS.remove(url)
                 logger.warning(f'Error 5XX. Retrying {url}')
-                return await asyncio.gather(crawl(url, session))
+                return await crawl(url, session)
 
             if response.status not in {200, 304}:
                 if response.status != 302:
@@ -329,7 +334,7 @@ async def crawl(url: str, session: aiohttp.ClientSession):
             logger.warning(f'Client or timeout error. Retrying {url}')
             VISITED_LINKS.remove(url)
             # sleep + count of attempts?
-            await asyncio.gather(crawl(url, session))
+            await crawl(url, session)
 
 
 async def start(url_list: set[str]):
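
Why this is a speedup: awaiting asyncio.gather() with a single coroutine wraps
it in a Task and returns a one-element list just to await one thing, so a plain
await does the same work with less overhead (and return await crawl(url, session)
now returns the coroutine's value itself rather than a one-element list). The
larger win is in start(): the two download_*_and_extract_resources coroutines
previously ran one after the other, and only after every crawl task had
finished; scheduling them inside the same asyncio.gather call lets them overlap
with the crawl tasks. The patch also adds aiohttp's ServerDisconnectedError to
the retried exceptions, which previously was not caught and would propagate out
of crawl().

A minimal, self-contained sketch of the scheduling difference, not taken from
the repository: fetch, before, after, and the sleep durations are hypothetical
stand-ins for crawl() and the two download coroutines.

    import asyncio
    import time


    async def fetch(delay: float) -> None:
        # Hypothetical stand-in for crawl() / download_*_and_extract_resources().
        await asyncio.sleep(delay)


    async def before() -> None:
        # Old flow: the two long downloads start only after every crawl finished.
        await asyncio.gather(*[fetch(0.1) for _ in range(5)])
        await fetch(0.5)
        await fetch(0.5)


    async def after() -> None:
        # New flow: one gather schedules everything at once, so the work overlaps.
        await asyncio.gather(
            *[fetch(0.1) for _ in range(5)],
            fetch(0.5),
            fetch(0.5),
        )


    for runner in (before, after):
        t0 = time.perf_counter()
        asyncio.run(runner())
        print(f'{runner.__name__}: {time.perf_counter() - t0:.2f}s')
    # Expected: before ~1.1s (0.1 + 0.5 + 0.5), after ~0.5s (the longest delay).

The retry-by-recursion pattern (return await crawl(url, session)) keeps its
existing behavior; as the '# sleep + count of attempts?' comment hints, a
backoff and an attempt limit would make it more robust, but that is outside
the scope of this patch.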