From d0c516ae055ae0a2b24f3704910df25d0e01e19a Mon Sep 17 00:00:00 2001
From: "Ilya (Marshal)" <ilya@marshal.dev>
Date: Sat, 28 Oct 2023 21:13:49 +0200
Subject: [PATCH] fix content type: override it from URL hints before ext/hash checks

---
 make_files_tree.py | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/make_files_tree.py b/make_files_tree.py
index 2271bcd47c..3c67898558 100644
--- a/make_files_tree.py
+++ b/make_files_tree.py
@@ -706,20 +706,30 @@ async def _crawl(url: str, session: aiohttp.ClientSession, output_dir: str):
         # bypass external slashes and so on
         url_parts = [p for p in url.split('/') if p not in ILLEGAL_PATH_CHARS]
 
-        is_hashable_only = is_hashable_only_content_type(response.content_type)
-        # amazing dirt for media files like
-        # telegram.org/file/811140591/1/q7zZHjgES6s/9d121a89ffb0015837
-        # with response content type HTML instead of image.
-        # shame on you.
-        # sometimes it returns a correct type. noice load balancing
-        is_sucking_file = '/file/' in url and 'text' in response.content_type
+        content_type = response.content_type
 
         # handle pure domains and html pages without ext in url as html do enable syntax highlighting
         page_type, _ = mimetypes.guess_type(url)
         if url.endswith('.tl'):
             page_type = 'text/plain'
 
-        ext = '.html' if page_type is None or len(url_parts) == 1 else ''
+        ext = ''
+        if page_type is None or len(url_parts) == 1:
+            ext = '.html'
+            content_type = 'text/html'
+
+        if re.search(TRANSLATIONS_EN_CATEGORY_URL_REGEX, url) or 'td.telegram.org/current' in url:
+            ext = '.json'
+            content_type = 'application/json'
+
+        is_hashable_only = is_hashable_only_content_type(content_type)
+        # Workaround for media files such as
+        # telegram.org/file/811140591/1/q7zZHjgES6s/9d121a89ffb0015837
+        # whose response content type is HTML instead of an image.
+        # The server sometimes returns the correct type (it seems to vary
+        # between requests), so such URLs are flagged here and get no
+        # file extension below.
+        is_sucking_file = '/file/' in url and 'text' in content_type
 
         # I don't add ext by content type for images, and so on cuz TG servers suck.
         # Some servers do not return a correct content type.
@@ -727,9 +737,6 @@ async def _crawl(url: str, session: aiohttp.ClientSession, output_dir: str):
         if is_hashable_only or is_sucking_file:
             ext = ''
 
-        if re.search(TRANSLATIONS_EN_CATEGORY_URL_REGEX, url):
-            ext = '.json'
-
         filename = os.path.join(output_dir, *url_parts) + ext
         os.makedirs(os.path.dirname(filename), exist_ok=True)