Extend translation info (URL, photo, binding, object structure); add the translations hashtag; improve error handling for translations.

This commit is contained in:
Il'ya (Marshal) 2022-06-18 16:50:14 +02:00
parent 81c2684608
commit ec10d811e6
3 changed files with 31 additions and 7 deletions

View file

@ -41,7 +41,7 @@ STATUS_TO_EMOJI = {
}
AVAILABLE_HASHTAGS = {
'web_res', 'web', 'server', 'test_server', 'client', 'ios', 'macos', 'android'
'web_res', 'web', 'server', 'test_server', 'client', 'ios', 'macos', 'android', 'translations'
}
HASHTAGS_PATTERNS = {
# regex will be more flexible. for example, in issue with double hashtag '#web #web_res' when data/res not changed
@ -53,6 +53,7 @@ HASHTAGS_PATTERNS = {
'ios': os.path.join(ROOT_TREE_DIR, 'client', 'ios-beta'),
'macos': os.path.join(ROOT_TREE_DIR, 'client', 'macos-beta'),
'android': os.path.join(ROOT_TREE_DIR, 'client', 'android-beta'),
'translations': os.path.join(ROOT_TREE_DIR, 'web', 'translations.telegram.org'),
}
# order is important!
PATHS_TO_REMOVE_FROM_ALERT = [

View file

@ -15,7 +15,6 @@ from typing import List
import aiofiles
import aiohttp
from aiohttp import ClientConnectorError, ServerDisconnectedError
from bs4 import BeautifulSoup
import ccl_bplist
@ -344,6 +343,11 @@ async def download_telegram_android_beta_and_extract_resources(session: aiohttp.
async def collect_translations_paginated_content(url: str, session: aiohttp.ClientSession) -> str:
import cssutils
from bs4 import BeautifulSoup
css_parser = cssutils.CSSParser(loglevel=logging.FATAL, raiseExceptions=False)
headers = {'X-Requested-With': 'XMLHttpRequest'}
content = dict()
@ -356,7 +360,7 @@ async def collect_translations_paginated_content(url: str, session: aiohttp.Clie
async with session.post(
f'{PROTOCOL}{url}', data=data, headers=headers, allow_redirects=False, timeout=TIMEOUT
) as response:
if response.status != 200:
if (499 < response.status < 600) or (response.status != 200):
logger.debug(f'Resend cuz {response.status}')
new_offset = offset
else:
@ -369,15 +373,33 @@ async def collect_translations_paginated_content(url: str, session: aiohttp.Clie
for tr_item in tr_items:
tr_key = tr_item.find_next('div', {'class': 'tr-value-key'}).text
tr_values = tr_item.find_all('span', {'class': 'value'})
tr_values_content = [tr_value.decode_contents() for tr_value in tr_values]
tr_url = tr_item.find_next('div', {'class': 'tr-key-row'})['data-href']
tr_url = f'https://translations.telegram.org{tr_url}'
content[tr_key] = tr_values_content
tr_photo = tr_item.find_next('a', {'class': 'tr-value-photo'})
if tr_photo:
tr_photo = css_parser.parseStyle(tr_photo['style']).backgroundImage[5:-2]
tr_has_binding = tr_item.find_next('span', {'class': 'has-1binding binding'})
tr_has_binding = tr_has_binding is not None
tr_values = tr_item.find_all('span', {'class': 'value'})
tr_value_singular, *tr_value_plural = [tr_value.decode_contents() for tr_value in tr_values]
tr_values = {'singular': tr_value_singular}
if tr_value_plural:
tr_values['plural'] = tr_value_plural[0]
content[tr_key] = {
'url': tr_url,
'photo_url': tr_photo,
'has_binding': tr_has_binding is not None,
'values': tr_values,
}
new_offset = offset + 200
new_offset and await _get_page(new_offset)
except (TimeoutError, ClientConnectorError):
except (ServerDisconnectedError, TimeoutError, ClientConnectorError):
logger.warning(f'Client or timeout error. Retrying {url}; offset {offset}')
await _get_page(offset)

View file

@ -4,4 +4,5 @@ aiofiles==0.6.0
git+https://github.com/pyrogram/pyrogram@7f9e841ccd44246ad855ad4855a6431a5823c554
TgCrypto==1.2.3
beautifulsoup4==4.11.1
cssutils==2.4.2
# uvloop==0.16.0