Mirror of https://github.com/MarshalX/telegram-crawler.git, synced 2024-11-30 12:12:12 +01:00
Revert "Revert "add crawling of strings from telegram beta for android""
This reverts commit fba4626c10
.
This commit is contained in:
parent
fba4626c10
commit
177e6fca87
1 changed files with 50 additions and 0 deletions
@@ -2,6 +2,7 @@ import asyncio
 import logging
 import os
 import re
+import subprocess
 from asyncio.exceptions import TimeoutError
 from string import punctuation, whitespace
 from time import time
@@ -39,6 +40,52 @@ logging.basicConfig(format='%(message)s', level=logging.DEBUG)
 logger = logging.getLogger(__name__)
 
 
+async def download_apk_and_extract_resources(session: aiohttp.ClientSession):
+    api_base = 'https://install.appcenter.ms/api/v0.1'
+    parameterized_url = 'apps/drklo-2kb-ghpo/telegram-beta-2/distribution_groups/all-users-of-telegram-beta-2'
+    url = f'{api_base}/{parameterized_url}'
+
+    latest_id = download_url = None
+
+    async with session.get(f'{url}/public_releases') as response:
+        if response.status != 200: return
+        json = await response.json(encoding='UTF-8')
+        if json and json[0]:
+            latest_id = json[0]['id']
+
+    if not latest_id:
+        return
+
+    async with session.get(f'{url}/releases/{latest_id}') as response:
+        if response.status != 200: return
+        json = await response.json(encoding='UTF-8')
+        if not json: return
+        download_url = json['download_url']
+
+    if not download_url:
+        return
+
+    async def download_file(url, path):
+        async with session.get(url) as response:
+            if response.status != 200: return
+            async with aiofiles.open(path, mode='wb') as f:
+                await f.write(await response.read())
+
+    await download_file('https://bitbucket.org/iBotPeaches/apktool/downloads/apktool_2.6.1.jar', 'tool.apk')
+    await download_file(download_url, 'app.apk')
+
+    # synced but I don't have about it ;d
+    subprocess.call(['java', '-jar', 'tool.apk', 'd', '-s', '-f', 'app.apk'])
+
+    path_to_strings = 'res/values/strings.xml'
+
+    filename = os.path.join(OUTPUT_FOLDER, 'telegram-beta-android', path_to_strings)
+    os.makedirs(os.path.dirname(filename), exist_ok=True)
+    async with aiofiles.open(filename, 'w') as f:
+        async with aiofiles.open(os.path.join('app', path_to_strings), 'r') as ff:
+            await f.write(await ff.read())
+
+
 async def collect_translations_paginated_content(url: str, session: aiohttp.ClientSession) -> str:
     headers = {'X-Requested-With': 'XMLHttpRequest'}
     content = list()
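For orientation (this note and the sketch below are editorial, not part of the commit): the coroutine added above queries the public AppCenter API for the telegram-beta-2 app (public_releases to find the latest release id, then releases/{id} to get its download_url), downloads apktool and the beta APK, decodes the APK with apktool (d -s -f: decode resources without disassembling sources, overwriting any previous output, which by default lands in an app/ directory), and copies res/values/strings.xml into the crawler's output tree. The copied file is an ordinary Android string-resource XML; a minimal, purely illustrative way to read it back with the standard library (the exact output path depends on OUTPUT_FOLDER) would be:

    # illustrative only, not part of this commit: load name -> value pairs
    # from the copied Android strings.xml
    import xml.etree.ElementTree as ET

    def load_android_strings(path: str) -> dict:
        root = ET.parse(path).getroot()  # the <resources> element
        return {el.attrib['name']: (el.text or '')
                for el in root.findall('string')}  # direct <string> children

    # e.g. load_android_strings('<OUTPUT_FOLDER>/telegram-beta-android/res/values/strings.xml')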
@@ -113,6 +160,9 @@ async def start(url_list: set[str]):
     async with aiohttp.ClientSession(connector=CONNECTOR) as session:
         await asyncio.gather(*[crawl(url, session) for url in url_list])
 
+        # yeap it will be called each run, and what? ;d
+        await download_apk_and_extract_resources(session)
+
 
 if __name__ == '__main__':
     with open(INPUT_FILENAME, 'r') as f:
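The hunk above wires the new step into the existing crawl: start() reuses the same aiohttp session and awaits download_apk_and_extract_resources on every run. Because the coroutine only needs a session, it can also be exercised on its own; a minimal sketch, assuming it is importable from the crawler module and that Java (for apktool) and aiofiles are available, just as the crawler itself requires:

    # minimal standalone harness (editorial sketch, not part of the commit)
    import asyncio
    import aiohttp

    async def run_once():
        async with aiohttp.ClientSession() as session:
            await download_apk_and_extract_resources(session)

    if __name__ == '__main__':
        asyncio.run(run_once())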