Mirror of https://github.com/MarshalX/telegram-crawler.git (synced 2025-02-26 22:58:25 +01:00)
update apktool

parent 9ba3442ca1
commit 7e6a4c05a1

1 changed file with 7 additions and 6 deletions
@@ -142,7 +142,6 @@ async def track_additional_files(
             content = get_hash(content)
         else:
             content = re.sub(r'id=".*"', 'id="tgcrawl"', content)
-            content = re.sub(r'name="APKTOOL_DUMMY_.*" id', 'name="tgcrawl" id', content)

         filename = os.path.join(output_dir_name, file)
         os.makedirs(os.path.dirname(filename), exist_ok=True)
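The substitution removed above used to pin apktool's auto-generated APKTOOL_DUMMY_* resource names so that re-decoding the same APK produced stable, diff-friendly XML; with the jar bumped to 2.9.0 that placeholder presumably no longer appears, leaving only the id normalization. A minimal sketch of the remaining normalization step, assuming plain decoded-XML strings (the real script applies the regex inline inside track_additional_files):

import re

def normalize_decoded_xml(content: str) -> str:
    # Pin volatile, build-specific id attributes to a constant so that
    # decoding the same resources twice does not produce noisy diffs.
    # (Illustrative sketch; track_additional_files() does this inline.)
    return re.sub(r'id=".*"', 'id="tgcrawl"', content)

# Two decodes that differ only in generated ids normalize identically.
a = normalize_decoded_xml('<item id="0x7f0a01c3" name="title"/>')
b = normalize_decoded_xml('<item id="0x7f0a01d9" name="title"/>')
assert a == b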
@@ -362,7 +361,7 @@ async def download_telegram_android_beta_and_extract_resources(session: aiohttp.
         return

     await asyncio.gather(
-        download_file('https://bitbucket.org/iBotPeaches/apktool/downloads/apktool_2.6.1.jar', 'tool.apk', session),
+        download_file('https://bitbucket.org/iBotPeaches/apktool/downloads/apktool_2.9.0.jar', 'tool.apk', session),
         download_file(download_url, 'android.apk', session),
     )

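download_file() here is the crawler's own helper and only the apktool URL changes; for context, a hypothetical implementation compatible with that call shape might look like the sketch below (an assumption, not the project's actual code):

import aiohttp

async def download_file(url: str, filename: str, session: aiohttp.ClientSession) -> None:
    # Hypothetical stand-in: stream the response to disk in chunks so a
    # large APK or jar does not have to fit in memory.
    async with session.get(url) as response:
        response.raise_for_status()
        with open(filename, 'wb') as f:
            async for chunk in response.content.iter_chunked(64 * 1024):
                f.write(chunk)

Streaming keeps memory usage flat regardless of file size; whether the real helper streams or buffers the whole body is an implementation detail not visible in this diff.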
@@ -709,8 +708,9 @@ async def _crawl(url: str, session: aiohttp.ClientSession, output_dir: str):
     is_hashable_only = is_hashable_only_content_type(response.content_type)
     # amazing dirt for media files like
     # telegram.org/file/811140591/1/q7zZHjgES6s/9d121a89ffb0015837
-    # with response content type HTML instead of image. Shame on you
-    # sometimes it returns correct type. noice load balancing
+    # with response content type HTML instead of image.
+    # shame on you.
+    # sometimes it returns a correct type. noice load balancing
     is_sucking_file = '/file/' in url and 'text' in response.content_type

     # handle pure domains and html pages without ext in url as html do enable syntax highlighting
@@ -720,8 +720,9 @@ async def _crawl(url: str, session: aiohttp.ClientSession, output_dir: str):

     ext = '.html' if page_type is None or len(url_parts) == 1 else ''

-    # I don't add ext by content type for images and so on cuz TG servers sucks.
-    # Some servers do not return correct content type. Some servers do...
+    # I don't add ext by content type for images, and so on cuz TG servers suck.
+    # Some servers do not return a correct content type.
+    # Some servers do...
     if is_hashable_only or is_sucking_file:
         ext = ''

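The two _crawl() hunks only rewrap the comments around an existing workaround: telegram.org/file/... URLs sometimes come back with a text content type even though they are media, so such responses (like hash-only content types) get no extension appended. A condensed, self-contained sketch of that decision, with a guessed stand-in for the repo's is_hashable_only_content_type() helper:

def is_hashable_only_content_type(content_type: str) -> bool:
    # Guessed stand-in for the crawler's helper: treat binary-ish responses
    # as "hash only" (store a hash of the body rather than the body itself).
    return any(t in content_type for t in ('image', 'font', 'octet-stream', 'zip'))

def choose_extension(url: str, content_type: str, page_type, url_parts) -> str:
    # Condensed view of the logic in the hunks above; packaging it into a
    # single function is an assumption made for illustration only.
    is_hashable_only = is_hashable_only_content_type(content_type)
    is_sucking_file = '/file/' in url and 'text' in content_type
    ext = '.html' if page_type is None or len(url_parts) == 1 else ''
    if is_hashable_only or is_sucking_file:
        ext = ''
    return ext

# A mislabelled media file: no extension is appended to the saved path.
assert choose_extension(
    'https://telegram.org/file/811140591/1/q7zZHjgES6s/9d121a89ffb0015837',
    'text/html', page_type=None, url_parts=['telegram.org', 'file'],
) == ''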