mirror of
https://github.com/MarshalX/telegram-crawler.git
synced 2025-01-08 11:34:08 +01:00
track fragment
This commit is contained in:
parent
194ff9888c
commit
2dc60397dc
2 changed files with 7 additions and 3 deletions
|
@@ -38,6 +38,8 @@ TRANSLATIONS_EN_CATEGORY_URL_REGEX = r'/en/[a-z_]+/[a-z_]+/$'
|
||||||
PAGE_GENERATION_TIME_REGEX = r'<!-- page generated in .+ -->'
|
PAGE_GENERATION_TIME_REGEX = r'<!-- page generated in .+ -->'
|
||||||
PAGE_API_HASH_REGEX = r'\?hash=[a-z0-9]+'
|
PAGE_API_HASH_REGEX = r'\?hash=[a-z0-9]+'
|
||||||
PAGE_API_HASH_TEMPLATE = f'?hash={DYNAMIC_PART_MOCK}'
|
PAGE_API_HASH_TEMPLATE = f'?hash={DYNAMIC_PART_MOCK}'
|
||||||
|
TON_RATE_REGEX = r'"tonRate":"[.0-9]+"'
|
||||||
|
TON_RATE_TEMPLATE = f'"tonRate":"{DYNAMIC_PART_MOCK}"'
|
||||||
PASSPORT_SSID_REGEX = r'passport_ssid=[a-z0-9]+_[a-z0-9]+_[a-z0-9]+'
|
PASSPORT_SSID_REGEX = r'passport_ssid=[a-z0-9]+_[a-z0-9]+_[a-z0-9]+'
|
||||||
PASSPORT_SSID_TEMPLATE = f'passport_ssid={DYNAMIC_PART_MOCK}'
|
PASSPORT_SSID_TEMPLATE = f'passport_ssid={DYNAMIC_PART_MOCK}'
|
||||||
NONCE_REGEX = r'"nonce":"[a-z0-9]+_[a-z0-9]+_[a-z0-9]+'
|
NONCE_REGEX = r'"nonce":"[a-z0-9]+_[a-z0-9]+_[a-z0-9]+'
|
||||||
|
@@ -633,6 +635,7 @@ async def _crawl(url: str, session: aiohttp.ClientSession, output_dir: str):
|
||||||
content = re.sub(PROXY_CONFIG_SUB_NET_REGEX, PROXY_CONFIG_SUB_NET_TEMPLATE, content)
|
content = re.sub(PROXY_CONFIG_SUB_NET_REGEX, PROXY_CONFIG_SUB_NET_TEMPLATE, content)
|
||||||
content = re.sub(SPARKLE_SIG_REGEX, SPARKLE_SIG_TEMPLATE, content)
|
content = re.sub(SPARKLE_SIG_REGEX, SPARKLE_SIG_TEMPLATE, content)
|
||||||
content = re.sub(SPARKLE_SE_REGEX, SPARKLE_SE_TEMPLATE, content)
|
content = re.sub(SPARKLE_SE_REGEX, SPARKLE_SE_TEMPLATE, content)
|
||||||
|
content = re.sub(TON_RATE_REGEX, TON_RATE_TEMPLATE, content)
|
||||||
|
|
||||||
async with aiofiles.open(filename, 'w', encoding='utf-8') as f:
|
async with aiofiles.open(filename, 'w', encoding='utf-8') as f:
|
||||||
logger.info(f'Write to {filename}')
|
logger.info(f'Write to {filename}')
|
||||||
|
|
|
@@ -50,9 +50,10 @@ HIDDEN_URLS = {
|
||||||
# 'a-webappcontent.stel.com/demo',
|
# 'a-webappcontent.stel.com/demo',
|
||||||
# 'a-webappcontent.stel.com/cafe',
|
# 'a-webappcontent.stel.com/cafe',
|
||||||
|
|
||||||
# 'fragment.com/about',
|
'fragment.com/about',
|
||||||
# 'fragment.com/privacy',
|
'fragment.com/privacy',
|
||||||
# 'fragment.com/terms',
|
'fragment.com/terms',
|
||||||
|
'fragment.com/js/auction.js',
|
||||||
}
|
}
|
||||||
ADDITIONAL_URLS = {
|
ADDITIONAL_URLS = {
|
||||||
'raw.githubusercontent.com/telegramdesktop/tdesktop/dev/Telegram/Resources/tl/mtproto.tl',
|
'raw.githubusercontent.com/telegramdesktop/tdesktop/dev/Telegram/Resources/tl/mtproto.tl',
|
||||||
|
|
Loading…
Reference in a new issue