mirror of
https://github.com/MarshalX/telegram-crawler.git
synced 2025-01-06 02:45:30 +01:00
exclude subnets
This commit is contained in:
parent
4b10c29ad0
commit
f11e6e7c24
1 changed file with 3 additions and 0 deletions
|
@@ -25,6 +25,8 @@ PASSPORT_SSID_REGEX = r'passport_ssid=[a-z0-9]+_[a-z0-9]+_[a-z0-9]+'
|
|||
PASSPORT_SSID_TEMPLATE = f'passport_ssid={DYNAMIC_PART_MOCK}'
|
||||
NONCE_REGEX = r'"nonce":"[a-z0-9]+_[a-z0-9]+_[a-z0-9]+'
|
||||
NONCE_TEMPLATE = f'"nonce":"{DYNAMIC_PART_MOCK}'
|
||||
PROXY_CONFIG_SUB_NET_REGEX = r'\d+:8888;'
|
||||
PROXY_CONFIG_SUB_NET_TEMPLATE = 'X:8888;'
|
||||
|
||||
# unsecure but so simple
|
||||
CONNECTOR = aiohttp.TCPConnector(ssl=False)
|
||||
|
@@ -61,6 +63,7 @@ async def crawl(url: str, session: aiohttp.ClientSession):
|
|||
content = re.sub(PAGE_API_HASH_REGEX, PAGE_API_HASH_TEMPLATE, content)
|
||||
content = re.sub(PASSPORT_SSID_REGEX, PASSPORT_SSID_TEMPLATE, content)
|
||||
content = re.sub(NONCE_REGEX, NONCE_TEMPLATE, content)
|
||||
content = re.sub(PROXY_CONFIG_SUB_NET_REGEX, PROXY_CONFIG_SUB_NET_TEMPLATE, content)
|
||||
|
||||
logger.info(f'Write to {filename}')
|
||||
await f.write(content)
|
||||
|
|
Loading…
Reference in a new issue