mirror of
https://github.com/MarshalX/telegram-crawler.git
synced 2025-03-23 16:49:51 +01:00
exclude subnets
This commit is contained in:
parent
4b10c29ad0
commit
f11e6e7c24
1 changed file with 3 additions and 0 deletions
|
@@ -25,6 +25,8 @@ PASSPORT_SSID_REGEX = r'passport_ssid=[a-z0-9]+_[a-z0-9]+_[a-z0-9]+'
|
||||||
PASSPORT_SSID_TEMPLATE = f'passport_ssid={DYNAMIC_PART_MOCK}'
|
PASSPORT_SSID_TEMPLATE = f'passport_ssid={DYNAMIC_PART_MOCK}'
|
||||||
NONCE_REGEX = r'"nonce":"[a-z0-9]+_[a-z0-9]+_[a-z0-9]+'
|
NONCE_REGEX = r'"nonce":"[a-z0-9]+_[a-z0-9]+_[a-z0-9]+'
|
||||||
NONCE_TEMPLATE = f'"nonce":"{DYNAMIC_PART_MOCK}'
|
NONCE_TEMPLATE = f'"nonce":"{DYNAMIC_PART_MOCK}'
|
||||||
|
PROXY_CONFIG_SUB_NET_REGEX = r'\d+:8888;'
|
||||||
|
PROXY_CONFIG_SUB_NET_TEMPLATE = 'X:8888;'
|
||||||
|
|
||||||
# unsecure but so simple
|
# unsecure but so simple
|
||||||
CONNECTOR = aiohttp.TCPConnector(ssl=False)
|
CONNECTOR = aiohttp.TCPConnector(ssl=False)
|
||||||
|
@@ -61,6 +63,7 @@ async def crawl(url: str, session: aiohttp.ClientSession):
|
||||||
content = re.sub(PAGE_API_HASH_REGEX, PAGE_API_HASH_TEMPLATE, content)
|
content = re.sub(PAGE_API_HASH_REGEX, PAGE_API_HASH_TEMPLATE, content)
|
||||||
content = re.sub(PASSPORT_SSID_REGEX, PASSPORT_SSID_TEMPLATE, content)
|
content = re.sub(PASSPORT_SSID_REGEX, PASSPORT_SSID_TEMPLATE, content)
|
||||||
content = re.sub(NONCE_REGEX, NONCE_TEMPLATE, content)
|
content = re.sub(NONCE_REGEX, NONCE_TEMPLATE, content)
|
||||||
|
content = re.sub(PROXY_CONFIG_SUB_NET_REGEX, PROXY_CONFIG_SUB_NET_TEMPLATE, content)
|
||||||
|
|
||||||
logger.info(f'Write to {filename}')
|
logger.info(f'Write to {filename}')
|
||||||
await f.write(content)
|
await f.write(content)
|
||||||
|
|
Loading…
Add table
Reference in a new issue