mirror of
https://github.com/MarshalX/telegram-crawler.git
synced 2025-01-20 16:15:08 +01:00
fix wildcard
This commit is contained in:
parent
1b994f6df3
commit
621e96143f
1 changed file with 8 additions and 0 deletions
|
@@ -49,6 +49,10 @@ HIDDEN_URLS = {
|
|||
'webappcontent.telegram.org/cafe', # demo 2
|
||||
# 'a-webappcontent.stel.com/demo',
|
||||
# 'a-webappcontent.stel.com/cafe',
|
||||
|
||||
# 'fragment.com/about',
|
||||
# 'fragment.com/privacy',
|
||||
# 'fragment.com/terms',
|
||||
}
|
||||
ADDITIONAL_URLS = {
|
||||
'raw.githubusercontent.com/telegramdesktop/tdesktop/dev/Telegram/Resources/tl/mtproto.tl',
|
||||
|
@@ -280,6 +284,10 @@ def cleanup_links(links: set[str]) -> set[str]:
|
|||
if '@' in link_parts[0]:
|
||||
continue
|
||||
|
||||
# fix wildcard
|
||||
if link.startswith('.'):
|
||||
link = link[1:]
|
||||
|
||||
cleaned_links.add(link)
|
||||
|
||||
return cleaned_links
|
||||
|
|
Loading…
Reference in a new issue