exclude frequently updated pages

This commit is contained in:
Il'ya (Marshal) 2021-05-01 15:10:20 +02:00
parent af69ce04f1
commit e55ff9fdbc

View file

@@ -47,9 +47,6 @@ CRAWL_RULES = {
},
},
'instantview.telegram.org': {
'allow': {
'contest/winners'
},
'deny': {
'file/',
@@ -62,6 +59,8 @@ CRAWL_RULES = {
'deny': {
'file/',
'bots/payments',
'tdlib/docs/classtd',
'constructor/',
@@ -74,7 +73,17 @@ CRAWL_RULES = {
'file/',
r'apps$'
},
}
},
'webz.telegram.org': {
'deny': {
'',
},
},
'webk.telegram.org': {
'deny': {
'',
},
},
}
# Pattern for a direct link, wrapped in a single capturing group: 0 to 249
# characters drawn from letters, digits and the symbols - @ : % . _ + ~ #,
# immediately followed by BASE_URL_REGEX (defined elsewhere in this module).
DIRECT_LINK_REGEX = r'([-a-zA-Z0-9@:%._\+~#]{0,249}' + BASE_URL_REGEX + r')'