Escape HTML and Markdown special characters in Message.text_html and Message.text_markdown (#621)

* Escape HTML and Markdown special characters in Message.text_html and Message.text_markdown

* add import for escape_html
This commit is contained in:
Jannes Höke 2017-05-19 19:11:40 +02:00 committed by GitHub
parent 9720f59d7e
commit 9aa5522694
3 changed files with 19 additions and 11 deletions

View file

@ -24,6 +24,7 @@ from time import mktime
from telegram import (Audio, Contact, Document, Chat, Location, PhotoSize, Sticker, TelegramObject, from telegram import (Audio, Contact, Document, Chat, Location, PhotoSize, Sticker, TelegramObject,
User, Video, Voice, Venue, MessageEntity, Game) User, Video, Voice, Venue, MessageEntity, Game)
from telegram.utils.helpers import escape_html, escape_markdown
class Message(TelegramObject): class Message(TelegramObject):
@ -635,6 +636,7 @@ class Message(TelegramObject):
last_offset = 0 last_offset = 0
for entity, text in sorted(entities.items(), key=(lambda item: item[0].offset)): for entity, text in sorted(entities.items(), key=(lambda item: item[0].offset)):
text = escape_html(text)
if entity.type == MessageEntity.TEXT_LINK: if entity.type == MessageEntity.TEXT_LINK:
insert = '<a href="{}">{}</a>'.format(entity.url, text) insert = '<a href="{}">{}</a>'.format(entity.url, text)
@ -649,7 +651,7 @@ class Message(TelegramObject):
else: else:
insert = text insert = text
markdown_text += message_text[last_offset:entity.offset] + insert markdown_text += escape_html(message_text[last_offset:entity.offset]) + insert
last_offset = entity.offset + entity.length last_offset = entity.offset + entity.length
markdown_text += message_text[last_offset:] markdown_text += message_text[last_offset:]
@ -673,6 +675,7 @@ class Message(TelegramObject):
last_offset = 0 last_offset = 0
for entity, text in sorted(entities.items(), key=(lambda item: item[0].offset)): for entity, text in sorted(entities.items(), key=(lambda item: item[0].offset)):
text = escape_markdown(text)
if entity.type == MessageEntity.TEXT_LINK: if entity.type == MessageEntity.TEXT_LINK:
insert = '[{}]({})'.format(text, entity.url) insert = '[{}]({})'.format(text, entity.url)
@ -687,7 +690,7 @@ class Message(TelegramObject):
else: else:
insert = text insert = text
markdown_text += message_text[last_offset:entity.offset] + insert markdown_text += escape_markdown(message_text[last_offset:entity.offset]) + insert
last_offset = entity.offset + entity.length last_offset = entity.offset + entity.length
markdown_text += message_text[last_offset:] markdown_text += message_text[last_offset:]

View file

@ -20,6 +20,11 @@
import re import re
try:
from html import escape as escape_html # noqa: F401
except ImportError:
from cgi import escape as escape_html # noqa: F401
def escape_markdown(text): def escape_markdown(text):
"""Helper function to escape telegram markup symbols""" """Helper function to escape telegram markup symbols"""

View file

@ -37,33 +37,33 @@ class MessageTest(BaseTest, unittest.TestCase):
self.test_entities = [ self.test_entities = [
{ {
'length': 4, 'length': 4,
'offset': 9, 'offset': 10,
'type': 'bold' 'type': 'bold'
}, },
{ {
'length': 6, 'length': 7,
'offset': 15, 'offset': 16,
'type': 'italic' 'type': 'italic'
}, },
{ {
'length': 4, 'length': 4,
'offset': 23, 'offset': 25,
'type': 'code' 'type': 'code'
}, },
{ {
'length': 5, 'length': 5,
'offset': 29, 'offset': 31,
'type': 'text_link', 'type': 'text_link',
'url': 'http://github.com/' 'url': 'http://github.com/'
}, },
{ {
'length': 3, 'length': 3,
'offset': 39, 'offset': 41,
'type': 'pre' 'type': 'pre'
}, },
] ]
self.test_text = 'Test for bold, italic, code, links and pre.' self.test_text = 'Test for <bold, ita_lic, code, links and pre.'
self.test_message = telegram.Message( self.test_message = telegram.Message(
message_id=1, message_id=1,
from_user=None, from_user=None,
@ -99,12 +99,12 @@ class MessageTest(BaseTest, unittest.TestCase):
entity_2: 'h'}) entity_2: 'h'})
def test_text_html(self): def test_text_html(self):
test_html_string = 'Test for <b>bold</b>, <i>italic</i>, <code>code</code>, ' '<a href="http://github.com/">links</a> and <pre>pre</pre>.' test_html_string = 'Test for &lt;<b>bold</b>, <i>ita_lic</i>, <code>code</code>, <a href="http://github.com/">links</a> and <pre>pre</pre>.'
text_html = self.test_message.text_html text_html = self.test_message.text_html
self.assertEquals(test_html_string, text_html) self.assertEquals(test_html_string, text_html)
def test_text_markdown(self): def test_text_markdown(self):
test_md_string = 'Test for *bold*, _italic_, `code`, [links](http://github.com/) and ```pre```.' test_md_string = 'Test for <*bold*, _ita\_lic_, `code`, [links](http://github.com/) and ```pre```.'
text_markdown = self.test_message.text_markdown text_markdown = self.test_message.text_markdown
self.assertEquals(test_md_string, text_markdown) self.assertEquals(test_md_string, text_markdown)