sanitize html and markdown in Message.text_html and text_markdown (#621)

* sanitize html and markdown in Message.text_html and text_markdown

* add import for escape_html
This commit is contained in:
Jannes Höke 2017-05-19 19:11:40 +02:00 committed by GitHub
parent 9720f59d7e
commit 9aa5522694
3 changed files with 19 additions and 11 deletions

View file

@ -24,6 +24,7 @@ from time import mktime
from telegram import (Audio, Contact, Document, Chat, Location, PhotoSize, Sticker, TelegramObject,
User, Video, Voice, Venue, MessageEntity, Game)
from telegram.utils.helpers import escape_html, escape_markdown
class Message(TelegramObject):
@ -635,6 +636,7 @@ class Message(TelegramObject):
last_offset = 0
for entity, text in sorted(entities.items(), key=(lambda item: item[0].offset)):
text = escape_html(text)
if entity.type == MessageEntity.TEXT_LINK:
insert = '<a href="{}">{}</a>'.format(entity.url, text)
@ -649,7 +651,7 @@ class Message(TelegramObject):
else:
insert = text
markdown_text += message_text[last_offset:entity.offset] + insert
markdown_text += escape_html(message_text[last_offset:entity.offset]) + insert
last_offset = entity.offset + entity.length
markdown_text += message_text[last_offset:]
@ -673,6 +675,7 @@ class Message(TelegramObject):
last_offset = 0
for entity, text in sorted(entities.items(), key=(lambda item: item[0].offset)):
text = escape_markdown(text)
if entity.type == MessageEntity.TEXT_LINK:
insert = '[{}]({})'.format(text, entity.url)
@ -687,7 +690,7 @@ class Message(TelegramObject):
else:
insert = text
markdown_text += message_text[last_offset:entity.offset] + insert
markdown_text += escape_markdown(message_text[last_offset:entity.offset]) + insert
last_offset = entity.offset + entity.length
markdown_text += message_text[last_offset:]

View file

@ -20,6 +20,11 @@
import re
try:
from html import escape as escape_html # noqa: F401
except ImportError:
from cgi import escape as escape_html # noqa: F401
def escape_markdown(text):
"""Helper function to escape telegram markup symbols"""

View file

@ -37,33 +37,33 @@ class MessageTest(BaseTest, unittest.TestCase):
self.test_entities = [
{
'length': 4,
'offset': 9,
'offset': 10,
'type': 'bold'
},
{
'length': 6,
'offset': 15,
'length': 7,
'offset': 16,
'type': 'italic'
},
{
'length': 4,
'offset': 23,
'offset': 25,
'type': 'code'
},
{
'length': 5,
'offset': 29,
'offset': 31,
'type': 'text_link',
'url': 'http://github.com/'
},
{
'length': 3,
'offset': 39,
'offset': 41,
'type': 'pre'
},
]
self.test_text = 'Test for bold, italic, code, links and pre.'
self.test_text = 'Test for <bold, ita_lic, code, links and pre.'
self.test_message = telegram.Message(
message_id=1,
from_user=None,
@ -99,12 +99,12 @@ class MessageTest(BaseTest, unittest.TestCase):
entity_2: 'h'})
def test_text_html(self):
    """Check that ``text_html`` renders the fixture message as sanitized HTML.

    The expected string wraps each entity in its HTML tag and requires the
    literal ``<`` that precedes the bold entity to appear as ``&lt;``.
    """
    # Only the post-sanitization expectation is kept; an earlier assignment
    # to the same name was overwritten before use and has been dropped.
    test_html_string = ('Test for &lt;<b>bold</b>, <i>ita_lic</i>, <code>code</code>, '
                        '<a href="http://github.com/">links</a> and <pre>pre</pre>.')
    text_html = self.test_message.text_html
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(test_html_string, text_html)
def test_text_markdown(self):
    """Check that ``text_markdown`` renders the fixture message as sanitized Markdown.

    The expected string wraps each entity in its Markdown markup and requires
    the underscore inside the italic entity to be backslash-escaped, while the
    literal ``<`` before the bold entity stays unchanged.
    """
    # Only the post-sanitization expectation is kept; an earlier assignment
    # to the same name was overwritten before use and has been dropped.
    # The backslash is written as '\\_' because '\_' is an invalid escape
    # sequence in a non-raw literal; the runtime value is unchanged.
    test_md_string = ('Test for <*bold*, _ita\\_lic_, `code`, '
                      '[links](http://github.com/) and ```pre```.')
    text_markdown = self.test_message.text_markdown
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(test_md_string, text_markdown)