sanitize html and markdown in Message.text_html and text_markdown (#621)

* sanitize html and markdown in Message.text_html and text_markdown
* add import for escape_html
2025-01-08 19:34:12 +01:00 · 2017-05-19 19:11:40 +02:00 · 2017-05-19 19:11:40 +02:00 · 9aa5522694
commit 9aa5522694
parent 9720f59d7e
3 changed files with 19 additions and 11 deletions
--- a/telegram/message.py
+++ b/telegram/message.py
@@ -24,6 +24,7 @@ from time import mktime

 from telegram import (Audio, Contact, Document, Chat, Location, PhotoSize, Sticker, TelegramObject,
                      User, Video, Voice, Venue, MessageEntity, Game)
+from telegram.utils.helpers import escape_html, escape_markdown


 class Message(TelegramObject):
@@ -635,6 +636,7 @@ class Message(TelegramObject):
        last_offset = 0

        for entity, text in sorted(entities.items(), key=(lambda item: item[0].offset)):
+            text = escape_html(text)

            if entity.type == MessageEntity.TEXT_LINK:
                insert = '<a href="{}">{}</a>'.format(entity.url, text)
@@ -649,7 +651,7 @@ class Message(TelegramObject):
            else:
                insert = text

-            markdown_text += message_text[last_offset:entity.offset] + insert
+            markdown_text += escape_html(message_text[last_offset:entity.offset]) + insert
            last_offset = entity.offset + entity.length

        markdown_text += message_text[last_offset:]
@@ -673,6 +675,7 @@ class Message(TelegramObject):
        last_offset = 0

        for entity, text in sorted(entities.items(), key=(lambda item: item[0].offset)):
+            text = escape_markdown(text)

            if entity.type == MessageEntity.TEXT_LINK:
                insert = '[{}]({})'.format(text, entity.url)
@@ -687,7 +690,7 @@ class Message(TelegramObject):
            else:
                insert = text

-            markdown_text += message_text[last_offset:entity.offset] + insert
+            markdown_text += escape_markdown(message_text[last_offset:entity.offset]) + insert
            last_offset = entity.offset + entity.length

        markdown_text += message_text[last_offset:]
--- a/telegram/utils/helpers.py
+++ b/telegram/utils/helpers.py
@@ -20,6 +20,11 @@

 import re

+try:
+    from html import escape as escape_html  # noqa: F401
+except ImportError:
+    from cgi import escape as escape_html  # noqa: F401
+

 def escape_markdown(text):
    """Helper function to escape telegram markup symbols"""
--- a/tests/test_message.py
+++ b/tests/test_message.py
@@ -37,33 +37,33 @@ class MessageTest(BaseTest, unittest.TestCase):
        self.test_entities = [
            {
                'length': 4,
-                'offset': 9,
+                'offset': 10,
                'type': 'bold'
            },
            {
-                'length': 6,
-                'offset': 15,
+                'length': 7,
+                'offset': 16,
                'type': 'italic'
            },
            {
                'length': 4,
-                'offset': 23,
+                'offset': 25,
                'type': 'code'
            },
            {
                'length': 5,
-                'offset': 29,
+                'offset': 31,
                'type': 'text_link',
                'url': 'http://github.com/'
            },
            {
                'length': 3,
-                'offset': 39,
+                'offset': 41,
                'type': 'pre'
            },
        ]

-        self.test_text = 'Test for bold, italic, code, links and pre.'
+        self.test_text = 'Test for <bold, ita_lic, code, links and pre.'
        self.test_message = telegram.Message(
            message_id=1,
            from_user=None,
@@ -99,12 +99,12 @@ class MessageTest(BaseTest, unittest.TestCase):
                              entity_2: 'h'})

    def test_text_html(self):
-        test_html_string = 'Test for <b>bold</b>, <i>italic</i>, <code>code</code>, ' '<a href="http://github.com/">links</a> and <pre>pre</pre>.'
+        test_html_string = 'Test for &lt;<b>bold</b>, <i>ita_lic</i>, <code>code</code>, <a href="http://github.com/">links</a> and <pre>pre</pre>.'
        text_html = self.test_message.text_html
        self.assertEquals(test_html_string, text_html)

    def test_text_markdown(self):
-        test_md_string = 'Test for *bold*, _italic_, `code`, [links](http://github.com/) and ```pre```.'
+        test_md_string = 'Test for <*bold*, _ita\_lic_, `code`, [links](http://github.com/) and ```pre```.'
        text_markdown = self.test_message.text_markdown
        self.assertEquals(test_md_string, text_markdown)