From 9aa552269451c5e23385f5da19cf29d626429910 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jannes=20H=C3=B6ke?= Date: Fri, 19 May 2017 19:11:40 +0200 Subject: [PATCH] sanitize html and markdown in Message.text_html and text_markdown (#621) * sanitize html and markdown in Message.text_html and text_markdown * add import for escape_html --- telegram/message.py | 7 +++++-- telegram/utils/helpers.py | 5 +++++ tests/test_message.py | 18 +++++++++--------- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/telegram/message.py b/telegram/message.py index 2122134bf..b2f7d8531 100644 --- a/telegram/message.py +++ b/telegram/message.py @@ -24,6 +24,7 @@ from time import mktime from telegram import (Audio, Contact, Document, Chat, Location, PhotoSize, Sticker, TelegramObject, User, Video, Voice, Venue, MessageEntity, Game) +from telegram.utils.helpers import escape_html, escape_markdown class Message(TelegramObject): @@ -635,6 +636,7 @@ class Message(TelegramObject): last_offset = 0 for entity, text in sorted(entities.items(), key=(lambda item: item[0].offset)): + text = escape_html(text) if entity.type == MessageEntity.TEXT_LINK: insert = '<a href="{}">{}</a>'.format(entity.url, text) @@ -649,7 +651,7 @@ class Message(TelegramObject): else: insert = text - markdown_text += message_text[last_offset:entity.offset] + insert + markdown_text += escape_html(message_text[last_offset:entity.offset]) + insert last_offset = entity.offset + entity.length markdown_text += message_text[last_offset:] @@ -673,6 +675,7 @@ class Message(TelegramObject): last_offset = 0 for entity, text in sorted(entities.items(), key=(lambda item: item[0].offset)): + text = escape_markdown(text) if entity.type == MessageEntity.TEXT_LINK: insert = '[{}]({})'.format(text, entity.url) @@ -687,7 +690,7 @@ class Message(TelegramObject): else: insert = text - markdown_text += message_text[last_offset:entity.offset] + insert + markdown_text += escape_markdown(message_text[last_offset:entity.offset]) + insert last_offset = 
entity.offset + entity.length markdown_text += message_text[last_offset:] diff --git a/telegram/utils/helpers.py b/telegram/utils/helpers.py index e97e9b704..9d6ed3a4d 100644 --- a/telegram/utils/helpers.py +++ b/telegram/utils/helpers.py @@ -20,6 +20,11 @@ import re +try: + from html import escape as escape_html # noqa: F401 +except ImportError: + from cgi import escape as escape_html # noqa: F401 + def escape_markdown(text): """Helper function to escape telegram markup symbols""" diff --git a/tests/test_message.py b/tests/test_message.py index 7e5a3ecc0..453498037 100644 --- a/tests/test_message.py +++ b/tests/test_message.py @@ -37,33 +37,33 @@ class MessageTest(BaseTest, unittest.TestCase): self.test_entities = [ { 'length': 4, - 'offset': 9, + 'offset': 10, 'type': 'bold' }, { - 'length': 6, - 'offset': 15, + 'length': 7, + 'offset': 16, 'type': 'italic' }, { 'length': 4, - 'offset': 23, + 'offset': 25, 'type': 'code' }, { 'length': 5, - 'offset': 29, + 'offset': 31, 'type': 'text_link', 'url': 'http://github.com/' }, { 'length': 3, - 'offset': 39, + 'offset': 41, 'type': 'pre' }, ] - self.test_text = 'Test for bold, italic, code, links and pre.' + self.test_text = 'Test for <bold, ita_lic, code, links and pre.' [...] - test_html_string = 'Test for <b>bold</b>, <i>italic</i>, <code>code</code>, <a href="http://github.com/">links</a> and <pre>pre</pre>.' + test_html_string = 'Test for &lt;<b>bold</b>, <i>ita_lic</i>, <code>code</code>, <a href="http://github.com/">links</a> and <pre>pre</pre>.' text_html = self.test_message.text_html self.assertEquals(test_html_string, text_html) def test_text_markdown(self): - test_md_string = 'Test for *bold*, _italic_, `code`, [links](http://github.com/) and ```pre```.' + test_md_string = 'Test for <*bold*, _ita\_lic_, `code`, [links](http://github.com/) and ```pre```.' text_markdown = self.test_message.text_markdown self.assertEquals(test_md_string, text_markdown)