Add Message caption html/markdown methods (#1013)

Closes #1010
This commit is contained in:
Eldinnie 2018-02-22 16:38:54 +01:00 committed by Noam Meltzer
parent a9a503b9c3
commit b275031a16
3 changed files with 119 additions and 25 deletions

View file

@ -19,12 +19,13 @@
# along with this program. If not, see [http://www.gnu.org/licenses/]. # along with this program. If not, see [http://www.gnu.org/licenses/].
"""This module contains an object that represents a Telegram Message.""" """This module contains an object that represents a Telegram Message."""
import sys import sys
from html import escape
from telegram import (Audio, Contact, Document, Chat, Location, PhotoSize, Sticker, TelegramObject, from telegram import (Audio, Contact, Document, Chat, Location, PhotoSize, Sticker, TelegramObject,
User, Video, Voice, Venue, MessageEntity, Game, Invoice, SuccessfulPayment, User, Video, Voice, Venue, MessageEntity, Game, Invoice, SuccessfulPayment,
VideoNote) VideoNote)
from telegram import ParseMode from telegram import ParseMode
from telegram.utils.helpers import escape_html, escape_markdown, to_timestamp, from_timestamp from telegram.utils.helpers import escape_markdown, to_timestamp, from_timestamp
_UNDEFINED = object() _UNDEFINED = object()
@ -865,9 +866,8 @@ class Message(TelegramObject):
for entity in self.caption_entities if entity.type in types for entity in self.caption_entities if entity.type in types
} }
def _text_html(self, urled=False): @staticmethod
entities = self.parse_entities() def _parse_html(message_text, entities, urled=False):
message_text = self.text
if not sys.maxunicode == 0xffff: if not sys.maxunicode == 0xffff:
message_text = message_text.encode('utf-16-le') message_text = message_text.encode('utf-16-le')
@ -875,7 +875,7 @@ class Message(TelegramObject):
last_offset = 0 last_offset = 0
for entity, text in sorted(entities.items(), key=(lambda item: item[0].offset)): for entity, text in sorted(entities.items(), key=(lambda item: item[0].offset)):
text = escape_html(text) text = escape(text)
if entity.type == MessageEntity.TEXT_LINK: if entity.type == MessageEntity.TEXT_LINK:
insert = '<a href="{}">{}</a>'.format(entity.url, text) insert = '<a href="{}">{}</a>'.format(entity.url, text)
@ -893,17 +893,17 @@ class Message(TelegramObject):
insert = text insert = text
if sys.maxunicode == 0xffff: if sys.maxunicode == 0xffff:
html_text += escape_html(message_text[last_offset:entity.offset]) + insert html_text += escape(message_text[last_offset:entity.offset]) + insert
else: else:
html_text += escape_html(message_text[last_offset * 2:entity.offset * 2] html_text += escape(message_text[last_offset * 2:entity.offset * 2]
.decode('utf-16-le')) + insert .decode('utf-16-le')) + insert
last_offset = entity.offset + entity.length last_offset = entity.offset + entity.length
if sys.maxunicode == 0xffff: if sys.maxunicode == 0xffff:
html_text += escape_html(message_text[last_offset:]) html_text += escape(message_text[last_offset:])
else: else:
html_text += escape_html(message_text[last_offset * 2:].decode('utf-16-le')) html_text += escape(message_text[last_offset * 2:].decode('utf-16-le'))
return html_text return html_text
@property @property
@ -917,7 +917,7 @@ class Message(TelegramObject):
:obj:`str`: Message text with entities formatted as HTML. :obj:`str`: Message text with entities formatted as HTML.
""" """
return self._text_html(urled=False) return self._parse_html(self.text, self.parse_entities(), urled=False)
@property @property
def text_html_urled(self): def text_html_urled(self):
@ -930,11 +930,38 @@ class Message(TelegramObject):
:obj:`str`: Message text with entities formatted as HTML. :obj:`str`: Message text with entities formatted as HTML.
""" """
return self._text_html(urled=True) return self._parse_html(self.text, self.parse_entities(), urled=True)
def _text_markdown(self, urled=False): @property
entities = self.parse_entities() def caption_html(self):
message_text = self.text """Creates an HTML-formatted string from the markup entities found in the message's
caption.
Use this if you want to retrieve the message caption with the caption entities formatted as
HTML in the same way the original message was formatted.
Returns:
:obj:`str`: Message caption with caption entities formatted as HTML.
"""
return self._parse_html(self.caption, self.parse_caption_entities(), urled=False)
@property
def caption_html_urled(self):
    """Creates an HTML-formatted string from the markup entities found in the message's
    caption.

    Use this if you want to retrieve the message caption with the caption entities formatted as
    HTML. This also formats :attr:`telegram.MessageEntity.URL` as a hyperlink.

    Returns:
        :obj:`str`: Message caption with caption entities formatted as HTML, or :obj:`None`
        when :attr:`caption` is :obj:`None`.

    """
    caption = self.caption
    if caption is None:
        # There is nothing to format, and the parser calls ``.encode`` on its text
        # argument, so handing it ``None`` would raise instead of returning a value.
        return None
    return self._parse_html(caption, self.parse_caption_entities(), urled=True)
@staticmethod
def _parse_markdown(message_text, entities, urled=False):
if not sys.maxunicode == 0xffff: if not sys.maxunicode == 0xffff:
message_text = message_text.encode('utf-16-le') message_text = message_text.encode('utf-16-le')
@ -983,7 +1010,7 @@ class Message(TelegramObject):
:obj:`str`: Message text with entities formatted as Markdown. :obj:`str`: Message text with entities formatted as Markdown.
""" """
return self._text_markdown(urled=False) return self._parse_markdown(self.text, self.parse_entities(), urled=False)
@property @property
def text_markdown_urled(self): def text_markdown_urled(self):
@ -996,4 +1023,32 @@ class Message(TelegramObject):
:obj:`str`: Message text with entities formatted as Markdown. :obj:`str`: Message text with entities formatted as Markdown.
""" """
return self._text_markdown(urled=True) return self._parse_markdown(self.text, self.parse_entities(), urled=True)
@property
def caption_markdown(self):
    """Creates a Markdown-formatted string from the markup entities found in the message's
    caption.

    Use this if you want to retrieve the message caption with the caption entities formatted as
    Markdown in the same way the original message was formatted.

    Returns:
        :obj:`str`: Message caption with caption entities formatted as Markdown, or
        :obj:`None` when :attr:`caption` is :obj:`None`.

    """
    caption = self.caption
    if caption is None:
        # There is nothing to format, and the parser calls ``.encode`` on its text
        # argument, so handing it ``None`` would raise instead of returning a value.
        return None
    return self._parse_markdown(caption, self.parse_caption_entities(), urled=False)
@property
def caption_markdown_urled(self):
    """Creates a Markdown-formatted string from the markup entities found in the message's
    caption.

    Use this if you want to retrieve the message caption with the caption entities formatted as
    Markdown. This also formats :attr:`telegram.MessageEntity.URL` as a hyperlink.

    Returns:
        :obj:`str`: Message caption with caption entities formatted as Markdown, or
        :obj:`None` when :attr:`caption` is :obj:`None`.

    """
    caption = self.caption
    if caption is None:
        # There is nothing to format, and the parser calls ``.encode`` on its text
        # argument, so handing it ``None`` would raise instead of returning a value.
        return None
    return self._parse_markdown(caption, self.parse_caption_entities(), urled=True)

View file

@ -17,17 +17,12 @@
# You should have received a copy of the GNU Lesser Public License # You should have received a copy of the GNU Lesser Public License
# along with this program. If not, see [http://www.gnu.org/licenses/]. # along with this program. If not, see [http://www.gnu.org/licenses/].
"""This module contains helper functions.""" """This module contains helper functions."""
from html import escape
import re import re
import signal import signal
from datetime import datetime from datetime import datetime
try:
from html import escape as escape_html # noqa: F401
except ImportError:
from cgi import escape as escape_html # noqa: F401
# From https://stackoverflow.com/questions/2549939/get-signal-names-from-numbers-in-python # From https://stackoverflow.com/questions/2549939/get-signal-names-from-numbers-in-python
_signames = {v: k _signames = {v: k
for k, v in reversed(sorted(vars(signal).items())) for k, v in reversed(sorted(vars(signal).items()))
@ -99,7 +94,7 @@ def mention_html(user_id, name):
:obj:`str`: The inline mention for the user as html. :obj:`str`: The inline mention for the user as html.
""" """
if isinstance(user_id, int): if isinstance(user_id, int):
return '<a href="tg://user?id={}">{}</a>'.format(user_id, escape_html(name)) return '<a href="tg://user?id={}">{}</a>'.format(user_id, escape(name))
def mention_markdown(user_id, name): def mention_markdown(user_id, name):

View file

@ -117,7 +117,9 @@ class TestMessage(object):
date=None, date=None,
chat=None, chat=None,
text=test_text, text=test_text,
entities=[MessageEntity(**e) for e in test_entities]) entities=[MessageEntity(**e) for e in test_entities],
caption=test_text,
caption_entities=[MessageEntity(**e) for e in test_entities])
def test_all_posibilities_de_json_and_to_dict(self, bot, message_params): def test_all_posibilities_de_json_and_to_dict(self, bot, message_params):
new = Message.de_json(message_params.to_dict(), bot) new = Message.de_json(message_params.to_dict(), bot)
@ -206,6 +208,48 @@ class TestMessage(object):
text=text, entities=[bold_entity]) text=text, entities=[bold_entity])
assert expected == message.text_markdown assert expected == message.text_markdown
def test_caption_html_simple(self):
    """``caption_html`` renders the fixture caption as HTML; the trailing URL stays plain."""
    expected_html = ('Test for &lt;<b>bold</b>, <i>ita_lic</i>, <code>code</code>, '
                     '<a href="http://github.com/">links</a> and <pre>pre</pre>. '
                     'http://google.com')
    assert self.test_message.caption_html == expected_html
def test_caption_html_urled(self):
    """``caption_html_urled`` additionally wraps the trailing URL in an anchor tag."""
    expected_html = ('Test for &lt;<b>bold</b>, <i>ita_lic</i>, <code>code</code>, '
                     '<a href="http://github.com/">links</a> and <pre>pre</pre>. '
                     '<a href="http://google.com">http://google.com</a>')
    assert self.test_message.caption_html_urled == expected_html
def test_caption_markdown_simple(self):
    """``caption_markdown`` renders the fixture caption as Markdown; the URL stays plain."""
    # Raw literal: the expected text contains a literal backslash before the underscore,
    # and ``\_`` in a normal string literal is an invalid escape sequence.
    test_md_string = (r'Test for <*bold*, _ita\_lic_, `code`, [links](http://github.com/) and '
                      '```pre```. http://google.com')
    caption_markdown = self.test_message.caption_markdown
    assert caption_markdown == test_md_string
def test_caption_markdown_urled(self):
    """``caption_markdown_urled`` additionally renders the trailing URL as a Markdown link."""
    # Raw literal: the expected text contains a literal backslash before the underscore,
    # and ``\_`` in a normal string literal is an invalid escape sequence.
    test_md_string = (r'Test for <*bold*, _ita\_lic_, `code`, [links](http://github.com/) and '
                      '```pre```. [http://google.com](http://google.com)')
    caption_markdown = self.test_message.caption_markdown_urled
    assert caption_markdown == test_md_string
def test_caption_html_emoji(self):
    """A bold entity placed after an emoji sequence must wrap exactly ``ABC`` in HTML."""
    caption_text = b'\\U0001f469\\u200d\\U0001f469\\u200d ABC'.decode('unicode-escape')
    expected_html = b'\\U0001f469\\u200d\\U0001f469\\u200d <b>ABC</b>'.decode('unicode-escape')
    # Offset 7 is the position of "ABC" counted in UTF-16 code units
    # (each U+1F469 takes two units).
    bold = MessageEntity(type=MessageEntity.BOLD, offset=7, length=3)
    msg = Message(1, self.from_user, self.date, self.chat,
                  caption=caption_text, caption_entities=[bold])
    assert expected_html == msg.caption_html
def test_caption_markdown_emoji(self):
    """A bold entity placed after an emoji sequence must wrap exactly ``ABC`` in Markdown."""
    caption_text = b'\\U0001f469\\u200d\\U0001f469\\u200d ABC'.decode('unicode-escape')
    expected_md = b'\\U0001f469\\u200d\\U0001f469\\u200d *ABC*'.decode('unicode-escape')
    # Offset 7 is the position of "ABC" counted in UTF-16 code units
    # (each U+1F469 takes two units).
    bold = MessageEntity(type=MessageEntity.BOLD, offset=7, length=3)
    msg = Message(1, self.from_user, self.date, self.chat,
                  caption=caption_text, caption_entities=[bold])
    assert expected_md == msg.caption_markdown
def test_parse_entities_url_emoji(self): def test_parse_entities_url_emoji(self):
url = b'http://github.com/?unicode=\\u2713\\U0001f469'.decode('unicode-escape') url = b'http://github.com/?unicode=\\u2713\\U0001f469'.decode('unicode-escape')
text = 'some url' text = 'some url'