Add Message caption html/markdown methods (#1013)

Closes #1010
This commit is contained in:
Eldinnie 2018-02-22 16:38:54 +01:00 committed by Noam Meltzer
parent a9a503b9c3
commit b275031a16
3 changed files with 119 additions and 25 deletions

View file

@ -19,12 +19,13 @@
# along with this program. If not, see [http://www.gnu.org/licenses/].
"""This module contains an object that represents a Telegram Message."""
import sys
from html import escape
from telegram import (Audio, Contact, Document, Chat, Location, PhotoSize, Sticker, TelegramObject,
User, Video, Voice, Venue, MessageEntity, Game, Invoice, SuccessfulPayment,
VideoNote)
from telegram import ParseMode
from telegram.utils.helpers import escape_html, escape_markdown, to_timestamp, from_timestamp
from telegram.utils.helpers import escape_markdown, to_timestamp, from_timestamp
_UNDEFINED = object()
@ -865,9 +866,8 @@ class Message(TelegramObject):
for entity in self.caption_entities if entity.type in types
}
def _text_html(self, urled=False):
entities = self.parse_entities()
message_text = self.text
@staticmethod
def _parse_html(message_text, entities, urled=False):
if not sys.maxunicode == 0xffff:
message_text = message_text.encode('utf-16-le')
@ -875,7 +875,7 @@ class Message(TelegramObject):
last_offset = 0
for entity, text in sorted(entities.items(), key=(lambda item: item[0].offset)):
text = escape_html(text)
text = escape(text)
if entity.type == MessageEntity.TEXT_LINK:
insert = '<a href="{}">{}</a>'.format(entity.url, text)
@ -893,17 +893,17 @@ class Message(TelegramObject):
insert = text
if sys.maxunicode == 0xffff:
html_text += escape_html(message_text[last_offset:entity.offset]) + insert
html_text += escape(message_text[last_offset:entity.offset]) + insert
else:
html_text += escape_html(message_text[last_offset * 2:entity.offset * 2]
.decode('utf-16-le')) + insert
html_text += escape(message_text[last_offset * 2:entity.offset * 2]
.decode('utf-16-le')) + insert
last_offset = entity.offset + entity.length
if sys.maxunicode == 0xffff:
html_text += escape_html(message_text[last_offset:])
html_text += escape(message_text[last_offset:])
else:
html_text += escape_html(message_text[last_offset * 2:].decode('utf-16-le'))
html_text += escape(message_text[last_offset * 2:].decode('utf-16-le'))
return html_text
@property
@ -917,7 +917,7 @@ class Message(TelegramObject):
:obj:`str`: Message text with entities formatted as HTML.
"""
return self._text_html(urled=False)
return self._parse_html(self.text, self.parse_entities(), urled=False)
@property
def text_html_urled(self):
@ -930,11 +930,38 @@ class Message(TelegramObject):
:obj:`str`: Message text with entities formatted as HTML.
"""
return self._text_html(urled=True)
return self._parse_html(self.text, self.parse_entities(), urled=True)
def _text_markdown(self, urled=False):
entities = self.parse_entities()
message_text = self.text
@property
def caption_html(self):
    """Creates an HTML-formatted string from the markup entities found in the message's
    caption.

    Use this if you want to retrieve the message caption with the caption entities formatted as
    HTML in the same way the original message was formatted.

    Returns:
        :obj:`str`: Message caption with caption entities formatted as HTML, or ``None`` if
        the message has no caption.

    """
    # Without a caption there is nothing to format. The HTML parser calls string methods on
    # its input, so handing it ``None`` would raise instead of returning a value.
    if self.caption is None:
        return None
    return self._parse_html(self.caption, self.parse_caption_entities(), urled=False)
@property
def caption_html_urled(self):
    """Creates an HTML-formatted string from the markup entities found in the message's
    caption.

    Use this if you want to retrieve the message caption with the caption entities formatted as
    HTML. This also formats :attr:`telegram.MessageEntity.URL` as a hyperlink.

    Returns:
        :obj:`str`: Message caption with caption entities formatted as HTML, or ``None`` if
        the message has no caption.

    """
    # Without a caption there is nothing to format. The HTML parser calls string methods on
    # its input, so handing it ``None`` would raise instead of returning a value.
    if self.caption is None:
        return None
    return self._parse_html(self.caption, self.parse_caption_entities(), urled=True)
@staticmethod
def _parse_markdown(message_text, entities, urled=False):
if not sys.maxunicode == 0xffff:
message_text = message_text.encode('utf-16-le')
@ -983,7 +1010,7 @@ class Message(TelegramObject):
:obj:`str`: Message text with entities formatted as Markdown.
"""
return self._text_markdown(urled=False)
return self._parse_markdown(self.text, self.parse_entities(), urled=False)
@property
def text_markdown_urled(self):
@ -996,4 +1023,32 @@ class Message(TelegramObject):
:obj:`str`: Message text with entities formatted as Markdown.
"""
return self._text_markdown(urled=True)
return self._parse_markdown(self.text, self.parse_entities(), urled=True)
@property
def caption_markdown(self):
    """Creates a Markdown-formatted string from the markup entities found in the message's
    caption.

    Use this if you want to retrieve the message caption with the caption entities formatted as
    Markdown in the same way the original message was formatted.

    Returns:
        :obj:`str`: Message caption with caption entities formatted as Markdown, or ``None``
        if the message has no caption.

    """
    # Without a caption there is nothing to format. The Markdown parser calls string methods
    # on its input, so handing it ``None`` would raise instead of returning a value.
    if self.caption is None:
        return None
    return self._parse_markdown(self.caption, self.parse_caption_entities(), urled=False)
@property
def caption_markdown_urled(self):
    """Creates a Markdown-formatted string from the markup entities found in the message's
    caption.

    Use this if you want to retrieve the message caption with the caption entities formatted as
    Markdown. This also formats :attr:`telegram.MessageEntity.URL` as a hyperlink.

    Returns:
        :obj:`str`: Message caption with caption entities formatted as Markdown, or ``None``
        if the message has no caption.

    """
    # Without a caption there is nothing to format. The Markdown parser calls string methods
    # on its input, so handing it ``None`` would raise instead of returning a value.
    if self.caption is None:
        return None
    return self._parse_markdown(self.caption, self.parse_caption_entities(), urled=True)

View file

@ -17,17 +17,12 @@
# You should have received a copy of the GNU Lesser Public License
# along with this program. If not, see [http://www.gnu.org/licenses/].
"""This module contains helper functions."""
from html import escape
import re
import signal
from datetime import datetime
try:
from html import escape as escape_html # noqa: F401
except ImportError:
from cgi import escape as escape_html # noqa: F401
# From https://stackoverflow.com/questions/2549939/get-signal-names-from-numbers-in-python
_signames = {v: k
for k, v in reversed(sorted(vars(signal).items()))
@ -99,7 +94,7 @@ def mention_html(user_id, name):
:obj:`str`: The inline mention for the user as html.
"""
if isinstance(user_id, int):
return '<a href="tg://user?id={}">{}</a>'.format(user_id, escape_html(name))
return '<a href="tg://user?id={}">{}</a>'.format(user_id, escape(name))
def mention_markdown(user_id, name):

View file

@ -117,7 +117,9 @@ class TestMessage(object):
date=None,
chat=None,
text=test_text,
entities=[MessageEntity(**e) for e in test_entities])
entities=[MessageEntity(**e) for e in test_entities],
caption=test_text,
caption_entities=[MessageEntity(**e) for e in test_entities])
def test_all_posibilities_de_json_and_to_dict(self, bot, message_params):
new = Message.de_json(message_params.to_dict(), bot)
@ -206,6 +208,48 @@ class TestMessage(object):
text=text, entities=[bold_entity])
assert expected == message.text_markdown
def test_caption_html_simple(self):
test_html_string = ('Test for &lt;<b>bold</b>, <i>ita_lic</i>, <code>code</code>, '
'<a href="http://github.com/">links</a> and <pre>pre</pre>. '
'http://google.com')
caption_html = self.test_message.caption_html
assert caption_html == test_html_string
def test_caption_html_urled(self):
test_html_string = ('Test for &lt;<b>bold</b>, <i>ita_lic</i>, <code>code</code>, '
'<a href="http://github.com/">links</a> and <pre>pre</pre>. '
'<a href="http://google.com">http://google.com</a>')
caption_html = self.test_message.caption_html_urled
assert caption_html == test_html_string
def test_caption_markdown_simple(self):
test_md_string = ('Test for <*bold*, _ita\_lic_, `code`, [links](http://github.com/) and '
'```pre```. http://google.com')
caption_markdown = self.test_message.caption_markdown
assert caption_markdown == test_md_string
def test_caption_markdown_urled(self):
test_md_string = ('Test for <*bold*, _ita\_lic_, `code`, [links](http://github.com/) and '
'```pre```. [http://google.com](http://google.com)')
caption_markdown = self.test_message.caption_markdown_urled
assert caption_markdown == test_md_string
def test_caption_html_emoji(self):
    """Check ``caption_html`` when a bold entity follows an emoji sequence in the caption."""
    raw_caption = b'\\U0001f469\\u200d\\U0001f469\\u200d ABC'.decode('unicode-escape')
    wanted = b'\\U0001f469\\u200d\\U0001f469\\u200d <b>ABC</b>'.decode('unicode-escape')
    # offset=7 / length=3 is meant to cover the trailing "ABC"; presumably the offset counts
    # UTF-16 code units, so each emoji takes two -- confirm against the entity parser.
    entities = [MessageEntity(type=MessageEntity.BOLD, offset=7, length=3)]
    msg = Message(1, self.from_user, self.date, self.chat,
                  caption=raw_caption, caption_entities=entities)
    assert msg.caption_html == wanted
def test_caption_markdown_emoji(self):
    """Check ``caption_markdown`` when a bold entity follows an emoji sequence in the caption."""
    raw_caption = b'\\U0001f469\\u200d\\U0001f469\\u200d ABC'.decode('unicode-escape')
    wanted = b'\\U0001f469\\u200d\\U0001f469\\u200d *ABC*'.decode('unicode-escape')
    # offset=7 / length=3 is meant to cover the trailing "ABC"; presumably the offset counts
    # UTF-16 code units, so each emoji takes two -- confirm against the entity parser.
    entities = [MessageEntity(type=MessageEntity.BOLD, offset=7, length=3)]
    msg = Message(1, self.from_user, self.date, self.chat,
                  caption=raw_caption, caption_entities=entities)
    assert msg.caption_markdown == wanted
def test_parse_entities_url_emoji(self):
url = b'http://github.com/?unicode=\\u2713\\U0001f469'.decode('unicode-escape')
text = 'some url'