Added methods to generate the original markdown/html string from entities contained in Message (#507)

* Added methods to generate the original markdown/html formatted string from the entities contained in an update

* Added

* Moved the html/markdown parsing methods to `Message`

* Moved extract_* methods from helpers to the appropriate location

* Refactored text_markdown and text_html

* Refactored text_markdown and text_html for efficiency

* Fixed method call in conversationhandler

* Fixed method call in handler

* Fixed `make test` command on windows systems

* Improved method documentation
This commit is contained in:
Joscha Götzer 2017-01-20 20:13:58 +01:00 committed by Jannes Höke
parent 191e442e59
commit ac59f2f37c
9 changed files with 278 additions and 32 deletions

View file

@ -22,7 +22,6 @@ import logging
from telegram import Update
from telegram.ext import Handler
from telegram.utils.helpers import extract_chat_and_user
from telegram.utils.promise import Promise
@ -119,7 +118,7 @@ class ConversationHandler(Handler):
if not isinstance(update, Update) or update.channel_post:
return False
chat, user = extract_chat_and_user(update)
chat, user = update.extract_chat_and_user()
key = (chat.id, user.id) if chat else (None, user.id)
state = self.conversations.get(key)

View file

@ -20,7 +20,6 @@
Dispatcher """
from telegram.utils.deprecate import deprecate
from telegram.utils.helpers import extract_chat_and_user
class Handler(object):
@ -105,7 +104,7 @@ class Handler(object):
if self.pass_job_queue:
optional_args['job_queue'] = dispatcher.job_queue
if self.pass_user_data or self.pass_chat_data:
chat, user = extract_chat_and_user(update)
chat, user = update.extract_chat_and_user()
if self.pass_user_data:
optional_args['user_data'] = dispatcher.user_data[user.id]

View file

@ -18,7 +18,6 @@
# You should have received a copy of the GNU Lesser Public License
# along with this program. If not, see [http://www.gnu.org/licenses/].
"""This module contains an object that represents a Telegram Message."""
import sys
from datetime import datetime
from time import mktime
@ -586,6 +585,7 @@ class Message(TelegramObject):
Returns:
dict[:class:`telegram.MessageEntity`, ``str``]: A dictionary of entities mapped to the
text that belongs to them, calculated based on UTF-16 codepoints.
"""
if types is None:
types = MessageEntity.ALL_TYPES
@ -594,3 +594,80 @@ class Message(TelegramObject):
entity: self.parse_entity(entity)
for entity in self.entities if entity.type in types
}
@property
def text_html(self):
    """
    Creates an html-formatted string from the markup entities found in the message
    (uses ``parse_entities``).

    Use this if you want to retrieve the original string sent by the bot, as opposed to the
    plain text with corresponding markup entities.

    The characters ``&``, ``<`` and ``>`` occurring in the message text are replaced by
    their HTML entities, so that they cannot be mistaken for markup in the result.

    Returns:
        str: The html-formatted text, or ``None`` if the message has no text.
    """
    if self.text is None:
        return None

    def escape(raw):
        # '&' has to be replaced first, otherwise the ampersands introduced by the
        # other two replacements would be escaped a second time.
        return raw.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

    # Entity types that are rendered as a plain ``<tag>text</tag>`` pair.
    tags = {
        MessageEntity.BOLD: 'b',
        MessageEntity.ITALIC: 'i',
        MessageEntity.CODE: 'code',
        MessageEntity.PRE: 'pre',
    }

    # Entity offsets and lengths are counted in UTF-16 code units, so the text in between
    # the entities is sliced from the UTF-16 encoded text (two bytes per code unit).
    encoded_text = self.text.encode('utf-16-le')

    def between(start, end=None):
        stop = None if end is None else end * 2
        return escape(encoded_text[start * 2:stop].decode('utf-16-le'))

    entities = sorted(self.parse_entities().items(), key=lambda item: item[0].offset)

    pieces = []
    last_offset = 0
    for entity, text in entities:
        text = escape(text)
        if entity.type == MessageEntity.TEXT_LINK:
            insert = '<a href="{}">{}</a>'.format(entity.url, text)
        elif entity.type in tags:
            tag = tags[entity.type]
            insert = '<' + tag + '>' + text + '</' + tag + '>'
        else:
            # Entities without an html representation keep their plain text.
            insert = text

        pieces.append(between(last_offset, entity.offset))
        pieces.append(insert)
        last_offset = entity.offset + entity.length

    # Whatever follows the last entity.
    pieces.append(between(last_offset))
    return ''.join(pieces)
@property
def text_markdown(self):
    """
    Creates a markdown-formatted string from the markup entities found in the message
    (uses ``parse_entities``).

    Use this if you want to retrieve the original string sent by the bot, as opposed to the
    plain text with corresponding markup entities.

    Returns:
        str: The markdown-formatted text, or ``None`` if the message has no text.
    """
    if self.text is None:
        return None

    # Entity types that are rendered by wrapping the text in one marker on both sides.
    markers = {
        MessageEntity.BOLD: '*',
        MessageEntity.ITALIC: '_',
        MessageEntity.CODE: '`',
        MessageEntity.PRE: '```',
    }

    # Entity offsets and lengths are counted in UTF-16 code units, so the text in between
    # the entities is sliced from the UTF-16 encoded text (two bytes per code unit).
    encoded_text = self.text.encode('utf-16-le')

    def between(start, end=None):
        stop = None if end is None else end * 2
        return encoded_text[start * 2:stop].decode('utf-16-le')

    entities = sorted(self.parse_entities().items(), key=lambda item: item[0].offset)

    # NOTE(review): markdown special characters that are part of the message text are
    # inserted unescaped - confirm whether callers need them escaped.
    pieces = []
    last_offset = 0
    for entity, text in entities:
        if entity.type == MessageEntity.TEXT_LINK:
            insert = '[{}]({})'.format(text, entity.url)
        elif entity.type in markers:
            marker = markers[entity.type]
            insert = marker + text + marker
        else:
            # Entities without a markdown representation keep their plain text.
            insert = text

        pieces.append(between(last_offset, entity.offset))
        pieces.append(insert)
        last_offset = entity.offset + entity.length

    # Whatever follows the last entity.
    pieces.append(between(last_offset))
    return ''.join(pieces)

View file

@ -98,3 +98,80 @@ class Update(TelegramObject):
data['edited_channel_post'] = Message.de_json(data.get('edited_channel_post'), bot)
return Update(**data)
def extract_chat_and_user(self):
    """
    Helper method to get the sender's chat and user objects from an arbitrary update.

    The attributes ``message``, ``edited_message``, ``inline_query``,
    ``chosen_inline_result`` and ``callback_query`` are checked in this order and the
    first one that is set determines the result.

    Returns:
        tuple: of (chat, user), where either value is None if it could not be found.
    """
    # Regular and edited messages both carry a chat and a sender.
    message = self.message or self.edited_message
    if message:
        return message.chat, message.from_user

    # Inline updates have a sender but no chat.
    inline = self.inline_query or self.chosen_inline_result
    if inline:
        return None, inline.from_user

    query = self.callback_query
    if query:
        # The chat is only known if the callback query has a message attached.
        attached = query.message
        return (attached.chat if attached else None), query.from_user

    return None, None
def extract_message_text(self):
    """
    Helper method to get the message text from an arbitrary update.

    Depending on the type of update, one of the available attributes ``message``,
    ``edited_message`` or ``callback_query`` is used to determine the result.

    Returns:
        str: The extracted message text

    Raises:
        ValueError: If the update contains no message to take the text from
    """
    if self.message:
        return self.message.text
    if self.edited_message:
        return self.edited_message.text
    # A callback query may come without a message; treat that like any other update
    # without a message instead of failing with an AttributeError.
    if self.callback_query and self.callback_query.message:
        return self.callback_query.message.text
    raise ValueError("Update contains no message text.")
def extract_entities(self):
    """
    Helper method to get parsed entities from an arbitrary update.

    Depending on the type of update, one of the available attributes ``message``,
    ``edited_message`` or ``callback_query`` is used to determine the result.

    Returns:
        dict[:class:`telegram.MessageEntity`, ``str``]: A dictionary of entities mapped to the
            text that belongs to them, calculated based on UTF-16 codepoints.

    Raises:
        ValueError: If the update contains no message to take the entities from
    """
    if self.message:
        return self.message.parse_entities()
    if self.edited_message:
        return self.edited_message.parse_entities()
    # A callback query may come without a message; treat that like any other update
    # without a message instead of failing with an AttributeError.
    if self.callback_query and self.callback_query.message:
        return self.callback_query.message.parse_entities()
    raise ValueError("No message object found in self, therefore no entities available.")

View file

@ -18,27 +18,10 @@
# along with this program. If not, see [http://www.gnu.org/licenses/].
""" This module contains helper functions """
import re
def extract_chat_and_user(update):
    """Helper function to get the sender's chat and user objects from an update.

    The attributes ``message``, ``edited_message``, ``inline_query``,
    ``chosen_inline_result`` and ``callback_query`` of ``update`` are checked in this
    order and the first one that is set determines the result.

    Returns:
        tuple: of (chat, user), where either value is None if it could not be found.
    """
    if update.message:
        return update.message.chat, update.message.from_user

    if update.edited_message:
        return update.edited_message.chat, update.edited_message.from_user

    # Inline updates have a sender but no chat.
    if update.inline_query:
        return None, update.inline_query.from_user

    if update.chosen_inline_result:
        return None, update.chosen_inline_result.from_user

    if update.callback_query:
        query = update.callback_query
        # The chat is only known if the callback query has a message attached.
        chat = query.message.chat if query.message else None
        return chat, query.from_user

    return None, None
def escape_markdown(text):
    """Helper function to escape telegram markup symbols

    Every ``*``, ``_``, backtick and ``[`` in ``text`` is prefixed with a backslash.

    Args:
        text (str): The text to escape.

    Returns:
        str: The text with the markup symbols escaped.
    """
    # Raw string: '\*' and '\[' are not valid string escape sequences, so a plain literal
    # only works by accident and triggers a warning on newer Python versions.
    escape_chars = r'\*_`\['
    return re.sub(r'([%s])' % escape_chars, r'\\\1', text)

View file

@ -19,8 +19,8 @@
"""This module contains an object that represents a Base class for tests"""
import os
import sys
import signal
import sys
from nose.tools import make_decorator
@ -78,13 +78,21 @@ def timeout(time_limit):
raise TestTimedOut(time_limit, frame)
def newfunc(*args, **kwargs):
    """Run the wrapped function, interrupting it via SIGALRM after ``time_limit`` seconds.

    The alarm is only armed where the ``signal`` module provides ``SIGALRM`` and
    ``alarm``; elsewhere the function simply runs without a time limit.
    """
    # Will only work on unix systems
    alarm_supported = hasattr(signal, 'SIGALRM') and hasattr(signal, 'alarm')

    if alarm_supported:
        orig_handler = signal.signal(signal.SIGALRM, timed_out)
        signal.alarm(time_limit)

    try:
        rc = func(*args, **kwargs)
    finally:
        if alarm_supported:
            # Cancel the pending alarm before putting the previous handler back.
            signal.alarm(0)
            signal.signal(signal.SIGALRM, orig_handler)
    return rc
newfunc = make_decorator(func)(newfunc)

42
tests/test_helpers.py Normal file
View file

@ -0,0 +1,42 @@
#!/usr/bin/env python
#
# A library that provides a Python interface to the Telegram Bot API
# Copyright (C) 2015-2016
# Leandro Toledo de Souza <devs@python-telegram-bot.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see [http://www.gnu.org/licenses/].
"""This module contains an object that represents Tests for the helper functions in
telegram.utils.helpers"""
import sys
import unittest
from telegram.utils import helpers
sys.path.append('.')
from tests.base import BaseTest
class HelpersTest(BaseTest, unittest.TestCase):
    """This object represents Tests for the Helpers Module"""

    def test_escape_markdown(self):
        """Markup symbols in the text are prefixed with a backslash."""
        test_str = "*bold*, _italic_, `code`, [text_link](http://github.com/)"
        # Raw string, so that the expected backslashes are literal backslashes rather than
        # (invalid) string escape sequences.
        expected_str = r"\*bold\*, \_italic\_, \`code\`, \[text\_link](http://github.com/)"

        self.assertEqual(expected_str, helpers.escape_markdown(test_str))
if __name__ == '__main__':
unittest.main()

View file

@ -33,6 +33,45 @@ from tests.base import BaseTest
class MessageTest(BaseTest, unittest.TestCase):
"""This object represents Tests for Telegram MessageTest."""
def setUp(self):
    """Create a test message whose text is marked up by five different entities."""
    link_url = 'http://github.com/'

    # One entity per formatting type handled by text_html / text_markdown.
    self.test_entities = [
        dict(type='bold', offset=9, length=4),
        dict(type='italic', offset=15, length=6),
        dict(type='code', offset=23, length=4),
        dict(type='text_link', offset=29, length=5, url=link_url),
        dict(type='pre', offset=39, length=3),
    ]
    self.test_text = 'Test for bold, italic, code, links and pre.'

    message_entities = [telegram.MessageEntity(**spec) for spec in self.test_entities]
    self.test_message = telegram.Message(
        message_id=1,
        from_user=None,
        date=None,
        chat=None,
        text=self.test_text,
        entities=message_entities)
def test_parse_entity(self):
text = (b'\\U0001f469\\u200d\\U0001f469\\u200d\\U0001f467'
b'\\u200d\\U0001f467\\U0001f431http://google.com').decode('unicode-escape')
@ -59,6 +98,17 @@ class MessageTest(BaseTest, unittest.TestCase):
{entity: 'http://google.com',
entity_2: 'h'})
def test_text_html(self):
    """Test for Message.text_html"""
    test_html_string = 'Test for <b>bold</b>, <i>italic</i>, <code>code</code>, ' \
                       '<a href="http://github.com/">links</a> and <pre>pre</pre>.'
    text_html = self.test_message.text_html
    # assertEqual instead of the deprecated alias assertEquals.
    self.assertEqual(test_html_string, text_html)
def test_text_markdown(self):
    """Test for Message.text_markdown"""
    test_md_string = 'Test for *bold*, _italic_, `code`, ' \
                     '[links](http://github.com/) and ```pre```.'
    text_markdown = self.test_message.text_markdown
    # assertEqual instead of the deprecated alias assertEquals.
    self.assertEqual(test_md_string, text_markdown)
@flaky(3, 1)
def test_reply_text(self):
"""Test for Message.reply_text"""

View file

@ -76,6 +76,17 @@ class UpdateTest(BaseTest, unittest.TestCase):
self.assertEqual(update['update_id'], self.update_id)
self.assertTrue(isinstance(update['message'], telegram.Message))
def test_extract_chat_and_user(self):
    """Test for Update.extract_chat_and_user with a message update"""
    parsed = telegram.Update.de_json(self.json_dict, self._bot)
    extracted_chat, extracted_user = parsed.extract_chat_and_user()

    # Both values have to be the ones of the message contained in the update.
    self.assertEqual(parsed.message.chat, extracted_chat)
    self.assertEqual(parsed.message.from_user, extracted_user)
def test_extract_message_text(self):
    """Test for Update.extract_message_text with a message update"""
    parsed = telegram.Update.de_json(self.json_dict, self._bot)
    extracted_text = parsed.extract_message_text()

    # The text has to be the one of the message contained in the update.
    self.assertEqual(parsed.message.text, extracted_text)
if __name__ == '__main__':
unittest.main()