Add methods to parse entities in Message

Should close .

* Add parse_entity

* Add parse_entities

* Add MessageEntity types as constants to MessageEntity.

* Add MAX_MESSAGE_ENTITIES to constants.py
Note: the value has been found by experimentation as opposed to extracted from the api docs.

* Add tests for parse_entity and parse_entities
This commit is contained in:
Jacob Bom 2016-09-07 08:49:09 +02:00 committed by GitHub
parent e4a132c0e4
commit 6647ae3c25
4 changed files with 135 additions and 0 deletions

View file

@ -32,6 +32,12 @@ Attributes:
limit, but eventually you'll begin receiving 429 errors.
MAX_MESSAGES_PER_SECOND (int)
MAX_MESSAGES_PER_MINUTE_PER_GROUP (int)
The following constant have been found by experimentation:
Attributes:
MAX_MESSAGE_ENTITIES (int): Max number of entities that can be in a message.
(Beyond this cap telegram will simply ignore further formatting styles)
"""
MAX_MESSAGE_LENGTH = 4096
@ -45,3 +51,4 @@ MAX_FILESIZE_UPLOAD = int(50E6) # (50MB)
MAX_MESSAGES_PER_SECOND_PER_CHAT = 1
MAX_MESSAGES_PER_SECOND = 30
MAX_MESSAGES_PER_MINUTE_PER_GROUP = 20
MAX_MESSAGE_ENTITIES = 100

View file

@ -19,6 +19,7 @@
# along with this program. If not, see [http://www.gnu.org/licenses/].
"""This module contains a object that represents a Telegram Message."""
import sys
from datetime import datetime
from time import mktime
@ -244,3 +245,55 @@ class Message(TelegramObject):
except AttributeError:
# Python 3 (< 3.3) and Python 2
return int(mktime(dt_obj.timetuple()))
def parse_entity(self, entity):
"""
Returns the text from a given :class:`telegram.MessageEntity`.
Note:
This method is present because Telegram calculates the offset and length in
UTF-16 codepoint pairs, which some versions of Python don't handle automatically.
(That is, you can't just slice ``Message.text`` with the offset and length.)
Args:
entity (MessageEntity): The entity to extract the text from. It must be an entity that
belongs to this message.
Returns:
str: The text of the given entity
"""
# Is it a narrow build, if so we don't need to convert
if sys.maxunicode == 0xffff:
return self.text[entity.offset:entity.offset + entity.length]
else:
entity_text = self.text.encode('utf-16-le')
entity_text = entity_text[entity.offset * 2:(entity.offset + entity.length) * 2]
return entity_text.decode('utf-16-le')
def parse_entities(self, types=None):
"""
Returns a ``dict`` that maps :class:`telegram.MessageEntity` to ``str``.
It contains entities from this message filtered by their ``type`` attribute as the key, and
the text that each entity belongs to as the value of the ``dict``.
Note:
This method should always be used instead of the ``entities`` attribute, since it
calculates the correct substring from the message text based on UTF-16 codepoints.
See ``get_entity_text`` for more info.
Args:
types (Optional[list]): List of ``MessageEntity`` types as strings. If the ``type``
attribute of an entity is contained in this list, it will be returned.
Defaults to a list of all types. All types can be found as constants in
:class:`telegram.MessageEntity`.
Returns:
dict[:class:`telegram.MessageEntity`, ``str``]: A dictionary of entities mapped to the
text that belongs to them, calculated based on UTF-16 codepoints.
"""
if types is None:
types = MessageEntity.ALL_TYPES
return {entity: self.parse_entity(entity)
for entity in self.entities if entity.type in types}

View file

@ -68,3 +68,17 @@ class MessageEntity(TelegramObject):
entities.append(MessageEntity.de_json(entity))
return entities
MENTION = 'mention'
HASHTAG = 'hashtag'
BOT_COMMAND = 'bot_command'
URL = 'url'
EMAIL = 'email'
BOLD = 'bold'
ITALIC = 'italic'
CODE = 'code'
PRE = 'pre'
TEXT_LINK = 'text_link'
TEXT_MENTION = 'text_mention'
ALL_TYPES = [MENTION, HASHTAG, BOT_COMMAND, URL, EMAIL, BOLD, ITALIC, CODE, PRE, TEXT_LINK,
TEXT_MENTION]

61
tests/test_message.py Normal file
View file

@ -0,0 +1,61 @@
#!/usr/bin/env python
# encoding: utf-8
#
# A library that provides a Python interface to the Telegram Bot API
# Copyright (C) 2015-2016
# Leandro Toledo de Souza <devs@python-telegram-bot.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see [http://www.gnu.org/licenses/].
"""This module contains a object that represents Tests for Telegram Message"""
import sys
import unittest
sys.path.append('.')
import telegram
from tests.base import BaseTest
class MessageTest(BaseTest, unittest.TestCase):
"""This object represents Tests for Telegram MessageTest."""
def test_parse_entity(self):
text = (b'\\U0001f469\\u200d\\U0001f469\\u200d\\U0001f467'
b'\\u200d\\U0001f467\\U0001f431http://google.com').decode('unicode-escape')
entity = telegram.MessageEntity(type=telegram.MessageEntity.URL, offset=13, length=17)
message = telegram.Message(
message_id=1, from_user=None, date=None, chat=None, text=text, entities=[entity])
self.assertEqual(message.parse_entity(entity), 'http://google.com')
def test_parse_entities(self):
text = (b'\\U0001f469\\u200d\\U0001f469\\u200d\\U0001f467'
b'\\u200d\\U0001f467\\U0001f431http://google.com').decode('unicode-escape')
entity = telegram.MessageEntity(type=telegram.MessageEntity.URL, offset=13, length=17)
entity_2 = telegram.MessageEntity(type=telegram.MessageEntity.BOLD, offset=13, length=1)
message = telegram.Message(
message_id=1,
from_user=None,
date=None,
chat=None,
text=text,
entities=[entity_2, entity])
self.assertDictEqual(
message.parse_entities(telegram.MessageEntity.URL), {entity: 'http://google.com'})
self.assertDictEqual(message.parse_entities(), {entity: 'http://google.com',
entity_2: 'h'})
if __name__ == '__main__':
unittest.main()