mirror of
https://github.com/python-telegram-bot/python-telegram-bot.git
synced 2025-02-27 14:25:14 +01:00
Add methods to parse entities in Message
Should close #400. * Add parse_entity * Add parse_entities * Add MessageEntity types as constants to MessageEntity. * Add MAX_MESSAGE_ENTITIES to constants.py Note: the value has been found by experimentation as opposed to extracted from the api docs. * Add tests for parse_entity and parse_entities
This commit is contained in:
parent
e4a132c0e4
commit
6647ae3c25
4 changed files with 135 additions and 0 deletions
|
@ -32,6 +32,12 @@ Attributes:
|
|||
limit, but eventually you'll begin receiving 429 errors.
|
||||
MAX_MESSAGES_PER_SECOND (int)
|
||||
MAX_MESSAGES_PER_MINUTE_PER_GROUP (int)
|
||||
|
||||
The following constants have been found by experimentation:
|
||||
|
||||
Attributes:
|
||||
MAX_MESSAGE_ENTITIES (int): Max number of entities that can be in a message.
|
||||
(Beyond this cap Telegram will simply ignore further formatting styles)
|
||||
"""
|
||||
|
||||
MAX_MESSAGE_LENGTH = 4096
|
||||
|
@ -45,3 +51,4 @@ MAX_FILESIZE_UPLOAD = int(50E6) # (50MB)
|
|||
MAX_MESSAGES_PER_SECOND_PER_CHAT = 1
|
||||
MAX_MESSAGES_PER_SECOND = 30
|
||||
MAX_MESSAGES_PER_MINUTE_PER_GROUP = 20
|
||||
MAX_MESSAGE_ENTITIES = 100
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
# along with this program. If not, see [http://www.gnu.org/licenses/].
|
||||
"""This module contains a object that represents a Telegram Message."""
|
||||
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from time import mktime
|
||||
|
||||
|
@ -244,3 +245,55 @@ class Message(TelegramObject):
|
|||
except AttributeError:
|
||||
# Python 3 (< 3.3) and Python 2
|
||||
return int(mktime(dt_obj.timetuple()))
|
||||
|
||||
def parse_entity(self, entity):
    """
    Returns the text from a given :class:`telegram.MessageEntity`.

    Note:
        This method is present because Telegram calculates the offset and length in
        UTF-16 code units, which some versions of Python don't handle automatically.
        (That is, you can't just slice ``Message.text`` with the offset and length.)

    Args:
        entity (MessageEntity): The entity to extract the text from. It must be an entity that
            belongs to this message.

    Returns:
        str: The text of the given entity
    """
    start = entity.offset
    end = start + entity.length

    # On a narrow build (sys.maxunicode == 0xffff) no conversion is needed:
    # the text can be sliced with the offset and length directly.
    if sys.maxunicode == 0xffff:
        return self.text[start:end]

    # Otherwise slice the UTF-16-LE encoding, where every code unit is exactly
    # two bytes, and decode the resulting slice back to text.
    utf16_text = self.text.encode('utf-16-le')
    return utf16_text[start * 2:end * 2].decode('utf-16-le')
|
||||
|
||||
def parse_entities(self, types=None):
    """
    Returns a ``dict`` that maps :class:`telegram.MessageEntity` to ``str``.
    It contains entities from this message filtered by their ``type`` attribute as the key, and
    the text that each entity belongs to as the value of the ``dict``.

    Note:
        This method should always be used instead of the ``entities`` attribute, since it
        calculates the correct substring from the message text based on UTF-16 code units.
        See ``parse_entity`` for more info.

    Args:
        types (Optional[list]): List of ``MessageEntity`` types as strings. If the ``type``
            attribute of an entity is contained in this list, it will be returned.
            A single type given as a plain string is treated as a one-element list.
            Defaults to a list of all types. All types can be found as constants in
            :class:`telegram.MessageEntity`.

    Returns:
        dict[:class:`telegram.MessageEntity`, ``str``]: A dictionary of entities mapped to the
        text that belongs to them, calculated based on UTF-16 code units.
    """
    if types is None:
        types = MessageEntity.ALL_TYPES
    elif isinstance(types, (str, type(u''))):
        # A bare string would make ``entity.type in types`` a substring test
        # (e.g. 'mention' in 'text_mention'), so wrap it to get exact matching.
        # ``type(u'')`` covers ``unicode`` on Python 2 and is ``str`` on Python 3.
        types = [types]

    return {entity: self.parse_entity(entity)
            for entity in self.entities if entity.type in types}
|
||||
|
|
|
@ -68,3 +68,17 @@ class MessageEntity(TelegramObject):
|
|||
entities.append(MessageEntity.de_json(entity))
|
||||
|
||||
return entities
|
||||
|
||||
MENTION = 'mention'
|
||||
HASHTAG = 'hashtag'
|
||||
BOT_COMMAND = 'bot_command'
|
||||
URL = 'url'
|
||||
EMAIL = 'email'
|
||||
BOLD = 'bold'
|
||||
ITALIC = 'italic'
|
||||
CODE = 'code'
|
||||
PRE = 'pre'
|
||||
TEXT_LINK = 'text_link'
|
||||
TEXT_MENTION = 'text_mention'
|
||||
ALL_TYPES = [MENTION, HASHTAG, BOT_COMMAND, URL, EMAIL, BOLD, ITALIC, CODE, PRE, TEXT_LINK,
|
||||
TEXT_MENTION]
|
||||
|
|
61
tests/test_message.py
Normal file
61
tests/test_message.py
Normal file
|
@ -0,0 +1,61 @@
|
|||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
#
|
||||
# A library that provides a Python interface to the Telegram Bot API
|
||||
# Copyright (C) 2015-2016
|
||||
# Leandro Toledo de Souza <devs@python-telegram-bot.org>
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see [http://www.gnu.org/licenses/].
|
||||
"""This module contains a object that represents Tests for Telegram Message"""
|
||||
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
sys.path.append('.')
|
||||
|
||||
import telegram
|
||||
from tests.base import BaseTest
|
||||
|
||||
|
||||
class MessageTest(BaseTest, unittest.TestCase):
    """Tests for the entity parsing helpers of telegram.Message."""

    def test_parse_entity(self):
        # The emoji prefix occupies 13 UTF-16 code units, so the URL starts at offset 13.
        text = (b'\\U0001f469\\u200d\\U0001f469\\u200d\\U0001f467'
                b'\\u200d\\U0001f467\\U0001f431http://google.com').decode('unicode-escape')
        entity = telegram.MessageEntity(type=telegram.MessageEntity.URL, offset=13, length=17)
        message = telegram.Message(
            message_id=1, from_user=None, date=None, chat=None, text=text, entities=[entity])
        self.assertEqual(message.parse_entity(entity), 'http://google.com')

    def test_parse_entities(self):
        text = (b'\\U0001f469\\u200d\\U0001f469\\u200d\\U0001f467'
                b'\\u200d\\U0001f467\\U0001f431http://google.com').decode('unicode-escape')
        entity = telegram.MessageEntity(type=telegram.MessageEntity.URL, offset=13, length=17)
        entity_2 = telegram.MessageEntity(type=telegram.MessageEntity.BOLD, offset=13, length=1)
        message = telegram.Message(
            message_id=1,
            from_user=None,
            date=None,
            chat=None,
            text=text,
            entities=[entity_2, entity])
        # ``types`` is documented as a list; a bare string would only match by
        # substring containment, so pass a one-element list.
        self.assertDictEqual(
            message.parse_entities([telegram.MessageEntity.URL]), {entity: 'http://google.com'})
        self.assertDictEqual(message.parse_entities(), {entity: 'http://google.com',
                                                        entity_2: 'h'})
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
Loading…
Add table
Reference in a new issue