mirror of
https://github.com/python-telegram-bot/python-telegram-bot.git
synced 2024-11-21 14:46:29 +01:00
Add MessageEntity.shift_entities
and MessageEntity.concatenate
(#4376)
This commit is contained in:
parent
01f689373c
commit
e0f36867cc
2 changed files with 190 additions and 5 deletions
|
@ -20,7 +20,7 @@
|
||||||
|
|
||||||
import copy
|
import copy
|
||||||
import itertools
|
import itertools
|
||||||
from typing import TYPE_CHECKING, Dict, Final, List, Optional, Sequence
|
from typing import TYPE_CHECKING, Dict, Final, List, Optional, Sequence, Tuple, Union
|
||||||
|
|
||||||
from telegram import constants
|
from telegram import constants
|
||||||
from telegram._telegramobject import TelegramObject
|
from telegram._telegramobject import TelegramObject
|
||||||
|
@ -32,6 +32,8 @@ from telegram._utils.types import JSONDict
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from telegram import Bot
|
from telegram import Bot
|
||||||
|
|
||||||
|
_SEM = Sequence["MessageEntity"]
|
||||||
|
|
||||||
|
|
||||||
class MessageEntity(TelegramObject):
|
class MessageEntity(TelegramObject):
|
||||||
"""
|
"""
|
||||||
|
@ -146,9 +148,7 @@ class MessageEntity(TelegramObject):
|
||||||
return super().de_json(data=data, bot=bot)
|
return super().de_json(data=data, bot=bot)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def adjust_message_entities_to_utf_16(
|
def adjust_message_entities_to_utf_16(text: str, entities: _SEM) -> _SEM:
|
||||||
text: str, entities: Sequence["MessageEntity"]
|
|
||||||
) -> Sequence["MessageEntity"]:
|
|
||||||
"""Utility functionality for converting the offset and length of entities from
|
"""Utility functionality for converting the offset and length of entities from
|
||||||
Unicode (:obj:`str`) to UTF-16 (``utf-16-le`` encoded :obj:`bytes`).
|
Unicode (:obj:`str`) to UTF-16 (``utf-16-le`` encoded :obj:`bytes`).
|
||||||
|
|
||||||
|
@ -206,7 +206,7 @@ class MessageEntity(TelegramObject):
|
||||||
text_slice = text[last_position:position]
|
text_slice = text[last_position:position]
|
||||||
accumulated_length += len(text_slice.encode(TextEncoding.UTF_16_LE)) // 2
|
accumulated_length += len(text_slice.encode(TextEncoding.UTF_16_LE)) // 2
|
||||||
position_translation[position] = accumulated_length
|
position_translation[position] = accumulated_length
|
||||||
# get the final output entites
|
# get the final output entities
|
||||||
out = []
|
out = []
|
||||||
for entity in entities:
|
for entity in entities:
|
||||||
translated_positions = position_translation[entity.offset]
|
translated_positions = position_translation[entity.offset]
|
||||||
|
@ -220,6 +220,143 @@ class MessageEntity(TelegramObject):
|
||||||
out.append(new_entity)
|
out.append(new_entity)
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def shift_entities(by: Union[str, int], entities: _SEM) -> _SEM:
|
||||||
|
"""Utility functionality for shifting the offset of entities by a given amount.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
Shifting by an integer amount:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
text = "Hello, world!"
|
||||||
|
entities = [
|
||||||
|
MessageEntity(offset=0, length=5, type=MessageEntity.BOLD),
|
||||||
|
MessageEntity(offset=7, length=5, type=MessageEntity.ITALIC),
|
||||||
|
]
|
||||||
|
shifted_entities = MessageEntity.shift_entities(1, entities)
|
||||||
|
await bot.send_message(
|
||||||
|
chat_id=123,
|
||||||
|
text="!" + text,
|
||||||
|
entities=shifted_entities,
|
||||||
|
)
|
||||||
|
|
||||||
|
Shifting using a string:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
text = "Hello, world!"
|
||||||
|
prefix = "𝄢"
|
||||||
|
entities = [
|
||||||
|
MessageEntity(offset=0, length=5, type=MessageEntity.BOLD),
|
||||||
|
MessageEntity(offset=7, length=5, type=MessageEntity.ITALIC),
|
||||||
|
]
|
||||||
|
shifted_entities = MessageEntity.shift_entities(prefix, entities)
|
||||||
|
await bot.send_message(
|
||||||
|
chat_id=123,
|
||||||
|
text=prefix + text,
|
||||||
|
entities=shifted_entities,
|
||||||
|
)
|
||||||
|
|
||||||
|
Tip:
|
||||||
|
The :paramref:`entities` are *not* modified in place. The function returns a sequence
|
||||||
|
of new objects.
|
||||||
|
|
||||||
|
.. versionadded:: NEXT.VERSION
|
||||||
|
|
||||||
|
Args:
|
||||||
|
by (:obj:`str` | :obj:`int`): Either the amount to shift the offset by or
|
||||||
|
a string whose length will be used as the amount to shift the offset by. In the latter
|
||||||
|
case, UTF-16 encoding will be used to calculate the length.
|
||||||
|
entities (Sequence[:class:`telegram.MessageEntity`]): Sequence of entities
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Sequence[:class:`telegram.MessageEntity`]: Sequence of entities with the offset shifted
|
||||||
|
"""
|
||||||
|
effective_shift = by if isinstance(by, int) else len(by.encode("utf-16-le")) // 2
|
||||||
|
|
||||||
|
out = []
|
||||||
|
for entity in entities:
|
||||||
|
new_entity = copy.copy(entity)
|
||||||
|
with new_entity._unfrozen():
|
||||||
|
new_entity.offset += effective_shift
|
||||||
|
out.append(new_entity)
|
||||||
|
return out
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def concatenate(
|
||||||
|
cls,
|
||||||
|
*args: Union[Tuple[str, _SEM], Tuple[str, _SEM, bool]],
|
||||||
|
) -> Tuple[str, _SEM]:
|
||||||
|
"""Utility functionality for concatenating texts along with their formatting entities.
|
||||||
|
|
||||||
|
Tip:
|
||||||
|
This function is useful for prefixing an already formatted text with a new text and its
|
||||||
|
formatting entities. In particular, it automatically handles UTF-16 encoding correctly.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
This example shows a callback function that can be used to add a prefix and suffix to
|
||||||
|
the message in a :class:`~telegram.ext.CallbackQueryHandler`:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
async def prefix_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
|
||||||
|
prefix = "𠌕 bold 𝄢 italic underlined: 𝛙𝌢𑁍 | "
|
||||||
|
prefix_entities = [
|
||||||
|
MessageEntity(offset=2, length=4, type=MessageEntity.BOLD),
|
||||||
|
MessageEntity(offset=9, length=6, type=MessageEntity.ITALIC),
|
||||||
|
MessageEntity(offset=28, length=3, type=MessageEntity.UNDERLINE),
|
||||||
|
]
|
||||||
|
suffix = " | 𠌕 bold 𝄢 italic underlined: 𝛙𝌢𑁍"
|
||||||
|
suffix_entities = [
|
||||||
|
MessageEntity(offset=5, length=4, type=MessageEntity.BOLD),
|
||||||
|
MessageEntity(offset=12, length=6, type=MessageEntity.ITALIC),
|
||||||
|
MessageEntity(offset=31, length=3, type=MessageEntity.UNDERLINE),
|
||||||
|
]
|
||||||
|
|
||||||
|
message = update.effective_message
|
||||||
|
first = (prefix, prefix_entities, True)
|
||||||
|
second = (message.text, message.entities)
|
||||||
|
third = (suffix, suffix_entities, True)
|
||||||
|
|
||||||
|
new_text, new_entities = MessageEntity.concatenate(first, second, third)
|
||||||
|
await update.callback_query.edit_message_text(
|
||||||
|
text=new_text,
|
||||||
|
entities=new_entities,
|
||||||
|
)
|
||||||
|
|
||||||
|
Hint:
|
||||||
|
The entities are *not* modified in place. The function returns a
|
||||||
|
new sequence of objects.
|
||||||
|
|
||||||
|
.. versionadded:: NEXT.VERSION
|
||||||
|
|
||||||
|
Args:
|
||||||
|
*args (Tuple[:obj:`str`, Sequence[:class:`telegram.MessageEntity`]] | \
|
||||||
|
Tuple[:obj:`str`, Sequence[:class:`telegram.MessageEntity`], :obj:`bool`]):
|
||||||
|
Arbitrary number of tuples containing the text and its entities to concatenate.
|
||||||
|
If a third tuple element (a :obj:`bool`) is given, it is used to determine whether
|
||||||
|
to adjust the entities to UTF-16 via
|
||||||
|
:meth:`adjust_message_entities_to_utf_16`. UTF-16 adjustment is disabled by
|
||||||
|
default.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple[:obj:`str`, Sequence[:class:`telegram.MessageEntity`]]: The concatenated text
|
||||||
|
and its entities
|
||||||
|
"""
|
||||||
|
output_text = ""
|
||||||
|
output_entities: List[MessageEntity] = []
|
||||||
|
for arg in args:
|
||||||
|
text, entities = arg[0], arg[1]
|
||||||
|
|
||||||
|
if len(arg) > 2 and arg[2] is True:
|
||||||
|
entities = cls.adjust_message_entities_to_utf_16(text, entities)
|
||||||
|
|
||||||
|
output_entities.extend(cls.shift_entities(output_text, entities))
|
||||||
|
output_text += text
|
||||||
|
|
||||||
|
return output_text, output_entities
|
||||||
|
|
||||||
ALL_TYPES: Final[List[str]] = list(constants.MessageEntityType)
|
ALL_TYPES: Final[List[str]] = list(constants.MessageEntityType)
|
||||||
"""List[:obj:`str`]: A list of all available message entity types."""
|
"""List[:obj:`str`]: A list of all available message entity types."""
|
||||||
BLOCKQUOTE: Final[str] = constants.MessageEntityType.BLOCKQUOTE
|
BLOCKQUOTE: Final[str] = constants.MessageEntityType.BLOCKQUOTE
|
||||||
|
|
|
@ -103,6 +103,54 @@ class TestMessageEntityWithoutRequest(TestMessageEntityBase):
|
||||||
assert out_entity.offset == offset
|
assert out_entity.offset == offset
|
||||||
assert out_entity.length == length
|
assert out_entity.length == length
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("by", [6, "prefix", "𝛙𝌢𑁍"])
|
||||||
|
def test_shift_entities(self, by):
|
||||||
|
kwargs = {
|
||||||
|
"url": "url",
|
||||||
|
"user": 42,
|
||||||
|
"language": "python",
|
||||||
|
"custom_emoji_id": "custom_emoji_id",
|
||||||
|
}
|
||||||
|
entities = [
|
||||||
|
MessageEntity(MessageEntity.BOLD, 2, 3, **kwargs),
|
||||||
|
MessageEntity(MessageEntity.BOLD, 5, 6, **kwargs),
|
||||||
|
]
|
||||||
|
shifted = MessageEntity.shift_entities(by, entities)
|
||||||
|
assert shifted[0].offset == 8
|
||||||
|
assert shifted[1].offset == 11
|
||||||
|
|
||||||
|
assert shifted[0] is not entities[0]
|
||||||
|
assert shifted[1] is not entities[1]
|
||||||
|
|
||||||
|
for entity in shifted:
|
||||||
|
for key, value in kwargs.items():
|
||||||
|
assert getattr(entity, key) == value
|
||||||
|
|
||||||
|
def test_concatenate(self):
|
||||||
|
kwargs = {
|
||||||
|
"url": "url",
|
||||||
|
"user": 42,
|
||||||
|
"language": "python",
|
||||||
|
"custom_emoji_id": "custom_emoji_id",
|
||||||
|
}
|
||||||
|
first_entity = MessageEntity(MessageEntity.BOLD, 0, 6, **kwargs)
|
||||||
|
second_entity = MessageEntity(MessageEntity.ITALIC, 0, 4, **kwargs)
|
||||||
|
third_entity = MessageEntity(MessageEntity.UNDERLINE, 3, 6, **kwargs)
|
||||||
|
|
||||||
|
first = ("prefix 𝛙𝌢𑁍 | ", [first_entity], True)
|
||||||
|
second = ("text 𝛙𝌢𑁍", [second_entity], False)
|
||||||
|
third = (" | suffix 𝛙𝌢𑁍", [third_entity])
|
||||||
|
|
||||||
|
new_text, new_entities = MessageEntity.concatenate(first, second, third)
|
||||||
|
|
||||||
|
assert new_text == "prefix 𝛙𝌢𑁍 | text 𝛙𝌢𑁍 | suffix 𝛙𝌢𑁍"
|
||||||
|
assert [entity.offset for entity in new_entities] == [0, 16, 30]
|
||||||
|
for old, new in zip([first_entity, second_entity, third_entity], new_entities):
|
||||||
|
assert new is not old
|
||||||
|
assert new.type == old.type
|
||||||
|
for key, value in kwargs.items():
|
||||||
|
assert getattr(new, key) == value
|
||||||
|
|
||||||
def test_equality(self):
|
def test_equality(self):
|
||||||
a = MessageEntity(MessageEntity.BOLD, 2, 3)
|
a = MessageEntity(MessageEntity.BOLD, 2, 3)
|
||||||
b = MessageEntity(MessageEntity.BOLD, 2, 3)
|
b = MessageEntity(MessageEntity.BOLD, 2, 3)
|
||||||
|
|
Loading…
Reference in a new issue