From e0f36867cc402bd267624d82371bd9d861f2969c Mon Sep 17 00:00:00 2001
From: Bibo-Joshi <22366557+Bibo-Joshi@users.noreply.github.com>
Date: Sun, 1 Sep 2024 09:33:12 +0200
Subject: [PATCH] Add `MessageEntity.shift_entities` and
 `MessageEntity.concatenate` (#4376)

---
 telegram/_messageentity.py  | 147 ++++++++++++++++++++++++++++++++++--
 tests/test_messageentity.py |  48 ++++++++++++
 2 files changed, 190 insertions(+), 5 deletions(-)

diff --git a/telegram/_messageentity.py b/telegram/_messageentity.py
index 6e219537f..cdf103b2d 100644
--- a/telegram/_messageentity.py
+++ b/telegram/_messageentity.py
@@ -20,7 +20,7 @@
 
 import copy
 import itertools
-from typing import TYPE_CHECKING, Dict, Final, List, Optional, Sequence
+from typing import TYPE_CHECKING, Dict, Final, List, Optional, Sequence, Tuple, Union
 
 from telegram import constants
 from telegram._telegramobject import TelegramObject
@@ -32,6 +32,8 @@ from telegram._utils.types import JSONDict
 if TYPE_CHECKING:
     from telegram import Bot
 
+_SEM = Sequence["MessageEntity"]  # Shorthand for the entity-sequence type used in signatures
+
 
 class MessageEntity(TelegramObject):
     """
@@ -146,9 +148,7 @@ class MessageEntity(TelegramObject):
         return super().de_json(data=data, bot=bot)
 
     @staticmethod
-    def adjust_message_entities_to_utf_16(
-        text: str, entities: Sequence["MessageEntity"]
-    ) -> Sequence["MessageEntity"]:
+    def adjust_message_entities_to_utf_16(text: str, entities: _SEM) -> _SEM:
         """Utility functionality for converting the offset and length of entities from
         Unicode (:obj:`str`) to UTF-16 (``utf-16-le`` encoded :obj:`bytes`).
 
@@ -206,7 +206,7 @@ class MessageEntity(TelegramObject):
             text_slice = text[last_position:position]
             accumulated_length += len(text_slice.encode(TextEncoding.UTF_16_LE)) // 2
             position_translation[position] = accumulated_length
-        # get the final output entites
+        # get the final output entities
         out = []
         for entity in entities:
             translated_positions = position_translation[entity.offset]
@@ -220,6 +220,143 @@ class MessageEntity(TelegramObject):
             out.append(new_entity)
         return out
 
+    @staticmethod
+    def shift_entities(by: Union[str, int], entities: _SEM) -> _SEM:
+        """Utility functionality for shifting the offset of entities by a given amount.
+
+        Examples:
+            Shifting by an integer amount:
+
+            .. code-block:: python
+
+                text = "Hello, world!"
+                entities = [
+                    MessageEntity(offset=0, length=5, type=MessageEntity.BOLD),
+                    MessageEntity(offset=7, length=5, type=MessageEntity.ITALIC),
+                ]
+                shifted_entities = MessageEntity.shift_entities(1, entities)
+                await bot.send_message(
+                    chat_id=123,
+                    text="!" + text,
+                    entities=shifted_entities,
+                )
+
+            Shifting using a string:
+
+            .. code-block:: python
+
+                text = "Hello, world!"
+                prefix = "𝄢"
+                entities = [
+                    MessageEntity(offset=0, length=5, type=MessageEntity.BOLD),
+                    MessageEntity(offset=7, length=5, type=MessageEntity.ITALIC),
+                ]
+                shifted_entities = MessageEntity.shift_entities(prefix, entities)
+                await bot.send_message(
+                    chat_id=123,
+                    text=prefix + text,
+                    entities=shifted_entities,
+                )
+
+        Tip:
+            The :paramref:`entities` are *not* modified in place. The function returns a sequence
+            of new objects.
+
+        .. versionadded:: NEXT.VERSION
+
+        Args:
+            by (:obj:`str` | :obj:`int`): Either the amount to shift the offset by or
+                a string whose length will be used as the amount to shift the offset by. In the
+                latter case, the length is counted in UTF-16 code units.
+            entities (Sequence[:class:`telegram.MessageEntity`]): Sequence of entities to shift.
+
+        Returns:
+            Sequence[:class:`telegram.MessageEntity`]: Sequence of entities with the offset shifted
+        """
+        effective_shift = by if isinstance(by, int) else len(by.encode("utf-16-le")) // 2
+        # Shift shallow copies so that the passed entities themselves stay untouched.
+        out = []
+        for entity in entities:
+            new_entity = copy.copy(entity)
+            with new_entity._unfrozen():
+                new_entity.offset += effective_shift
+            out.append(new_entity)
+        return out
+
+    @classmethod
+    def concatenate(
+        cls,
+        *args: Union[Tuple[str, _SEM], Tuple[str, _SEM, bool]],
+    ) -> Tuple[str, _SEM]:
+        """Utility functionality for concatenating texts along with their formatting entities.
+
+        Tip:
+            This function is useful for prefixing an already formatted text with a new text and its
+            formatting entities. In particular, it automatically handles UTF-16 encoding correctly.
+
+        Examples:
+            This example shows a callback function that can be used to add a prefix and suffix to
+            the message in a :class:`~telegram.ext.CallbackQueryHandler`:
+
+            .. code-block:: python
+
+                async def prefix_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
+                    prefix = "𠌕 bold 𝄢 italic underlined: 𝛙𝌢𑁍 | "
+                    prefix_entities = [
+                        MessageEntity(offset=2, length=4, type=MessageEntity.BOLD),
+                        MessageEntity(offset=9, length=6, type=MessageEntity.ITALIC),
+                        MessageEntity(offset=28, length=3, type=MessageEntity.UNDERLINE),
+                    ]
+                    suffix = " | 𠌕 bold 𝄢 italic underlined: 𝛙𝌢𑁍"
+                    suffix_entities = [
+                        MessageEntity(offset=5, length=4, type=MessageEntity.BOLD),
+                        MessageEntity(offset=12, length=6, type=MessageEntity.ITALIC),
+                        MessageEntity(offset=31, length=3, type=MessageEntity.UNDERLINE),
+                    ]
+
+                    message = update.effective_message
+                    first = (prefix, prefix_entities, True)
+                    second = (message.text, message.entities)
+                    third = (suffix, suffix_entities, True)
+
+                    new_text, new_entities = MessageEntity.concatenate(first, second, third)
+                    await update.callback_query.edit_message_text(
+                        text=new_text,
+                        entities=new_entities,
+                    )
+
+        Hint:
+            The entities are *not* modified in place. The function returns a
+            new sequence of objects.
+
+        .. versionadded:: NEXT.VERSION
+
+        Args:
+            *args (Tuple[:obj:`str`, Sequence[:class:`telegram.MessageEntity`]] | \
+                Tuple[:obj:`str`, Sequence[:class:`telegram.MessageEntity`], :obj:`bool`]):
+                Arbitrary number of tuples containing the text and its entities to concatenate.
+                If a tuple has a third element and it is :obj:`True`, the entities of that tuple
+                are first adjusted to UTF-16 via
+                :meth:`adjust_message_entities_to_utf_16`. UTF-16 adjustment is disabled by
+                default.
+
+        Returns:
+            Tuple[:obj:`str`, Sequence[:class:`telegram.MessageEntity`]]: The concatenated text
+            and its entities
+        """
+        output_text = ""
+        output_entities: List[MessageEntity] = []
+        for arg in args:
+            text, entities = arg[0], arg[1]
+
+            if len(arg) > 2 and arg[2] is True:
+                entities = cls.adjust_message_entities_to_utf_16(text, entities)
+            # Shift by the UTF-16 length of the text accumulated so far.
+            output_entities.extend(cls.shift_entities(output_text, entities))
+            output_text += text
+
+        return output_text, output_entities
+
     ALL_TYPES: Final[List[str]] = list(constants.MessageEntityType)
     """List[:obj:`str`]: A list of all available message entity types."""
     BLOCKQUOTE: Final[str] = constants.MessageEntityType.BLOCKQUOTE
diff --git a/tests/test_messageentity.py b/tests/test_messageentity.py
index 8bab9fec7..2fc21ea49 100644
--- a/tests/test_messageentity.py
+++ b/tests/test_messageentity.py
@@ -103,6 +103,54 @@ class TestMessageEntityWithoutRequest(TestMessageEntityBase):
             assert out_entity.offset == offset
             assert out_entity.length == length
 
+    @pytest.mark.parametrize("by", [6, "prefix", "𝛙𝌢𑁍"])
+    def test_shift_entities(self, by):
+        kwargs = {
+            "url": "url",
+            "user": 42,
+            "language": "python",
+            "custom_emoji_id": "custom_emoji_id",
+        }
+        entities = [
+            MessageEntity(MessageEntity.BOLD, 2, 3, **kwargs),
+            MessageEntity(MessageEntity.BOLD, 5, 6, **kwargs),
+        ]
+        shifted = MessageEntity.shift_entities(by, entities)
+        assert shifted[0].offset == 8
+        assert shifted[1].offset == 11
+        # The result must consist of new objects rather than the inputs themselves
+        assert shifted[0] is not entities[0]
+        assert shifted[1] is not entities[1]
+        # The optional attributes passed above must be carried over unchanged
+        for entity in shifted:
+            for key, value in kwargs.items():
+                assert getattr(entity, key) == value
+
+    def test_concatenate(self):
+        kwargs = {
+            "url": "url",
+            "user": 42,
+            "language": "python",
+            "custom_emoji_id": "custom_emoji_id",
+        }
+        first_entity = MessageEntity(MessageEntity.BOLD, 0, 6, **kwargs)
+        second_entity = MessageEntity(MessageEntity.ITALIC, 0, 4, **kwargs)
+        third_entity = MessageEntity(MessageEntity.UNDERLINE, 3, 6, **kwargs)
+        # Cover all three tuple shapes: flag set to True, flag set to False, flag omitted
+        first = ("prefix 𝛙𝌢𑁍 | ", [first_entity], True)
+        second = ("text 𝛙𝌢𑁍", [second_entity], False)
+        third = (" | suffix 𝛙𝌢𑁍", [third_entity])
+
+        new_text, new_entities = MessageEntity.concatenate(first, second, third)
+        # Expected offsets: 0, then 0 + 16 and 3 + 27 (UTF-16 lengths of the preceding text)
+        assert new_text == "prefix 𝛙𝌢𑁍 | text 𝛙𝌢𑁍 | suffix 𝛙𝌢𑁍"
+        assert [entity.offset for entity in new_entities] == [0, 16, 30]
+        for old, new in zip([first_entity, second_entity, third_entity], new_entities):
+            assert new is not old
+            assert new.type == old.type
+            for key, value in kwargs.items():
+                assert getattr(new, key) == value
+
     def test_equality(self):
         a = MessageEntity(MessageEntity.BOLD, 2, 3)
         b = MessageEntity(MessageEntity.BOLD, 2, 3)