Add Internal Constants for Encodings (#4378)

This commit is contained in:
Pablo Martínez 2024-07-21 21:13:30 +02:00 committed by GitHub
parent c3f17bb18e
commit 0913b859d7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 62 additions and 25 deletions

View file

@ -97,6 +97,7 @@ The following wonderful people contributed directly or indirectly to this projec
- `Oleg Sushchenko <https://github.com/feuillemorte>`_
- `Or Bin <https://github.com/OrBin>`_
- `overquota <https://github.com/overquota>`_
- `Pablo Martinez <https://github.com/elpekenin>`_
- `Paradox <https://github.com/paradox70>`_
- `Patrick Hofmann <https://github.com/PH89>`_
- `Paul Larsen <https://github.com/PaulSonOfLars>`_

View file

@ -23,6 +23,7 @@ from typing import IO, Optional, Union
from uuid import uuid4
from telegram._utils.files import load_file
from telegram._utils.strings import TextEncoding
from telegram._utils.types import FieldTuple
_DEFAULT_MIME_TYPE = "application/octet-stream"
@ -74,7 +75,7 @@ class InputFile:
if isinstance(obj, bytes):
self.input_file_content: bytes = obj
elif isinstance(obj, str):
self.input_file_content = obj.encode("utf-8")
self.input_file_content = obj.encode(TextEncoding.UTF_8)
else:
reported_filename, self.input_file_content = load_file(obj)
filename = filename or reported_filename

View file

@ -24,6 +24,7 @@ from telegram._files.photosize import PhotoSize
from telegram._messageentity import MessageEntity
from telegram._telegramobject import TelegramObject
from telegram._utils.argumentparsing import parse_sequence_arg
from telegram._utils.strings import TextEncoding
from telegram._utils.types import JSONDict
if TYPE_CHECKING:
@ -157,10 +158,10 @@ class Game(TelegramObject):
if not self.text:
raise RuntimeError("This Game has no 'text'.")
entity_text = self.text.encode("utf-16-le")
entity_text = self.text.encode(TextEncoding.UTF_16_LE)
entity_text = entity_text[entity.offset * 2 : (entity.offset + entity.length) * 2]
return entity_text.decode("utf-16-le")
return entity_text.decode(TextEncoding.UTF_16_LE)
def parse_text_entities(self, types: Optional[List[str]] = None) -> Dict[MessageEntity, str]:
"""

View file

@ -68,6 +68,7 @@ from telegram._utils.argumentparsing import parse_sequence_arg
from telegram._utils.datetime import extract_tzinfo_from_defaults, from_timestamp
from telegram._utils.defaultvalue import DEFAULT_NONE, DefaultValue
from telegram._utils.entities import parse_message_entities, parse_message_entity
from telegram._utils.strings import TextEncoding
from telegram._utils.types import (
CorrectOptionID,
FileInput,
@ -1516,8 +1517,8 @@ class Message(MaybeInaccessibleMessage):
raise RuntimeError("This message has neither text nor caption.")
# Telegram wants the position in UTF-16 code units, so we have to calculate in that space
utf16_text = text.encode("utf-16-le")
utf16_quote = quote.encode("utf-16-le")
utf16_text = text.encode(TextEncoding.UTF_16_LE)
utf16_quote = quote.encode(TextEncoding.UTF_16_LE)
effective_index = index or 0
matches = list(re.finditer(re.escape(utf16_quote), utf16_text))
@ -4479,7 +4480,7 @@ class Message(MaybeInaccessibleMessage):
if message_text is None:
return None
utf_16_text = message_text.encode("utf-16-le")
utf_16_text = message_text.encode(TextEncoding.UTF_16_LE)
html_text = ""
last_offset = 0
@ -4543,7 +4544,9 @@ class Message(MaybeInaccessibleMessage):
# text is part of the parent entity
html_text += (
escape(
utf_16_text[last_offset * 2 : (entity.offset - offset) * 2].decode("utf-16-le")
utf_16_text[last_offset * 2 : (entity.offset - offset) * 2].decode(
TextEncoding.UTF_16_LE
)
)
+ insert
)
@ -4551,7 +4554,7 @@ class Message(MaybeInaccessibleMessage):
last_offset = entity.offset - offset + entity.length
# see comment above
html_text += escape(utf_16_text[last_offset * 2 :].decode("utf-16-le"))
html_text += escape(utf_16_text[last_offset * 2 :].decode(TextEncoding.UTF_16_LE))
return html_text
@ -4680,7 +4683,7 @@ class Message(MaybeInaccessibleMessage):
if message_text is None:
return None
utf_16_text = message_text.encode("utf-16-le")
utf_16_text = message_text.encode(TextEncoding.UTF_16_LE)
markdown_text = ""
last_offset = 0
@ -4773,7 +4776,7 @@ class Message(MaybeInaccessibleMessage):
markdown_text += (
escape_markdown(
utf_16_text[last_offset * 2 : (entity.offset - offset) * 2].decode(
"utf-16-le"
TextEncoding.UTF_16_LE
),
version=version,
)
@ -4784,7 +4787,7 @@ class Message(MaybeInaccessibleMessage):
# see comment above
markdown_text += escape_markdown(
utf_16_text[last_offset * 2 :].decode("utf-16-le"),
utf_16_text[last_offset * 2 :].decode(TextEncoding.UTF_16_LE),
version=version,
)

View file

@ -26,6 +26,7 @@ from telegram import constants
from telegram._telegramobject import TelegramObject
from telegram._user import User
from telegram._utils import enum
from telegram._utils.strings import TextEncoding
from telegram._utils.types import JSONDict
if TYPE_CHECKING:
@ -203,7 +204,7 @@ class MessageEntity(TelegramObject):
for i, position in enumerate(positions):
last_position = positions[i - 1] if i > 0 else 0
text_slice = text[last_position:position]
accumulated_length += len(text_slice.encode("utf-16-le")) // 2
accumulated_length += len(text_slice.encode(TextEncoding.UTF_16_LE)) // 2
position_translation[position] = accumulated_length
# get the final output entities
out = []

View file

@ -39,6 +39,7 @@ except ImportError:
from telegram._telegramobject import TelegramObject
from telegram._utils.argumentparsing import parse_sequence_arg
from telegram._utils.strings import TextEncoding
from telegram._utils.types import JSONDict
from telegram.error import PassportDecryptionError
@ -98,7 +99,7 @@ def decrypt(secret, hash, data):
@no_type_check
def decrypt_json(secret, hash, data):
"""Decrypts data using secret and hash and then decodes utf-8 string and loads json"""
return json.loads(decrypt(secret, hash, data).decode("utf-8"))
return json.loads(decrypt(secret, hash, data).decode(TextEncoding.UTF_8))
class EncryptedCredentials(TelegramObject):

View file

@ -26,6 +26,7 @@ Warning:
from typing import Dict, Optional, Sequence
from telegram._messageentity import MessageEntity
from telegram._utils.strings import TextEncoding
def parse_message_entity(text: str, entity: MessageEntity) -> str:
@ -38,10 +39,10 @@ def parse_message_entity(text: str, entity: MessageEntity) -> str:
Returns:
:obj:`str`: The text of the given entity.
"""
entity_text = text.encode("utf-16-le")
entity_text = text.encode(TextEncoding.UTF_16_LE)
entity_text = entity_text[entity.offset * 2 : (entity.offset + entity.length) * 2]
return entity_text.decode("utf-16-le")
return entity_text.decode(TextEncoding.UTF_16_LE)
def parse_message_entities(

View file

@ -24,6 +24,23 @@ Warning:
the changelog.
"""
from telegram._utils.enum import StringEnum
# TODO: Remove this when https://github.com/PyCQA/pylint/issues/6887 is resolved.
# pylint: disable=invalid-enum-extension,invalid-slots
class TextEncoding(StringEnum):
    """This enum contains encoding schemes for text.

    The members are handed directly to :meth:`str.encode` and :meth:`bytes.decode` as the
    encoding name, so that the codec names are spelled out in one place instead of being
    repeated as string literals at every call site.

    .. versionadded:: NEXT.VERSION
    """

    # This enum declares no instance attributes of its own.
    __slots__ = ()

    # Codec name used for JSON payloads and for string file contents.
    UTF_8 = "utf-8"
    # Codec name used where Telegram counts positions in UTF-16 code units: text is encoded
    # with it and the resulting bytes are sliced at ``offset * 2``.
    UTF_16_LE = "utf-16-le"
def to_camel_case(snake_str: str) -> str:
"""Converts a snake_case string to camelCase.

View file

@ -26,6 +26,7 @@ from typing import AsyncContextManager, Final, List, Optional, Tuple, Type, Type
from telegram._utils.defaultvalue import DEFAULT_NONE as _DEFAULT_NONE
from telegram._utils.defaultvalue import DefaultValue
from telegram._utils.logging import get_logger
from telegram._utils.strings import TextEncoding
from telegram._utils.types import JSONDict, ODVInput
from telegram._utils.warnings import warn
from telegram._version import __version__ as ptb_ver
@ -403,7 +404,7 @@ class BaseRequest(
Raises:
TelegramError: If loading the JSON data failed
"""
decoded_s = payload.decode("utf-8", "replace")
decoded_s = payload.decode(TextEncoding.UTF_8, "replace")
try:
return json.loads(decoded_s)
except ValueError as exc:

View file

@ -21,6 +21,7 @@ import json
from typing import Any, Dict, List, Optional, Union, final
from urllib.parse import urlencode
from telegram._utils.strings import TextEncoding
from telegram._utils.types import UploadFileDict
from telegram.request._requestparameter import RequestParameter
@ -109,7 +110,7 @@ class RequestData:
To use a custom library for JSON encoding, you can directly encode the keys of
:attr:`parameters` - note that string valued keys should not be JSON encoded.
"""
return json.dumps(self.json_parameters).encode("utf-8")
return json.dumps(self.json_parameters).encode(TextEncoding.UTF_8)
@property
def multipart_data(self) -> UploadFileDict:

View file

@ -24,6 +24,7 @@ from io import BytesIO
import pytest
from telegram import InputFile
from telegram._utils.strings import TextEncoding
from tests.auxil.files import data_file
from tests.auxil.slots import mro_slots
@ -150,17 +151,17 @@ class TestInputFileWithRequest:
await (await message.document.get_file()).download_to_memory(out=out)
out.seek(0)
assert out.read().decode("utf-8") == "PTB Rocks! ⅞"
assert out.read().decode(TextEncoding.UTF_8) == "PTB Rocks! ⅞"
async def test_send_string(self, bot, chat_id):
# We test this here and not at the respective test modules because it's not worth
# duplicating the test for the different methods
message = await bot.send_document(
chat_id, InputFile(data_file("text_file.txt").read_text(encoding="utf-8"))
chat_id, InputFile(data_file("text_file.txt").read_text(encoding=TextEncoding.UTF_8))
)
out = BytesIO()
await (await message.document.get_file()).download_to_memory(out=out)
out.seek(0)
assert out.read().decode("utf-8") == "PTB Rocks! ⅞"
assert out.read().decode(TextEncoding.UTF_8) == "PTB Rocks! ⅞"

View file

@ -22,6 +22,8 @@ import json
import os
import random
from telegram._utils.strings import TextEncoding
# Provide some public fallbacks so it's easy for contributors to run tests on their local machine
# These bots are only able to talk in our test chats, so they are quite useless for other
# purposes than testing.
@ -42,10 +44,12 @@ GITHUB_ACTION = os.getenv("GITHUB_ACTION", None)
BOTS = os.getenv("BOTS", None)
JOB_INDEX = os.getenv("JOB_INDEX", None)
if GITHUB_ACTION is not None and BOTS is not None and JOB_INDEX is not None:
BOTS = json.loads(base64.b64decode(BOTS).decode("utf-8"))
BOTS = json.loads(base64.b64decode(BOTS).decode(TextEncoding.UTF_8))
JOB_INDEX = int(JOB_INDEX)
FALLBACKS = json.loads(base64.b64decode(FALLBACKS).decode("utf-8")) # type: list[dict[str, str]]
FALLBACKS = json.loads(
base64.b64decode(FALLBACKS).decode(TextEncoding.UTF_8)
) # type: list[dict[str, str]]
class BotInfoProvider:

View file

@ -23,6 +23,7 @@ import pytest
from httpx import AsyncClient, AsyncHTTPTransport, Response
from telegram._utils.defaultvalue import DEFAULT_NONE
from telegram._utils.strings import TextEncoding
from telegram._utils.types import ODVInput
from telegram.error import BadRequest, RetryAfter, TimedOut
from telegram.request import HTTPXRequest, RequestData
@ -103,7 +104,7 @@ async def send_webhook_message(
content_len = None
payload = None
else:
payload = bytes(payload_str, encoding="utf-8")
payload = bytes(payload_str, encoding=TextEncoding.UTF_8)
if content_len == -1:
content_len = len(payload)

View file

@ -31,6 +31,7 @@ import pytest
from httpx import AsyncHTTPTransport
from telegram._utils.defaultvalue import DEFAULT_NONE
from telegram._utils.strings import TextEncoding
from telegram.error import (
BadRequest,
ChatMigrated,
@ -247,7 +248,7 @@ class TestRequestWithoutRequest:
else:
match = "Unknown HTTPError"
server_response = json.dumps(response_data).encode("utf-8")
server_response = json.dumps(response_data).encode(TextEncoding.UTF_8)
monkeypatch.setattr(
httpx_request,

View file

@ -19,6 +19,8 @@
import re
from pathlib import Path
from telegram._utils.strings import TextEncoding
telegram_root = Path(__file__).parent.parent / "telegram"
telegram_ext_root = telegram_root / "ext"
exclude_dirs = {
@ -46,7 +48,7 @@ def test_types_are_converted_to_enum():
# We don't check tg.ext.
continue
text = path.read_text(encoding="utf-8")
text = path.read_text(encoding=TextEncoding.UTF_8)
for match in re.finditer(pattern, text):
if any(exclude_pattern.match(match.group(0)) for exclude_pattern in exclude_patterns):
continue