mirror of
https://github.com/python-telegram-bot/python-telegram-bot.git
synced 2024-11-21 14:46:29 +01:00
Add Internal Constants for Encodings (#4378)
This commit is contained in:
parent
c3f17bb18e
commit
0913b859d7
15 changed files with 62 additions and 25 deletions
|
@ -97,6 +97,7 @@ The following wonderful people contributed directly or indirectly to this projec
|
|||
- `Oleg Sushchenko <https://github.com/feuillemorte>`_
|
||||
- `Or Bin <https://github.com/OrBin>`_
|
||||
- `overquota <https://github.com/overquota>`_
|
||||
- `Pablo Martinez <https://github.com/elpekenin>`_
|
||||
- `Paradox <https://github.com/paradox70>`_
|
||||
- `Patrick Hofmann <https://github.com/PH89>`_
|
||||
- `Paul Larsen <https://github.com/PaulSonOfLars>`_
|
||||
|
|
|
@ -23,6 +23,7 @@ from typing import IO, Optional, Union
|
|||
from uuid import uuid4
|
||||
|
||||
from telegram._utils.files import load_file
|
||||
from telegram._utils.strings import TextEncoding
|
||||
from telegram._utils.types import FieldTuple
|
||||
|
||||
_DEFAULT_MIME_TYPE = "application/octet-stream"
|
||||
|
@ -74,7 +75,7 @@ class InputFile:
|
|||
if isinstance(obj, bytes):
|
||||
self.input_file_content: bytes = obj
|
||||
elif isinstance(obj, str):
|
||||
self.input_file_content = obj.encode("utf-8")
|
||||
self.input_file_content = obj.encode(TextEncoding.UTF_8)
|
||||
else:
|
||||
reported_filename, self.input_file_content = load_file(obj)
|
||||
filename = filename or reported_filename
|
||||
|
|
|
@ -24,6 +24,7 @@ from telegram._files.photosize import PhotoSize
|
|||
from telegram._messageentity import MessageEntity
|
||||
from telegram._telegramobject import TelegramObject
|
||||
from telegram._utils.argumentparsing import parse_sequence_arg
|
||||
from telegram._utils.strings import TextEncoding
|
||||
from telegram._utils.types import JSONDict
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
@ -157,10 +158,10 @@ class Game(TelegramObject):
|
|||
if not self.text:
|
||||
raise RuntimeError("This Game has no 'text'.")
|
||||
|
||||
entity_text = self.text.encode("utf-16-le")
|
||||
entity_text = self.text.encode(TextEncoding.UTF_16_LE)
|
||||
entity_text = entity_text[entity.offset * 2 : (entity.offset + entity.length) * 2]
|
||||
|
||||
return entity_text.decode("utf-16-le")
|
||||
return entity_text.decode(TextEncoding.UTF_16_LE)
|
||||
|
||||
def parse_text_entities(self, types: Optional[List[str]] = None) -> Dict[MessageEntity, str]:
|
||||
"""
|
||||
|
|
|
@ -68,6 +68,7 @@ from telegram._utils.argumentparsing import parse_sequence_arg
|
|||
from telegram._utils.datetime import extract_tzinfo_from_defaults, from_timestamp
|
||||
from telegram._utils.defaultvalue import DEFAULT_NONE, DefaultValue
|
||||
from telegram._utils.entities import parse_message_entities, parse_message_entity
|
||||
from telegram._utils.strings import TextEncoding
|
||||
from telegram._utils.types import (
|
||||
CorrectOptionID,
|
||||
FileInput,
|
||||
|
@ -1516,8 +1517,8 @@ class Message(MaybeInaccessibleMessage):
|
|||
raise RuntimeError("This message has neither text nor caption.")
|
||||
|
||||
# Telegram wants the position in UTF-16 code units, so we have to calculate in that space
|
||||
utf16_text = text.encode("utf-16-le")
|
||||
utf16_quote = quote.encode("utf-16-le")
|
||||
utf16_text = text.encode(TextEncoding.UTF_16_LE)
|
||||
utf16_quote = quote.encode(TextEncoding.UTF_16_LE)
|
||||
effective_index = index or 0
|
||||
|
||||
matches = list(re.finditer(re.escape(utf16_quote), utf16_text))
|
||||
|
@ -4479,7 +4480,7 @@ class Message(MaybeInaccessibleMessage):
|
|||
if message_text is None:
|
||||
return None
|
||||
|
||||
utf_16_text = message_text.encode("utf-16-le")
|
||||
utf_16_text = message_text.encode(TextEncoding.UTF_16_LE)
|
||||
html_text = ""
|
||||
last_offset = 0
|
||||
|
||||
|
@ -4543,7 +4544,9 @@ class Message(MaybeInaccessibleMessage):
|
|||
# text is part of the parent entity
|
||||
html_text += (
|
||||
escape(
|
||||
utf_16_text[last_offset * 2 : (entity.offset - offset) * 2].decode("utf-16-le")
|
||||
utf_16_text[last_offset * 2 : (entity.offset - offset) * 2].decode(
|
||||
TextEncoding.UTF_16_LE
|
||||
)
|
||||
)
|
||||
+ insert
|
||||
)
|
||||
|
@ -4551,7 +4554,7 @@ class Message(MaybeInaccessibleMessage):
|
|||
last_offset = entity.offset - offset + entity.length
|
||||
|
||||
# see comment above
|
||||
html_text += escape(utf_16_text[last_offset * 2 :].decode("utf-16-le"))
|
||||
html_text += escape(utf_16_text[last_offset * 2 :].decode(TextEncoding.UTF_16_LE))
|
||||
|
||||
return html_text
|
||||
|
||||
|
@ -4680,7 +4683,7 @@ class Message(MaybeInaccessibleMessage):
|
|||
if message_text is None:
|
||||
return None
|
||||
|
||||
utf_16_text = message_text.encode("utf-16-le")
|
||||
utf_16_text = message_text.encode(TextEncoding.UTF_16_LE)
|
||||
markdown_text = ""
|
||||
last_offset = 0
|
||||
|
||||
|
@ -4773,7 +4776,7 @@ class Message(MaybeInaccessibleMessage):
|
|||
markdown_text += (
|
||||
escape_markdown(
|
||||
utf_16_text[last_offset * 2 : (entity.offset - offset) * 2].decode(
|
||||
"utf-16-le"
|
||||
TextEncoding.UTF_16_LE
|
||||
),
|
||||
version=version,
|
||||
)
|
||||
|
@ -4784,7 +4787,7 @@ class Message(MaybeInaccessibleMessage):
|
|||
|
||||
# see comment above
|
||||
markdown_text += escape_markdown(
|
||||
utf_16_text[last_offset * 2 :].decode("utf-16-le"),
|
||||
utf_16_text[last_offset * 2 :].decode(TextEncoding.UTF_16_LE),
|
||||
version=version,
|
||||
)
|
||||
|
||||
|
|
|
@ -26,6 +26,7 @@ from telegram import constants
|
|||
from telegram._telegramobject import TelegramObject
|
||||
from telegram._user import User
|
||||
from telegram._utils import enum
|
||||
from telegram._utils.strings import TextEncoding
|
||||
from telegram._utils.types import JSONDict
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
@ -203,7 +204,7 @@ class MessageEntity(TelegramObject):
|
|||
for i, position in enumerate(positions):
|
||||
last_position = positions[i - 1] if i > 0 else 0
|
||||
text_slice = text[last_position:position]
|
||||
accumulated_length += len(text_slice.encode("utf-16-le")) // 2
|
||||
accumulated_length += len(text_slice.encode(TextEncoding.UTF_16_LE)) // 2
|
||||
position_translation[position] = accumulated_length
|
||||
# get the final output entities
|
||||
out = []
|
||||
|
|
|
@ -39,6 +39,7 @@ except ImportError:
|
|||
|
||||
from telegram._telegramobject import TelegramObject
|
||||
from telegram._utils.argumentparsing import parse_sequence_arg
|
||||
from telegram._utils.strings import TextEncoding
|
||||
from telegram._utils.types import JSONDict
|
||||
from telegram.error import PassportDecryptionError
|
||||
|
||||
|
@ -98,7 +99,7 @@ def decrypt(secret, hash, data):
|
|||
@no_type_check
|
||||
def decrypt_json(secret, hash, data):
|
||||
"""Decrypts data using secret and hash and then decodes utf-8 string and loads json"""
|
||||
return json.loads(decrypt(secret, hash, data).decode("utf-8"))
|
||||
return json.loads(decrypt(secret, hash, data).decode(TextEncoding.UTF_8))
|
||||
|
||||
|
||||
class EncryptedCredentials(TelegramObject):
|
||||
|
|
|
@ -26,6 +26,7 @@ Warning:
|
|||
from typing import Dict, Optional, Sequence
|
||||
|
||||
from telegram._messageentity import MessageEntity
|
||||
from telegram._utils.strings import TextEncoding
|
||||
|
||||
|
||||
def parse_message_entity(text: str, entity: MessageEntity) -> str:
|
||||
|
@ -38,10 +39,10 @@ def parse_message_entity(text: str, entity: MessageEntity) -> str:
|
|||
Returns:
|
||||
:obj:`str`: The text of the given entity.
|
||||
"""
|
||||
entity_text = text.encode("utf-16-le")
|
||||
entity_text = text.encode(TextEncoding.UTF_16_LE)
|
||||
entity_text = entity_text[entity.offset * 2 : (entity.offset + entity.length) * 2]
|
||||
|
||||
return entity_text.decode("utf-16-le")
|
||||
return entity_text.decode(TextEncoding.UTF_16_LE)
|
||||
|
||||
|
||||
def parse_message_entities(
|
||||
|
|
|
@ -24,6 +24,23 @@ Warning:
|
|||
the changelog.
|
||||
"""
|
||||
|
||||
from telegram._utils.enum import StringEnum
|
||||
|
||||
# TODO: Remove this when https://github.com/PyCQA/pylint/issues/6887 is resolved.
|
||||
# pylint: disable=invalid-enum-extension,invalid-slots
|
||||
|
||||
|
||||
class TextEncoding(StringEnum):
|
||||
"""This enum contains encoding schemes for text.
|
||||
|
||||
.. versionadded:: NEXT.VERSION
|
||||
"""
|
||||
|
||||
__slots__ = ()
|
||||
|
||||
UTF_8 = "utf-8"
|
||||
UTF_16_LE = "utf-16-le"
|
||||
|
||||
|
||||
def to_camel_case(snake_str: str) -> str:
|
||||
"""Converts a snake_case string to camelCase.
|
||||
|
|
|
@ -26,6 +26,7 @@ from typing import AsyncContextManager, Final, List, Optional, Tuple, Type, Type
|
|||
from telegram._utils.defaultvalue import DEFAULT_NONE as _DEFAULT_NONE
|
||||
from telegram._utils.defaultvalue import DefaultValue
|
||||
from telegram._utils.logging import get_logger
|
||||
from telegram._utils.strings import TextEncoding
|
||||
from telegram._utils.types import JSONDict, ODVInput
|
||||
from telegram._utils.warnings import warn
|
||||
from telegram._version import __version__ as ptb_ver
|
||||
|
@ -403,7 +404,7 @@ class BaseRequest(
|
|||
Raises:
|
||||
TelegramError: If loading the JSON data failed
|
||||
"""
|
||||
decoded_s = payload.decode("utf-8", "replace")
|
||||
decoded_s = payload.decode(TextEncoding.UTF_8, "replace")
|
||||
try:
|
||||
return json.loads(decoded_s)
|
||||
except ValueError as exc:
|
||||
|
|
|
@ -21,6 +21,7 @@ import json
|
|||
from typing import Any, Dict, List, Optional, Union, final
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from telegram._utils.strings import TextEncoding
|
||||
from telegram._utils.types import UploadFileDict
|
||||
from telegram.request._requestparameter import RequestParameter
|
||||
|
||||
|
@ -109,7 +110,7 @@ class RequestData:
|
|||
To use a custom library for JSON encoding, you can directly encode the keys of
|
||||
:attr:`parameters` - note that string valued keys should not be JSON encoded.
|
||||
"""
|
||||
return json.dumps(self.json_parameters).encode("utf-8")
|
||||
return json.dumps(self.json_parameters).encode(TextEncoding.UTF_8)
|
||||
|
||||
@property
|
||||
def multipart_data(self) -> UploadFileDict:
|
||||
|
|
|
@ -24,6 +24,7 @@ from io import BytesIO
|
|||
import pytest
|
||||
|
||||
from telegram import InputFile
|
||||
from telegram._utils.strings import TextEncoding
|
||||
from tests.auxil.files import data_file
|
||||
from tests.auxil.slots import mro_slots
|
||||
|
||||
|
@ -150,17 +151,17 @@ class TestInputFileWithRequest:
|
|||
await (await message.document.get_file()).download_to_memory(out=out)
|
||||
out.seek(0)
|
||||
|
||||
assert out.read().decode("utf-8") == "PTB Rocks! ⅞"
|
||||
assert out.read().decode(TextEncoding.UTF_8) == "PTB Rocks! ⅞"
|
||||
|
||||
async def test_send_string(self, bot, chat_id):
|
||||
# We test this here and not at the respective test modules because it's not worth
|
||||
# duplicating the test for the different methods
|
||||
message = await bot.send_document(
|
||||
chat_id, InputFile(data_file("text_file.txt").read_text(encoding="utf-8"))
|
||||
chat_id, InputFile(data_file("text_file.txt").read_text(encoding=TextEncoding.UTF_8))
|
||||
)
|
||||
out = BytesIO()
|
||||
|
||||
await (await message.document.get_file()).download_to_memory(out=out)
|
||||
out.seek(0)
|
||||
|
||||
assert out.read().decode("utf-8") == "PTB Rocks! ⅞"
|
||||
assert out.read().decode(TextEncoding.UTF_8) == "PTB Rocks! ⅞"
|
||||
|
|
|
@ -22,6 +22,8 @@ import json
|
|||
import os
|
||||
import random
|
||||
|
||||
from telegram._utils.strings import TextEncoding
|
||||
|
||||
# Provide some public fallbacks so it's easy for contributors to run tests on their local machine
|
||||
# These bots are only able to talk in our test chats, so they are quite useless for other
|
||||
# purposes than testing.
|
||||
|
@ -42,10 +44,12 @@ GITHUB_ACTION = os.getenv("GITHUB_ACTION", None)
|
|||
BOTS = os.getenv("BOTS", None)
|
||||
JOB_INDEX = os.getenv("JOB_INDEX", None)
|
||||
if GITHUB_ACTION is not None and BOTS is not None and JOB_INDEX is not None:
|
||||
BOTS = json.loads(base64.b64decode(BOTS).decode("utf-8"))
|
||||
BOTS = json.loads(base64.b64decode(BOTS).decode(TextEncoding.UTF_8))
|
||||
JOB_INDEX = int(JOB_INDEX)
|
||||
|
||||
FALLBACKS = json.loads(base64.b64decode(FALLBACKS).decode("utf-8")) # type: list[dict[str, str]]
|
||||
FALLBACKS = json.loads(
|
||||
base64.b64decode(FALLBACKS).decode(TextEncoding.UTF_8)
|
||||
) # type: list[dict[str, str]]
|
||||
|
||||
|
||||
class BotInfoProvider:
|
||||
|
|
|
@ -23,6 +23,7 @@ import pytest
|
|||
from httpx import AsyncClient, AsyncHTTPTransport, Response
|
||||
|
||||
from telegram._utils.defaultvalue import DEFAULT_NONE
|
||||
from telegram._utils.strings import TextEncoding
|
||||
from telegram._utils.types import ODVInput
|
||||
from telegram.error import BadRequest, RetryAfter, TimedOut
|
||||
from telegram.request import HTTPXRequest, RequestData
|
||||
|
@ -103,7 +104,7 @@ async def send_webhook_message(
|
|||
content_len = None
|
||||
payload = None
|
||||
else:
|
||||
payload = bytes(payload_str, encoding="utf-8")
|
||||
payload = bytes(payload_str, encoding=TextEncoding.UTF_8)
|
||||
|
||||
if content_len == -1:
|
||||
content_len = len(payload)
|
||||
|
|
|
@ -31,6 +31,7 @@ import pytest
|
|||
from httpx import AsyncHTTPTransport
|
||||
|
||||
from telegram._utils.defaultvalue import DEFAULT_NONE
|
||||
from telegram._utils.strings import TextEncoding
|
||||
from telegram.error import (
|
||||
BadRequest,
|
||||
ChatMigrated,
|
||||
|
@ -247,7 +248,7 @@ class TestRequestWithoutRequest:
|
|||
else:
|
||||
match = "Unknown HTTPError"
|
||||
|
||||
server_response = json.dumps(response_data).encode("utf-8")
|
||||
server_response = json.dumps(response_data).encode(TextEncoding.UTF_8)
|
||||
|
||||
monkeypatch.setattr(
|
||||
httpx_request,
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from telegram._utils.strings import TextEncoding
|
||||
|
||||
telegram_root = Path(__file__).parent.parent / "telegram"
|
||||
telegram_ext_root = telegram_root / "ext"
|
||||
exclude_dirs = {
|
||||
|
@ -46,7 +48,7 @@ def test_types_are_converted_to_enum():
|
|||
# We don't check tg.ext.
|
||||
continue
|
||||
|
||||
text = path.read_text(encoding="utf-8")
|
||||
text = path.read_text(encoding=TextEncoding.UTF_8)
|
||||
for match in re.finditer(pattern, text):
|
||||
if any(exclude_pattern.match(match.group(0)) for exclude_pattern in exclude_patterns):
|
||||
continue
|
||||
|
|
Loading…
Reference in a new issue