Handle Non-Binary File Input (#2202)

* Don't fail if stream is not bytes

* move logger

* Refactor InputFile.is_image

* Use f-strings

* some clean up
This commit is contained in:
Bibo-Joshi 2020-11-24 20:31:34 +01:00 committed by GitHub
parent 58b9882021
commit 1cd3a0a156
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 33 additions and 17 deletions

View file

@ -20,14 +20,14 @@
"""This module contains an object that represents a Telegram InputFile."""
import imghdr
import logging
import mimetypes
import os
from typing import IO, Optional, Tuple
from uuid import uuid4
from telegram import TelegramError
DEFAULT_MIME_TYPE = 'application/octet-stream'
logger = logging.getLogger(__name__)
class InputFile:
@ -59,13 +59,14 @@ class InputFile:
elif hasattr(obj, 'name') and not isinstance(obj.name, int):
self.filename = os.path.basename(obj.name)
try:
self.mimetype = self.is_image(self.input_file_content)
except TelegramError:
if self.filename:
self.mimetype = mimetypes.guess_type(self.filename)[0] or DEFAULT_MIME_TYPE
else:
self.mimetype = DEFAULT_MIME_TYPE
image_mime_type = self.is_image(self.input_file_content)
if image_mime_type:
self.mimetype = image_mime_type
elif self.filename:
self.mimetype = mimetypes.guess_type(self.filename)[0] or DEFAULT_MIME_TYPE
else:
self.mimetype = DEFAULT_MIME_TYPE
if not self.filename:
self.filename = self.mimetype.replace('/', '.')
@ -74,21 +75,27 @@ class InputFile:
return self.filename, self.input_file_content, self.mimetype
@staticmethod
def is_image(stream: bytes) -> str:
def is_image(stream: bytes) -> Optional[str]:
"""Check if the content file is an image by analyzing its headers.
Args:
stream (:obj:`bytes`): A byte stream representing the content of a file.
Returns:
:obj:`str`: The str mime-type of an image.
:obj:`str` | :obj:`None`: The mime-type of an image, if the input is an image, or
:obj:`None` otherwise.
"""
image = imghdr.what(None, stream)
if image:
return 'image/%s' % image
raise TelegramError('Could not parse file content')
try:
image = imghdr.what(None, stream)
if image:
return f'image/{image}'
return None
except Exception:
logger.debug(
"Could not parse file content. Assuming that file is not an image.", exc_info=True
)
return None
@staticmethod
def is_file(obj: object) -> bool:

1
tests/data/text_file.txt Normal file
View file

@ -0,0 +1 @@
PTB Rocks!

View file

@ -17,6 +17,7 @@
#
# You should have received a copy of the GNU Lesser Public License
# along with this program. If not, see [http://www.gnu.org/licenses/].
import logging
import os
import subprocess
import sys
@ -48,7 +49,7 @@ class TestInputFile:
# to kill it.
pass
def test_mimetypes(self):
def test_mimetypes(self, caplog):
# Only test a few to make sure logic works okay
assert InputFile(open('tests/data/telegram.jpg', 'rb')).mimetype == 'image/jpeg'
assert InputFile(open('tests/data/telegram.webp', 'rb')).mimetype == 'image/webp'
@ -65,6 +66,13 @@ class TestInputFile:
)
assert InputFile(BytesIO(b'blah')).mimetype == 'application/octet-stream'
# Test string file
with caplog.at_level(logging.DEBUG):
assert InputFile(open('tests/data/text_file.txt', 'r')).mimetype == 'text/plain'
assert len(caplog.records) == 1
assert caplog.records[0].getMessage().startswith('Could not parse file content')
def test_filenames(self):
assert InputFile(open('tests/data/telegram.jpg', 'rb')).filename == 'telegram.jpg'
assert InputFile(open('tests/data/telegram.jpg', 'rb'), filename='blah').filename == 'blah'