From 1952ba21b0814e0028ec696350bbbf699c593043 Mon Sep 17 00:00:00 2001 From: Abdujabbar MIRKHALIKOV Date: Fri, 20 Dec 2024 14:56:06 +0500 Subject: [PATCH] Small refactor for MarkItDown.convert_response function. --- src/markitdown/_markitdown.py | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index 040a586..ecaa448 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -1385,38 +1385,26 @@ def convert_response( content_disposition = response.headers.get("content-disposition", "") m = re.search(r"filename=([^;]+)", content_disposition) if m: - base, ext = os.path.splitext(m.group(1).strip("\"'")) + _, ext = os.path.splitext(m.group(1).strip("\"'")) self._append_ext(extensions, ext) # Read from the extension from the path - base, ext = os.path.splitext(urlparse(response.url).path) + _, ext = os.path.splitext(urlparse(response.url).path) self._append_ext(extensions, ext) # Save the file locally to a temporary file. It will be deleted before this method exits - handle, temp_path = tempfile.mkstemp() - fh = os.fdopen(handle, "wb") - result = None - try: + with tempfile.NamedTemporaryFile("wb") as temp_file: # Download the file for chunk in response.iter_content(chunk_size=512): - fh.write(chunk) - fh.close() + temp_file.write(chunk) + temp_file.flush() # Use puremagic to check for more extension options - for g in self._guess_ext_magic(temp_path): + for g in self._guess_ext_magic(temp_file.name): self._append_ext(extensions, g) - # Convert - result = self._convert(temp_path, extensions, url=response.url, **kwargs) - # Clean up - finally: - try: - fh.close() - except Exception: - pass - os.unlink(temp_path) - - return result + # Convert and return + return self._convert(temp_file.name, extensions, url=response.url, **kwargs) def _convert( self, local_path: str, extensions: List[Union[str, None]], **kwargs