Fix issue using a local path with st.image on windows. (streamlit#8092)

## Describe your changes

This PR fixes an issue with using image files via a local path on Windows (streamlit#7271). This also unifies all URL checks in the Streamlit server with a Python native implementation and cuts out the `validators` dependency.

## GitHub Issue Link (if applicable)

- Closes streamlit#7271

## Testing Plan

- Updated tests

---

**Contribution License Agreement**

By submitting this pull request you agree that all contributions to this project are made under the Apache 2.0 license.
kaydotdev · Feb 6, 2024 · 60d0ee6 · 60d0ee6
1 parent e2a8204
commit 60d0ee6
Show file tree

Hide file tree

Showing 10 changed files with 144 additions and 83 deletions.
diff --git a/lib/min-constraints-gen.txt b/lib/min-constraints-gen.txt
@@ -18,5 +18,4 @@ toml==0.10.1
 tornado==6.0.3
 typing-extensions==4.3.0
 tzlocal==1.1
-validators==0.2
 watchdog==2.1.5
diff --git a/lib/setup.py b/lib/setup.py
@@ -54,7 +54,6 @@
     "toml>=0.10.1, <2",
     "typing-extensions>=4.3.0, <5",
     "tzlocal>=1.1, <6",
-    "validators>=0.2, <1",
     # Don't require watchdog on MacOS, since it'll fail without xcode tools.
     # Without watchdog, we fallback to a polling file watcher to check for app changes.
     "watchdog>=2.1.5; platform_system != 'Darwin'",

diff --git a/lib/streamlit/commands/page_config.py b/lib/streamlit/commands/page_config.py
@@ -15,7 +15,6 @@
 import random
 from textwrap import dedent
 from typing import TYPE_CHECKING, Mapping, Optional, Union, cast
-from urllib.parse import urlparse
 
 from typing_extensions import Final, Literal, TypeAlias
 
@@ -26,6 +25,7 @@
 from streamlit.runtime.metrics_util import gather_metrics
 from streamlit.runtime.scriptrunner import get_script_run_ctx
 from streamlit.string_util import is_emoji
+from streamlit.url_util import is_url
 from streamlit.util import lower_clean_dict_keys
 
 if TYPE_CHECKING:
@@ -259,21 +259,11 @@ def validate_menu_items(menu_items: MenuItems) -> None:
                 '"Get help", "Report a bug", and "About" '
                 f'("{k}" is not a valid key.)'
             )
-        if v is not None:
-            if not valid_url(v) and k != ABOUT_KEY:
-                raise StreamlitAPIException(f'"{v}" is a not a valid URL!')
+        if v is not None and (
+            not is_url(v, ("http", "https", "mailto")) and k != ABOUT_KEY
+        ):
+            raise StreamlitAPIException(f'"{v}" is a not a valid URL!')
 
 
 def valid_menu_item_key(key: str) -> "TypeGuard[MenuKey]":
     return key in {GET_HELP_KEY, REPORT_A_BUG_KEY, ABOUT_KEY}
-
-
-def valid_url(url: str) -> bool:
-    # Function taken from https://stackoverflow.com/questions/7160737/how-to-validate-a-url-in-python-malformed-or-not
-    try:
-        result = urlparse(url)
-        if result.scheme == "mailto":
-            return all([result.scheme, result.path])
-        return all([result.scheme, result.netloc])
-    except Exception:
-        return False
diff --git a/lib/streamlit/elements/image.py b/lib/streamlit/elements/image.py
@@ -22,16 +22,16 @@
 import base64
 import io
 import mimetypes
+import os
 import re
 from enum import IntEnum
 from typing import TYPE_CHECKING, List, Optional, Sequence, Union, cast
-from urllib.parse import urlparse
 
 import numpy as np
 from PIL import GifImagePlugin, Image, ImageFile
 from typing_extensions import Final, Literal, TypeAlias
 
-from streamlit import runtime
+from streamlit import runtime, url_util
 from streamlit.errors import StreamlitAPIException
 from streamlit.logger import get_logger
 from streamlit.proto.Image_pb2 import ImageList as ImageListProto
@@ -342,8 +342,15 @@ def image_to_url(
 
     # Strings
     if isinstance(image, str):
-        # Unpack local SVG image file to an SVG string
-        if image.endswith(".svg") and not image.startswith(("http://", "https://")):
+
+        if not os.path.isfile(image) and url_util.is_url(
+            image, allowed_schemas=("http", "https", "data")
+        ):
+            # If it's a url, return it directly.
+            return image
+
+        if image.endswith(".svg") and os.path.isfile(image):
+            # Unpack local SVG image file to an SVG string
             with open(image) as textfile:
                 image = textfile.read()
 
@@ -361,15 +368,6 @@ def image_to_url(
             # Return SVG as data URI:
             return f"data:image/svg+xml;base64,{image_b64_encoded}"
 
-        # If it's a url, return it directly.
-        try:
-            p = urlparse(image)
-            if p.scheme:
-                return image
-        except UnicodeDecodeError:
-            # If the string runs into a UnicodeDecodeError, we assume it is not a valid URL.
-            pass
-
         # Otherwise, try to open it as a file.
         try:
             with open(image, "rb") as f:

diff --git a/lib/streamlit/elements/media.py b/lib/streamlit/elements/media.py
@@ -19,7 +19,7 @@
 from typing_extensions import Final, TypeAlias
 
 import streamlit as st
-from streamlit import runtime, type_util
+from streamlit import runtime, type_util, url_util
 from streamlit.errors import StreamlitAPIException
 from streamlit.proto.Audio_pb2 import Audio as AudioProto
 from streamlit.proto.Video_pb2 import Video as VideoProto
@@ -280,16 +280,16 @@ def marshall_video(
     start_time : int
         The time from which this element should start playing. (default: 0)
     """
-    from validators import url
 
     proto.start_time = start_time
 
     # "type" distinguishes between YouTube and non-YouTube links
     proto.type = VideoProto.Type.NATIVE
 
-    if isinstance(data, str) and url(data):
-        youtube_url = _reshape_youtube_url(data)
-        if youtube_url:
+    if isinstance(data, str) and url_util.is_url(
+        data, allowed_schemas=("http", "https", "data")
+    ):
+        if youtube_url := _reshape_youtube_url(data):
             proto.url = youtube_url
             proto.type = VideoProto.Type.YOUTUBE_IFRAME
         else:
@@ -405,11 +405,12 @@ def marshall_audio(
     sample_rate: int or None
         Optional param to provide sample_rate in case of numpy array
     """
-    from validators import url
 
     proto.start_time = start_time
 
-    if isinstance(data, str) and url(data):
+    if isinstance(data, str) and url_util.is_url(
+        data, allowed_schemas=("http", "https", "data")
+    ):
         proto.url = data
 
     else:

diff --git a/lib/streamlit/url_util.py b/lib/streamlit/url_util.py
@@ -13,8 +13,13 @@
 # limitations under the License.
 
 import re
-import urllib
-from typing import Optional
+from typing import Literal, Optional, Tuple
+from urllib.parse import urlparse
+
+from typing_extensions import TypeAlias
+
+UrlSchema: TypeAlias = Literal["http", "https", "mailto", "data"]
+
 
 # Regular expression for process_gitblob_url
 _GITBLOB_RE = re.compile(
@@ -55,15 +60,46 @@ def get_hostname(url: str) -> Optional[str]:
     # Just so urllib can parse the URL, make sure there's a protocol.
     # (The actual protocol doesn't matter to us)
     if "://" not in url:
-        url = "http://%s" % url
+        url = f"http://{url}"
 
-    parsed = urllib.parse.urlparse(url)
+    parsed = urlparse(url)
     return parsed.hostname
 
 
 def print_url(title, url):
     """Pretty-print a URL on the terminal."""
     import click
 
-    click.secho("  %s: " % title, nl=False, fg="blue")
+    click.secho(f"  {title}: ", nl=False, fg="blue")
     click.secho(url, bold=True)
+
+
+def is_url(
+    url: str,
+    allowed_schemas: Tuple[UrlSchema, ...] = ("http", "https"),
+) -> bool:
+    """Check if a string looks like an URL.
+
+    This doesn't check if the URL is actually valid or reachable.
+
+    Parameters
+    ----------
+    url : str
+        The URL to check.
+
+    allowed_schemas : Tuple[str]
+        The allowed URL schemas. Default is ("http", "https").
+    """
+    try:
+        result = urlparse(str(url))
+        if result.scheme not in allowed_schemas:
+            return False
+
+        if result.scheme in ["http", "https"]:
+            return bool(result.netloc)
+        elif result.scheme in ["mailto", "data"]:
+            return bool(result.path)
+
+    except ValueError:
+        return False
+    return False
diff --git a/lib/streamlit/web/cli.py b/lib/streamlit/web/cli.py
@@ -196,7 +196,7 @@ def main_run(target: str, args=None, **kwargs):
     will download the script to a temporary file and runs this file.
 
     """
-    from validators import url
+    from streamlit import url_util
 
     bootstrap.load_config_options(flag_options=kwargs)
 
@@ -211,14 +211,12 @@ def main_run(target: str, args=None, **kwargs):
                 f"Streamlit requires raw Python (.py) files, not {extension}.\nFor more information, please see https://docs.streamlit.io"
             )
 
-    if url(target):
+    if url_util.is_url(target):
         from streamlit.temporary_directory import TemporaryDirectory
 
         with TemporaryDirectory() as temp_dir:
             from urllib.parse import urlparse
 
-            from streamlit import url_util
-
             path = urlparse(target).path
             main_script_path = os.path.join(
                 temp_dir, path.strip("/").rsplit("/", 1)[-1]

diff --git a/lib/tests/streamlit/commands/page_config_test.py b/lib/tests/streamlit/commands/page_config_test.py
@@ -17,12 +17,7 @@
 from parameterized import param, parameterized
 
 import streamlit as st
-from streamlit.commands.page_config import (
-    ENG_EMOJIS,
-    RANDOM_EMOJIS,
-    PageIcon,
-    valid_url,
-)
+from streamlit.commands.page_config import ENG_EMOJIS, RANDOM_EMOJIS, PageIcon
 from streamlit.errors import StreamlitAPIException
 from streamlit.proto.PageConfig_pb2 import PageConfig as PageConfigProto
 from streamlit.string_util import is_emoji
@@ -154,20 +149,3 @@ def test_set_page_config_menu_items_empty_dict(self):
         st.set_page_config(menu_items={})
         c = self.get_message_from_queue().page_config_changed.menu_items
         self.assertEqual(c.about_section_md, "")
-
-    @parameterized.expand(
-        [
-            ("http://www.cwi.nl:80/%7Eguido/Python.html", True),
-            ("/data/Python.html", False),
-            (532, False),
-            ("dkakasdkjdjakdjadjfalskdjfalk", False),
-            ("https://stackoverflow.com", True),
-            ("mailto:[email protected]", True),
-            ("mailto:", False),
-        ]
-    )
-    def test_valid_url(self, url, expected_value):
-        if expected_value:
-            self.assertTrue(valid_url(url))
-        else:
-            self.assertFalse(valid_url(url))
diff --git a/lib/tests/streamlit/url_util_test.py b/lib/tests/streamlit/url_util_test.py
@@ -12,7 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 import unittest
+from typing import Any, Tuple
+
+from parameterized import parameterized
 
 from streamlit import url_util
 
@@ -63,13 +68,70 @@
 
 class GitHubUrlTest(unittest.TestCase):
     def test_github_url_is_replaced(self):
-        for (target, processed) in GITHUB_URLS:
+        for target, processed in GITHUB_URLS:
             assert url_util.process_gitblob_url(target) == processed
 
     def test_gist_url_is_replaced(self):
-        for (target, processed) in GIST_URLS:
+        for target, processed in GIST_URLS:
             assert url_util.process_gitblob_url(target) == processed
 
     def test_nonmatching_url_is_not_replaced(self):
         for url in INVALID_URLS:
             assert url == url_util.process_gitblob_url(url)
+
+
+class UrlUtilTest(unittest.TestCase):
+    @parameterized.expand(
+        [
+            # Valid URLs:
+            ("http://www.cwi.nl:80/%7Eguido/Python.html", True),
+            ("https://stackoverflow.com", True),
+            ("mailto:[email protected]", True),
+            ("data:image/svg+xml;base64,PHN2ZyB4aHcvMjAwMC9zdmci", True),
+            ("data:application/pdf;base64,PHN2ZyB4aHcvMjAwMC9zdmci", True),
+            ("http://127.0.0.1", True),  # IP as domain
+            ("https://[::1]", True),  # IPv6 address in URL
+            # Invalid URLs:
+            ("/data/Python.html", False),
+            ("www.streamlit.io", False),  # Missing scheme
+            (532, False),
+            ("dkakasdkjdjakdjadjfalskdjfalk", False),
+            ("mailto:", False),
+            ("ftp://example.com/resource", False),  # Unsupported scheme
+            ("https:///path/to/resource", False),  # Missing netloc
+        ]
+    )
+    def test_is_url(self, url: Any, expected_value: bool):
+        """Test the is_url utility function."""
+        self.assertEqual(
+            url_util.is_url(url, ("http", "https", "data", "mailto")), expected_value
+        )
+
+    @parameterized.expand(
+        [
+            ("http://example.com", ("http",), True),
+            ("mailto:[email protected]", ("http", "https"), False),
+            ("mailto:[email protected]", ("http", "mailto"), True),
+            ("https://example.com", ("http",), False),
+            ("https://example.com", ("https",), True),
+            ("data:image/png;base64,abc123", ("data",), True),
+            ("data:image/png;base64,abc123", ("http", "https", "mailto"), False),
+            ("https://example.com", ("http", "https", "mailto"), True),
+            ("http://example.com", None, True),  # None schema == use default
+            ("https://example.com", None, True),  # None schema == use default
+            ("data:image/png;base64,abc123", None, False),  # None schema == use default
+            ("mailto:[email protected]", None, False),  # None schema == use default
+        ]
+    )
+    def test_is_url_limits_schema(
+        self,
+        url: str,
+        allowed_schemas: Tuple[url_util.UrlSchema, ...] | None,
+        expected_value: bool,
+    ):
+        """Test that is_ur applies the allowed schema parameter."""
+
+        if allowed_schemas is None:
+            self.assertEqual(url_util.is_url(url), expected_value)
+        else:
+            self.assertEqual(url_util.is_url(url, allowed_schemas), expected_value)