Skip to content

Commit

Permalink
Fix issue using a local path with st.image on windows. (streamlit#8092
Browse files Browse the repository at this point in the history
)

## Describe your changes

This PR fixes an issue with using image files via a local path on
Windows (streamlit#7271). This also
unifies all URL checks in the Streamlit server with a Python native
implementation and cuts out the `validators` dependency.

## GitHub Issue Link (if applicable)

- Closes streamlit#7271

## Testing Plan

- Updated tests

---

**Contribution License Agreement**

By submitting this pull request you agree that all contributions to this
project are made under the Apache 2.0 license.
  • Loading branch information
lukasmasuch authored Feb 6, 2024
1 parent e2a8204 commit 60d0ee6
Show file tree
Hide file tree
Showing 10 changed files with 144 additions and 83 deletions.
1 change: 0 additions & 1 deletion lib/min-constraints-gen.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,4 @@ toml==0.10.1
tornado==6.0.3
typing-extensions==4.3.0
tzlocal==1.1
validators==0.2
watchdog==2.1.5
1 change: 0 additions & 1 deletion lib/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@
"toml>=0.10.1, <2",
"typing-extensions>=4.3.0, <5",
"tzlocal>=1.1, <6",
"validators>=0.2, <1",
# Don't require watchdog on MacOS, since it'll fail without xcode tools.
# Without watchdog, we fallback to a polling file watcher to check for app changes.
"watchdog>=2.1.5; platform_system != 'Darwin'",
Expand Down
20 changes: 5 additions & 15 deletions lib/streamlit/commands/page_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import random
from textwrap import dedent
from typing import TYPE_CHECKING, Mapping, Optional, Union, cast
from urllib.parse import urlparse

from typing_extensions import Final, Literal, TypeAlias

Expand All @@ -26,6 +25,7 @@
from streamlit.runtime.metrics_util import gather_metrics
from streamlit.runtime.scriptrunner import get_script_run_ctx
from streamlit.string_util import is_emoji
from streamlit.url_util import is_url
from streamlit.util import lower_clean_dict_keys

if TYPE_CHECKING:
Expand Down Expand Up @@ -259,21 +259,11 @@ def validate_menu_items(menu_items: MenuItems) -> None:
'"Get help", "Report a bug", and "About" '
f'("{k}" is not a valid key.)'
)
if v is not None:
if not valid_url(v) and k != ABOUT_KEY:
raise StreamlitAPIException(f'"{v}" is a not a valid URL!')
if v is not None and (
not is_url(v, ("http", "https", "mailto")) and k != ABOUT_KEY
):
raise StreamlitAPIException(f'"{v}" is a not a valid URL!')


def valid_menu_item_key(key: str) -> "TypeGuard[MenuKey]":
    """Tell whether ``key`` is one of the recognised menu item keys.

    Returns True only when ``key`` equals GET_HELP_KEY, REPORT_A_BUG_KEY
    or ABOUT_KEY.
    """
    recognised_keys = {GET_HELP_KEY, REPORT_A_BUG_KEY, ABOUT_KEY}
    return key in recognised_keys


def valid_url(url: str) -> bool:
    """Return True when ``url`` parses into a plausible URL.

    A ``mailto`` URL needs a non-empty path (the address); every other URL
    needs both a scheme and a network location. Anything that cannot be
    parsed at all is reported as invalid rather than raising.
    """
    # Function taken from https://stackoverflow.com/questions/7160737/how-to-validate-a-url-in-python-malformed-or-not
    try:
        parsed = urlparse(url)
    except Exception:
        # Deliberately broad: non-string input (e.g. an int) must yield False.
        return False

    if parsed.scheme == "mailto":
        return bool(parsed.path)
    return bool(parsed.scheme) and bool(parsed.netloc)
24 changes: 11 additions & 13 deletions lib/streamlit/elements/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@
import base64
import io
import mimetypes
import os
import re
from enum import IntEnum
from typing import TYPE_CHECKING, List, Optional, Sequence, Union, cast
from urllib.parse import urlparse

import numpy as np
from PIL import GifImagePlugin, Image, ImageFile
from typing_extensions import Final, Literal, TypeAlias

from streamlit import runtime
from streamlit import runtime, url_util
from streamlit.errors import StreamlitAPIException
from streamlit.logger import get_logger
from streamlit.proto.Image_pb2 import ImageList as ImageListProto
Expand Down Expand Up @@ -342,8 +342,15 @@ def image_to_url(

# Strings
if isinstance(image, str):
# Unpack local SVG image file to an SVG string
if image.endswith(".svg") and not image.startswith(("http://", "https://")):

if not os.path.isfile(image) and url_util.is_url(
image, allowed_schemas=("http", "https", "data")
):
# If it's a url, return it directly.
return image

if image.endswith(".svg") and os.path.isfile(image):
# Unpack local SVG image file to an SVG string
with open(image) as textfile:
image = textfile.read()

Expand All @@ -361,15 +368,6 @@ def image_to_url(
# Return SVG as data URI:
return f"data:image/svg+xml;base64,{image_b64_encoded}"

# If it's a url, return it directly.
try:
p = urlparse(image)
if p.scheme:
return image
except UnicodeDecodeError:
# If the string runs into a UnicodeDecodeError, we assume it is not a valid URL.
pass

# Otherwise, try to open it as a file.
try:
with open(image, "rb") as f:
Expand Down
15 changes: 8 additions & 7 deletions lib/streamlit/elements/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from typing_extensions import Final, TypeAlias

import streamlit as st
from streamlit import runtime, type_util
from streamlit import runtime, type_util, url_util
from streamlit.errors import StreamlitAPIException
from streamlit.proto.Audio_pb2 import Audio as AudioProto
from streamlit.proto.Video_pb2 import Video as VideoProto
Expand Down Expand Up @@ -280,16 +280,16 @@ def marshall_video(
start_time : int
The time from which this element should start playing. (default: 0)
"""
from validators import url

proto.start_time = start_time

# "type" distinguishes between YouTube and non-YouTube links
proto.type = VideoProto.Type.NATIVE

if isinstance(data, str) and url(data):
youtube_url = _reshape_youtube_url(data)
if youtube_url:
if isinstance(data, str) and url_util.is_url(
data, allowed_schemas=("http", "https", "data")
):
if youtube_url := _reshape_youtube_url(data):
proto.url = youtube_url
proto.type = VideoProto.Type.YOUTUBE_IFRAME
else:
Expand Down Expand Up @@ -405,11 +405,12 @@ def marshall_audio(
sample_rate: int or None
Optional param to provide sample_rate in case of numpy array
"""
from validators import url

proto.start_time = start_time

if isinstance(data, str) and url(data):
if isinstance(data, str) and url_util.is_url(
data, allowed_schemas=("http", "https", "data")
):
proto.url = data

else:
Expand Down
46 changes: 41 additions & 5 deletions lib/streamlit/url_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,13 @@
# limitations under the License.

import re
import urllib
from typing import Optional
from typing import Literal, Optional, Tuple
from urllib.parse import urlparse

from typing_extensions import TypeAlias

UrlSchema: TypeAlias = Literal["http", "https", "mailto", "data"]


# Regular expression for process_gitblob_url
_GITBLOB_RE = re.compile(
Expand Down Expand Up @@ -55,15 +60,46 @@ def get_hostname(url: str) -> Optional[str]:
# Just so urllib can parse the URL, make sure there's a protocol.
# (The actual protocol doesn't matter to us)
if "://" not in url:
url = "http://%s" % url
url = f"http://{url}"

parsed = urllib.parse.urlparse(url)
parsed = urlparse(url)
return parsed.hostname


def print_url(title, url):
"""Pretty-print a URL on the terminal."""
import click

click.secho(" %s: " % title, nl=False, fg="blue")
click.secho(f" {title}: ", nl=False, fg="blue")
click.secho(url, bold=True)


def is_url(
    url: str,
    allowed_schemas: Tuple[UrlSchema, ...] = ("http", "https"),
) -> bool:
    """Check whether a string has the shape of a URL.

    Only the structure is inspected; nothing verifies that the URL is
    actually valid or reachable.

    Parameters
    ----------
    url : str
        The candidate URL. It is converted with ``str()`` before parsing.
    allowed_schemas : Tuple[str]
        The URL schemas that are accepted. Default is ("http", "https").

    Returns
    -------
    bool
        True if the schema is allowed and the schema-specific part is
        present: a network location for http/https, a path for
        mailto/data. False in every other case, including input that
        cannot be parsed.
    """
    try:
        parts = urlparse(str(url))
        scheme = parts.scheme
        if scheme in allowed_schemas:
            if scheme in ("http", "https"):
                return bool(parts.netloc)
            if scheme in ("mailto", "data"):
                return bool(parts.path)
    except ValueError:
        # urlparse rejects some malformed input (e.g. a broken IPv6 host).
        pass
    return False
6 changes: 2 additions & 4 deletions lib/streamlit/web/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def main_run(target: str, args=None, **kwargs):
will download the script to a temporary file and runs this file.
"""
from validators import url
from streamlit import url_util

bootstrap.load_config_options(flag_options=kwargs)

Expand All @@ -211,14 +211,12 @@ def main_run(target: str, args=None, **kwargs):
f"Streamlit requires raw Python (.py) files, not {extension}.\nFor more information, please see https://docs.streamlit.io"
)

if url(target):
if url_util.is_url(target):
from streamlit.temporary_directory import TemporaryDirectory

with TemporaryDirectory() as temp_dir:
from urllib.parse import urlparse

from streamlit import url_util

path = urlparse(target).path
main_script_path = os.path.join(
temp_dir, path.strip("/").rsplit("/", 1)[-1]
Expand Down
24 changes: 1 addition & 23 deletions lib/tests/streamlit/commands/page_config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,7 @@
from parameterized import param, parameterized

import streamlit as st
from streamlit.commands.page_config import (
ENG_EMOJIS,
RANDOM_EMOJIS,
PageIcon,
valid_url,
)
from streamlit.commands.page_config import ENG_EMOJIS, RANDOM_EMOJIS, PageIcon
from streamlit.errors import StreamlitAPIException
from streamlit.proto.PageConfig_pb2 import PageConfig as PageConfigProto
from streamlit.string_util import is_emoji
Expand Down Expand Up @@ -154,20 +149,3 @@ def test_set_page_config_menu_items_empty_dict(self):
st.set_page_config(menu_items={})
c = self.get_message_from_queue().page_config_changed.menu_items
self.assertEqual(c.about_section_md, "")

@parameterized.expand(
[
("http://www.cwi.nl:80/%7Eguido/Python.html", True),
("/data/Python.html", False),
(532, False),
("dkakasdkjdjakdjadjfalskdjfalk", False),
("https://stackoverflow.com", True),
("mailto:[email protected]", True),
("mailto:", False),
]
)
def test_valid_url(self, url, expected_value):
if expected_value:
self.assertTrue(valid_url(url))
else:
self.assertFalse(valid_url(url))
66 changes: 64 additions & 2 deletions lib/tests/streamlit/url_util_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import unittest
from typing import Any, Tuple

from parameterized import parameterized

from streamlit import url_util

Expand Down Expand Up @@ -63,13 +68,70 @@

class GitHubUrlTest(unittest.TestCase):
def test_github_url_is_replaced(self):
for (target, processed) in GITHUB_URLS:
for target, processed in GITHUB_URLS:
assert url_util.process_gitblob_url(target) == processed

def test_gist_url_is_replaced(self):
for (target, processed) in GIST_URLS:
for target, processed in GIST_URLS:
assert url_util.process_gitblob_url(target) == processed

def test_nonmatching_url_is_not_replaced(self):
for url in INVALID_URLS:
assert url == url_util.process_gitblob_url(url)


class UrlUtilTest(unittest.TestCase):
"""Parameterized tests for url_util.is_url."""
@parameterized.expand(
[
# Valid URLs:
("http://www.cwi.nl:80/%7Eguido/Python.html", True),
("https://stackoverflow.com", True),
("mailto:[email protected]", True),
("data:image/svg+xml;base64,PHN2ZyB4aHcvMjAwMC9zdmci", True),
("data:application/pdf;base64,PHN2ZyB4aHcvMjAwMC9zdmci", True),
("http://127.0.0.1", True), # IP as domain
("https://[::1]", True), # IPv6 address in URL
# Invalid URLs:
("/data/Python.html", False),
("www.streamlit.io", False), # Missing scheme
(532, False),
("dkakasdkjdjakdjadjfalskdjfalk", False),
("mailto:", False),
("ftp://example.com/resource", False), # Unsupported scheme
("https:///path/to/resource", False), # Missing netloc
]
)
def test_is_url(self, url: Any, expected_value: bool):
"""Test the is_url utility function."""
self.assertEqual(
url_util.is_url(url, ("http", "https", "data", "mailto")), expected_value
)

@parameterized.expand(
[
("http://example.com", ("http",), True),
("mailto:[email protected]", ("http", "https"), False),
("mailto:[email protected]", ("http", "mailto"), True),
("https://example.com", ("http",), False),
("https://example.com", ("https",), True),
("data:image/png;base64,abc123", ("data",), True),
("data:image/png;base64,abc123", ("http", "https", "mailto"), False),
("https://example.com", ("http", "https", "mailto"), True),
("http://example.com", None, True), # None schema == use default
("https://example.com", None, True), # None schema == use default
("data:image/png;base64,abc123", None, False), # None schema == use default
("mailto:[email protected]", None, False), # None schema == use default
]
)
def test_is_url_limits_schema(
self,
url: str,
allowed_schemas: Tuple[url_util.UrlSchema, ...] | None,
expected_value: bool,
):
"""Test that is_url applies the allowed schema parameter."""

# A None entry means: call is_url without the argument so its default applies.
if allowed_schemas is None:
self.assertEqual(url_util.is_url(url), expected_value)
else:
self.assertEqual(url_util.is_url(url, allowed_schemas), expected_value)
Loading

0 comments on commit 60d0ee6

Please sign in to comment.