From 07e7b5071a88f8305c0818530fddfefff13121c5 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 17 Feb 2025 22:46:29 +0000 Subject: [PATCH 1/6] Add validation for GitHub repo paths in Codebase constructor When users try to pass a GitHub repo path (e.g., 'fastapi/fastapi') to the Codebase constructor, raise a ValueError guiding them to use Codebase.from_repo() instead. Co-Authored-By: jay@codegen.com --- src/codegen/sdk/core/codebase.py | 5 +++++ .../codebase/test_codebase_github_repo.py | 20 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 tests/unit/python/codebase/test_codebase_github_repo.py diff --git a/src/codegen/sdk/core/codebase.py b/src/codegen/sdk/core/codebase.py index 0de1c1f2e..27669259c 100644 --- a/src/codegen/sdk/core/codebase.py +++ b/src/codegen/sdk/core/codebase.py @@ -177,6 +177,11 @@ def __init__( # Initialize project with repo_path if projects is None if repo_path is not None: + # Add validation to detect GitHub repo paths + if "/" in repo_path and not os.path.exists(repo_path): + if len(repo_path.split("/")) == 2: # Looks like "owner/repo" + msg = f"Path '{repo_path}' looks like a GitHub repository path. To create a Codebase from a GitHub repo, use Codebase.from_repo() instead." 
+ raise ValueError(msg) main_project = ProjectConfig.from_path(repo_path, programming_language=ProgrammingLanguage(language.upper()) if language else None) projects = [main_project] else: diff --git a/tests/unit/python/codebase/test_codebase_github_repo.py b/tests/unit/python/codebase/test_codebase_github_repo.py new file mode 100644 index 000000000..74eb36ea4 --- /dev/null +++ b/tests/unit/python/codebase/test_codebase_github_repo.py @@ -0,0 +1,20 @@ +import os +import pytest +from pathlib import Path + +from codegen.sdk.core.codebase import Codebase + +def test_codebase_github_repo_path() -> None: + """Test that trying to create a Codebase with a GitHub repo path raises an error.""" + with pytest.raises(ValueError, match="looks like a GitHub repository path"): + Codebase(repo_path="fastapi/fastapi") + +def test_codebase_valid_path_with_slash(tmp_path) -> None: + """Test that a valid path containing slashes works correctly.""" + path = tmp_path / "some/nested/path" + os.makedirs(path) + # Create a Python file so language detection works + with open(path / "test.py", "w") as f: + f.write("# Test file") + codebase: Codebase = Codebase(repo_path=str(path)) + assert str(codebase.repo_path) == str(path) From 2ffb7b63c694971e0018a4839dbfdccaf2f89111 Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 17 Feb 2025 22:47:15 +0000 Subject: [PATCH 2/6] Automated pre-commit update --- tests/unit/python/codebase/test_codebase_github_repo.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/unit/python/codebase/test_codebase_github_repo.py b/tests/unit/python/codebase/test_codebase_github_repo.py index 74eb36ea4..7445f596a 100644 --- a/tests/unit/python/codebase/test_codebase_github_repo.py +++ b/tests/unit/python/codebase/test_codebase_github_repo.py @@ -1,14 +1,16 @@ import os + import pytest -from pathlib import Path from codegen.sdk.core.codebase import Codebase + def 
test_codebase_github_repo_path() -> None: """Test that trying to create a Codebase with a GitHub repo path raises an error.""" with pytest.raises(ValueError, match="looks like a GitHub repository path"): Codebase(repo_path="fastapi/fastapi") + def test_codebase_valid_path_with_slash(tmp_path) -> None: """Test that a valid path containing slashes works correctly.""" path = tmp_path / "some/nested/path" From eff009e737dadb499034755f0d1eff24e62d08db Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 17 Feb 2025 22:52:19 +0000 Subject: [PATCH 3/6] test: Initialize git repo in test directory Co-Authored-By: jay@codegen.com --- tests/unit/python/codebase/test_codebase_github_repo.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/unit/python/codebase/test_codebase_github_repo.py b/tests/unit/python/codebase/test_codebase_github_repo.py index 7445f596a..cf60e7aa4 100644 --- a/tests/unit/python/codebase/test_codebase_github_repo.py +++ b/tests/unit/python/codebase/test_codebase_github_repo.py @@ -13,6 +13,10 @@ def test_codebase_github_repo_path() -> None: def test_codebase_valid_path_with_slash(tmp_path) -> None: """Test that a valid path containing slashes works correctly.""" + # Initialize git repo at tmp_path + import subprocess + subprocess.run(["git", "init"], cwd=str(tmp_path), check=True) + path = tmp_path / "some/nested/path" os.makedirs(path) # Create a Python file so language detection works From d53072bf687cf15f40f84b289445306668803a3d Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 17 Feb 2025 22:53:14 +0000 Subject: [PATCH 4/6] Automated pre-commit update --- tests/unit/python/codebase/test_codebase_github_repo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/python/codebase/test_codebase_github_repo.py b/tests/unit/python/codebase/test_codebase_github_repo.py index 
cf60e7aa4..642292a88 100644 --- a/tests/unit/python/codebase/test_codebase_github_repo.py +++ b/tests/unit/python/codebase/test_codebase_github_repo.py @@ -15,8 +15,9 @@ def test_codebase_valid_path_with_slash(tmp_path) -> None: """Test that a valid path containing slashes works correctly.""" # Initialize git repo at tmp_path import subprocess + subprocess.run(["git", "init"], cwd=str(tmp_path), check=True) - + path = tmp_path / "some/nested/path" os.makedirs(path) # Create a Python file so language detection works From e596697d8a5c5a451f3ebb9e953eb8ea5e071249 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 17 Feb 2025 22:58:36 +0000 Subject: [PATCH 5/6] test: Fix test to expect repo root path Co-Authored-By: jay@codegen.com --- tests/unit/python/codebase/test_codebase_github_repo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/python/codebase/test_codebase_github_repo.py b/tests/unit/python/codebase/test_codebase_github_repo.py index 642292a88..adcc0bd58 100644 --- a/tests/unit/python/codebase/test_codebase_github_repo.py +++ b/tests/unit/python/codebase/test_codebase_github_repo.py @@ -24,4 +24,5 @@ def test_codebase_valid_path_with_slash(tmp_path) -> None: with open(path / "test.py", "w") as f: f.write("# Test file") codebase: Codebase = Codebase(repo_path=str(path)) - assert str(codebase.repo_path) == str(path) + # When initializing a Codebase from a path within a git repo, it uses the repo root + assert str(codebase.repo_path) == str(tmp_path) From e7dbcfd8f0e735458aaaaa9bfbbb5a845a5f62f9 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 17 Feb 2025 23:38:39 +0000 Subject: [PATCH 6/6] feat: improve GitHub URL validation in Codebase constructor Co-Authored-By: jay@codegen.com --- src/codegen/sdk/core/codebase.py | 29 +++++++++++++-- .../codebase/test_codebase_github_repo.py | 37 +++++++++++++++++++ 2 
files changed, 62 insertions(+), 4 deletions(-) diff --git a/src/codegen/sdk/core/codebase.py b/src/codegen/sdk/core/codebase.py index 27669259c..d1ef40c0e 100644 --- a/src/codegen/sdk/core/codebase.py +++ b/src/codegen/sdk/core/codebase.py @@ -10,6 +10,7 @@ from functools import cached_property from pathlib import Path from typing import TYPE_CHECKING, Generic, Literal, TypeVar, Unpack, overload +from urllib.parse import urlparse import plotly.graph_objects as go import rich.repr @@ -177,11 +178,31 @@ def __init__( # Initialize project with repo_path if projects is None if repo_path is not None: - # Add validation to detect GitHub repo paths - if "/" in repo_path and not os.path.exists(repo_path): - if len(repo_path.split("/")) == 2: # Looks like "owner/repo" - msg = f"Path '{repo_path}' looks like a GitHub repository path. To create a Codebase from a GitHub repo, use Codebase.from_repo() instead." + # Add validation to detect GitHub repo paths and URLs + # Clean up the path - remove angle brackets and whitespace + cleaned_path = repo_path.strip("<> \t\n") + + if not os.path.exists(cleaned_path): + # Parse URL to handle various GitHub URL formats + parsed_url = urlparse(cleaned_path) + path_parts = parsed_url.path.strip("/").split("/") + + # Check for GitHub URLs (e.g., https://github.com/owner/repo) + if parsed_url.netloc == "github.com" or parsed_url.path.startswith("github.com/"): + owner_repo = "/".join(path_parts[-2:]) if len(path_parts) >= 2 else "" + msg = f"Path '{repo_path}' is a GitHub URL. To create a Codebase from a GitHub repo, use Codebase.from_repo('{owner_repo}') instead." raise ValueError(msg) + # Check for GitHub repo paths (e.g., owner/repo) + if len(path_parts) == 2 and "/" in cleaned_path and not parsed_url.scheme and not parsed_url.netloc: + msg = f"Path '{repo_path}' looks like a GitHub repository path. To create a Codebase from a GitHub repo, use Codebase.from_repo('{cleaned_path}') instead." 
+ raise ValueError(msg) + + # For non-GitHub paths that don't exist, provide a clearer error + if os.path.isabs(cleaned_path): + msg = f"Local path '{repo_path}' does not exist. Please provide a valid local directory path." + else: + msg = f"Local path '{repo_path}' does not exist. Please provide a valid local directory path (relative paths like '{cleaned_path}' are allowed if they exist)." + raise ValueError(msg) main_project = ProjectConfig.from_path(repo_path, programming_language=ProgrammingLanguage(language.upper()) if language else None) projects = [main_project] else: diff --git a/tests/unit/python/codebase/test_codebase_github_repo.py b/tests/unit/python/codebase/test_codebase_github_repo.py index adcc0bd58..214b63390 100644 --- a/tests/unit/python/codebase/test_codebase_github_repo.py +++ b/tests/unit/python/codebase/test_codebase_github_repo.py @@ -11,6 +11,43 @@ def test_codebase_github_repo_path() -> None: Codebase(repo_path="fastapi/fastapi") +def test_codebase_github_url_formats() -> None: + """Test that trying to create a Codebase with various GitHub URL formats raises an error.""" + urls = [ + "https://github.com/fastapi/fastapi", + "https://github.com/fastapi/fastapi.git", + "http://github.com/fastapi/fastapi", + "github.com/fastapi/fastapi", + "<https://github.com/fastapi/fastapi>", + "git@github.com:fastapi/fastapi.git", + ] + for url in urls: + with pytest.raises(ValueError, match="is a GitHub URL"): + Codebase(repo_path=url) + + +def test_codebase_github_url_with_path() -> None: + """Test that trying to create a Codebase with a GitHub URL containing extra path components raises an error.""" + with pytest.raises(ValueError, match="is a GitHub URL"): + Codebase(repo_path="https://github.com/fastapi/fastapi/tree/main") + + +def test_codebase_nonexistent_local_paths() -> None: + """Test that trying to create a Codebase with nonexistent local paths raises appropriate errors.""" + # Absolute path + with pytest.raises(ValueError, match="Local path .* does not exist"): +
Codebase(repo_path="/nonexistent/path") + + # Relative path + with pytest.raises(ValueError, match="relative paths like"): + Codebase(repo_path="path/to/file") + + # String variable + test_string = "some/code/here" + with pytest.raises(ValueError, match="relative paths like"): + Codebase(repo_path=test_string) + + def test_codebase_valid_path_with_slash(tmp_path) -> None: """Test that a valid path containing slashes works correctly.""" # Initialize git repo at tmp_path