Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 9 additions & 24 deletions src/fromager/bootstrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
build_environment,
dependencies,
finders,
gitutils,
hooks,
progress,
resolver,
Expand Down Expand Up @@ -1067,40 +1068,24 @@ def _resolve_version_from_git_url(self, req: Requirement) -> tuple[str, Version]
if not req.url:
raise ValueError(f"unable to resolve from URL with no URL in {req}")

if not req.url.startswith("git+"):
raise ValueError(f"unable to handle URL scheme in {req.url} from {req}")

# We start by not knowing where we would put the source because we don't
# know the version.
working_src_dir: pathlib.Path | None = None
version: Version | None = None

# Clean up the URL so we can parse it
reduced_url = req.url[len("git+") :]
parsed_url = urlparse(reduced_url)

# Save the URL that we think we will use for cloning. This might change
# later if the path has a tag or branch in it.
url_to_clone = reduced_url
url_to_clone, git_ref = gitutils.parse_vcs_url(req.url, require_ref=False)
need_to_clone = False

# If the URL includes an @ with text after it, we use that as the reference
# to clone, but by default we take the default branch.
git_ref: str | None = None

if "@" not in parsed_url.path:
# If we have no reference, we know we are going to have to clone the
# repository to figure out the version to use.
if git_ref == gitutils.GIT_HEAD:
# No ref in URL, clone to discover the version.
logger.debug("no reference in URL, will clone")
need_to_clone = True
else:
# If we have a reference, it might be a valid python version string, or
# not. It _must_ be a valid git reference. If it can be parsed as a
# valid python version, we assume the tag points to source that will
# think that is its version, so we allow reusing an existing cloned repo
# if there is one.
new_path, _, git_ref = parsed_url.path.rpartition("@")
url_to_clone = parsed_url._replace(path=new_path).geturl()
# If we have a reference, it might be a valid python version
# string, or not. It _must_ be a valid git reference. If it can
# be parsed as a valid python version, we assume the tag points
# to source that will think that is its version, so we allow
# reusing an existing cloned repo if there is one.
try:
version = Version(git_ref)
except ValueError:
Expand Down
50 changes: 49 additions & 1 deletion src/fromager/gitutils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import pathlib
import typing
from urllib.parse import urlparse

from packaging.requirements import Requirement
Expand All @@ -8,6 +9,9 @@

logger = logging.getLogger(__name__)

#: Default git ref (current HEAD of the default branch)
GIT_HEAD: typing.Final = "HEAD"


def git_clone(
*,
Expand Down Expand Up @@ -67,7 +71,7 @@ def git_clone_fast(
*,
output_dir: pathlib.Path,
repo_url: str,
ref: str = "HEAD",
ref: str = GIT_HEAD,
) -> None:
"""Efficient, blobless git clone with all submodules

Expand Down Expand Up @@ -134,3 +138,47 @@ def git_clone_fast(
external_commands.run(cmd, cwd=str(output_dir), network_isolation=False)
else:
logger.debug("no .gitmodules file")


def parse_vcs_url(vcs_url: str, *, require_ref: bool = True) -> tuple[str, str]:
"""Parse a `pip VCS URL`_ into a repo clone URL and git ref.

Parses a string like ``git+https://github.com/python-wheel-build/fromager.git@refs/tags/0.88.0``
into a git clone URL and git reference.

When *require_ref* is ``False`` and the URL has no ``@ref``,
the ref defaults to `GIT_HEAD` instead of raising an error.

.. _pip VCS URL: https://pip.pypa.io/en/stable/topics/vcs-support/

.. versionadded:: 0.89.0
"""
parsed = urlparse(vcs_url)

supported_schemes: set[str] = {"git+https", "git+ssh"}
if parsed.scheme not in supported_schemes:
raise ValueError(
f"unsupported VCS URL scheme {parsed.scheme!r},"
f" expected one of {sorted(supported_schemes)}"
)

# The ref is appended to the path after the last '@'.
path_before, sep, ref = parsed.path.rpartition("@")
if sep and not ref:
raise ValueError(f"VCS URL {vcs_url!r} has an empty ref after '@'")
if not sep:
if require_ref:
raise ValueError(
f"VCS URL {vcs_url!r} is missing a mandatory ref after '@'"
)
path_before = parsed.path
ref = GIT_HEAD

# Reconstruct the plain repo URL without the git+ prefix and ref
repo_url = parsed._replace(
scheme=parsed.scheme[4:],
path=path_before,
fragment="",
).geturl()

return repo_url, ref
24 changes: 1 addition & 23 deletions src/fromager/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import tarfile
import typing
import zipfile
from urllib.parse import urlparse

import resolvelib
from packaging.requirements import Requirement
Expand Down Expand Up @@ -77,25 +76,7 @@ def download_source(
download_path = ctx.work_dir / f"{req.name}-{version}" / f"{req.name}-{version}"
download_path.mkdir(parents=True, exist_ok=True)

# Parse ref from URL if it's in the form repo@ref
url_to_clone = req.url
git_ref: str | None = None

# Remove git+ prefix for parsing
parsing_url = url_to_clone
if parsing_url.startswith("git+"):
parsing_url = parsing_url[len("git+") :]

parsed_url = urlparse(parsing_url)
if "@" in parsed_url.path:
# Extract the ref from the path (e.g., /path/to/repo.git@5.2.0 -> @5.2.0)
new_path, _, git_ref = parsed_url.path.rpartition("@")
# Reconstruct URL without the @ref part
url_to_clone = parsed_url._replace(path=new_path).geturl()
# Add back git+ prefix if it was present
if req.url.startswith("git+"):
url_to_clone = f"git+{url_to_clone}"

url_to_clone, git_ref = gitutils.parse_vcs_url(req.url)
download_git_source(
ctx=ctx,
req=req,
Expand Down Expand Up @@ -239,9 +220,6 @@ def download_git_source(
destination_dir: pathlib.Path,
ref: str | None = None,
) -> None:
if url_to_clone.startswith("git+"):
url_to_clone = url_to_clone[len("git+") :]

logger.info(f"cloning source from {url_to_clone}@{ref} to {destination_dir}")
# Get git options from package settings
pbi = ctx.package_build_info(req)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,5 +884,5 @@ def test_resolve_version_from_git_url_invalid_scheme(
req = Requirement("test-pkg @ https://github.com/example/repo.git")

bs = bootstrapper.Bootstrapper(tmp_context)
with pytest.raises(ValueError, match="unable to handle URL scheme"):
with pytest.raises(ValueError, match="unsupported VCS URL scheme"):
bs._resolve_version_from_git_url(req)
32 changes: 30 additions & 2 deletions tests/test_gitutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pytest
from packaging.version import Version

from fromager.gitutils import git_clone_fast
from fromager.gitutils import GIT_HEAD, git_clone_fast, parse_vcs_url

needs_git_command = pytest.mark.skipif(
shutil.which("git") is None, reason="requires 'git' command"
Expand Down Expand Up @@ -47,7 +47,7 @@ def test_git_clone_fast(m_run: Mock, tmp_path: pathlib.Path) -> None:
[
"git",
"checkout",
"HEAD",
GIT_HEAD,
],
network_isolation=False,
cwd=str(tmp_path),
Expand Down Expand Up @@ -76,6 +76,34 @@ def test_git_clone_fast_submodules(m_run: Mock, tmp_path: pathlib.Path) -> None:
)


def test_parse_vcs_url() -> None:
assert parse_vcs_url("git+https://git.test/org/project.git@v1.0") == (
"https://git.test/org/project.git",
"v1.0",
)
# '@' in netloc must not be confused with the ref '@'
assert parse_vcs_url("git+ssh://git@git.test/org/project.git@abc123") == (
"ssh://git@git.test/org/project.git",
"abc123",
)
# require_ref=False returns GIT_HEAD when no ref is present
assert parse_vcs_url("git+https://git.test/org/project.git", require_ref=False) == (
"https://git.test/org/project.git",
GIT_HEAD,
)


def test_parse_vcs_url_errors() -> None:
with pytest.raises(ValueError, match="missing a mandatory ref"):
parse_vcs_url("git+https://git.test/org/project.git")
with pytest.raises(ValueError, match="empty ref"):
parse_vcs_url("git+https://git.test/org/project.git@")
with pytest.raises(ValueError, match="empty ref"):
parse_vcs_url("git+https://git.test/org/project.git@", require_ref=False)
with pytest.raises(ValueError, match="unsupported VCS URL scheme"):
parse_vcs_url("git+http://git.test/org/project.git@v1.0")


@pytest.mark.network
@needs_git_command
def test_git_clone_real(tmp_path: pathlib.Path) -> None:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ def test_download_source_git_with_ref(
mock_git.assert_called_once()
call_kwargs = mock_git.call_args[1]
assert call_kwargs["ref"] == "v2.0"
assert call_kwargs["url_to_clone"] == "git+https://github.com/org/pkg.git"
assert call_kwargs["url_to_clone"] == "https://github.com/org/pkg.git"
assert result == tmp_context.work_dir / "pkg-2.0" / "pkg-2.0"


Expand Down
Loading