Skip to content

Commit 71d6a8c

Browse files
committed
Update code to fix errors
1 parent 9631c56 commit 71d6a8c

File tree

2 files changed

+82
-33
lines changed

2 files changed

+82
-33
lines changed

src/pip/_internal/index/package_finder.py

Lines changed: 79 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,6 +1074,17 @@ def _extract_version_from_fragment(fragment: str, canonical_name: str) -> str |
10741074
return version
10751075

10761076

1077+
def _repo_base(url: str) -> str:
1078+
"""
1079+
Convert:
1080+
https://a.com/simple/mypackage
1081+
→ https://a.com/simple/
1082+
"""
1083+
s = urllib.parse.urlsplit(url)
1084+
path = s.path.rsplit("/", 1)[0] + "/"
1085+
return urllib.parse.urlunsplit((s.scheme, s.netloc, path, "", ""))
1086+
1087+
10771088
def check_multiple_remote_repositories(
10781089
candidates: list[InstallationCandidate], project_name: str
10791090
) -> None:
@@ -1166,7 +1177,6 @@ def check_multiple_remote_repositories(
11661177
# to question that and we assume that they’ve made sure it is safe to merge
11671178
# those namespaces. If the end user has explicitly told the installer to fetch
11681179
# the project from specific repositories, filter out all other repositories.
1169-
# import pytest; pytest.set_trace() # TODO: REMOVE FULLY.
11701180
# When no candidates are provided, then no checks are relevant, so just return.
11711181
if candidates is None or len(candidates) == 0:
11721182
logger.debug("No candidates given to multiple remote repository checks")
@@ -1179,8 +1189,10 @@ def check_multiple_remote_repositories(
11791189
# if they do then determine if either “Tracks” or “Alternate Locations” metadata
11801190
# allows safely merging together ALL the repositories where files were discovered.
11811191
remote_candidates = []
1182-
# If every remote candidate lacks repository metadata (common in tests using raw links),
1183-
# then treat them as coming from a single implicit repository and skip multi-repo checks.
1192+
# If every remote candidate lacks repository metadata
1193+
# (common in tests using raw links),
1194+
# then treat them as coming from a single implicit
1195+
# repository and skip multi-repo checks.
11841196
# all known remote repositories
11851197
known_remote_repo_urls = set()
11861198
# all known alternate location urls
@@ -1230,7 +1242,6 @@ def check_multiple_remote_repositories(
12301242
if len(remote_candidates) == 0:
12311243
logger.debug("No remote candidates for multiple remote repository checks")
12321244
return None
1233-
12341245
if all(not rc.remote_repository_urls for rc in remote_candidates):
12351246
logger.debug("All remote candidates lack repository metadata.")
12361247
return None
@@ -1260,14 +1271,30 @@ def check_multiple_remote_repositories(
12601271
# This means that the known candidate metadata might agree and pass this check,
12611272
# while retrieving the metadata from additional urls would not agree and would
12621273
# fail this check.
1263-
# Specification: In order for this metadata to be trusted, there MUST be agreement
1264-
# between all locations where that project is found as to what the alternate
1274+
# Specification: In order for this metadata to be trusted,
1275+
# there MUST be agreement
1276+
# between all locations where that project is
1277+
# found as to what the alternate
12651278
# locations are.
1266-
if len(mismatch_alternate_urls) > 0:
1279+
# Only validate alternate locations if any
1280+
# candidate actually declares alternate locations.
1281+
all_explicit_alt_urls = [
1282+
rc.alternate_location_urls - ({rc.url} if rc.url else set())
1283+
for rc in remote_candidates
1284+
]
1285+
mismatch_explicit_alt_urls: set[str] = set()
1286+
owner_urls = {rc.url for rc in remote_candidates if not rc.project_track_urls}
1287+
for urls in all_explicit_alt_urls:
1288+
mismatch_explicit_alt_urls.symmetric_difference_update(urls - owner_urls)
1289+
1290+
if (
1291+
any(len(urls) > 0 for urls in all_explicit_alt_urls)
1292+
and len(mismatch_explicit_alt_urls) > 0
1293+
):
12671294
raise InvalidAlternativeLocationsUrl(
12681295
package=project_name,
12691296
remote_repositories=known_remote_repo_urls,
1270-
invalid_locations=mismatch_alternate_urls,
1297+
invalid_locations=mismatch_explicit_alt_urls,
12711298
)
12721299

12731300
# Check the Tracks metadata.
@@ -1311,40 +1338,61 @@ def check_multiple_remote_repositories(
13111338
# TODO: Without requesting all repositories revealed by metadata, this
13121339
# check might pass with incomplete knowledge of all metadata,
13131340
# when it would fail after retrieving all metadata.
1314-
if project_track_url not in known_owner_repo_urls:
1315-
raise InvalidTracksUrl(
1316-
package=project_name,
1317-
remote_repositories={page_url} if page_url is not None else set(),
1318-
invalid_tracks={project_track_url},
1319-
)
1341+
track_repo = _repo_base(project_track_url)
1342+
1343+
# Determine whether there is only a single real origin repository.
1344+
single_origin = len({rc.url for rc in remote_candidates}) == 1
1345+
1346+
if not single_origin:
1347+
if track_repo not in {_repo_base(u) for u in known_owner_repo_urls}:
1348+
raise InvalidTracksUrl(
1349+
package=project_name,
1350+
remote_repositories=(
1351+
{page_url} if page_url is not None else set()
1352+
),
1353+
invalid_tracks={project_track_url},
1354+
)
1355+
1356+
if len(remote_candidates) == 1:
1357+
logger.debug(
1358+
"Single remote candidate; Tracks/Alternate Locations validated — "
1359+
"skipping multi-repo namespace intersection checks."
1360+
)
1361+
return None
13201362

13211363
# Specification: If nothing tells us merging the namespaces is safe, we refuse to
13221364
# implicitly assume it is, and generate an error instead.
13231365
# Specification: If that metadata does NOT allow [merging namespaces], then
13241366
# generate an error.
13251367
# Implementation Note: If there are two or more remote candidates, and any of them
13261368
# don't have valid Alternate Locations and/or Tracks metadata, then fail.
1327-
for remote_candidate in remote_candidates:
1328-
candidate_alt_urls = remote_candidate.alternate_location_urls
1329-
1330-
invalid_alt_urls = known_alternate_urls - candidate_alt_urls
1331-
has_alts = len(candidate_alt_urls) > 0
1332-
has_tracks = len(remote_candidate.project_track_urls) > 0
1369+
namespaces = []
13331370

1334-
is_invalid = any(
1335-
[
1336-
not has_alts and not has_tracks,
1337-
not has_tracks and invalid_alt_urls,
1338-
]
1371+
for remote_candidate in remote_candidates:
1372+
candidate_origin = {remote_candidate.url}
1373+
declared_sources = (
1374+
candidate_origin
1375+
| remote_candidate.project_track_urls
1376+
| remote_candidate.alternate_location_urls
13391377
)
1378+
namespaces.append(declared_sources)
13401379

1341-
if is_invalid:
1342-
error = UnsafeMultipleRemoteRepositories(
1380+
# Check if every namespace intersects at least one other namespace
1381+
# (otherwise we found a repository that doesn't belong to the merged set)
1382+
for i, ns in enumerate(namespaces):
1383+
if not any(ns & other for j, other in enumerate(namespaces) if i != j):
1384+
raise UnsafeMultipleRemoteRepositories(
13431385
package=project_name,
1344-
remote_repositories=known_remote_repo_urls,
1386+
remote_repositories=ns,
13451387
)
1346-
raise error
13471388

1348-
# Specification: Otherwise [if metadata allows] we merge the namespaces,
1349-
# and continue on.
1389+
all_declared_sources = set().union(*namespaces)
1390+
invalid_sources = all_declared_sources - known_remote_repo_urls
1391+
1392+
if invalid_sources:
1393+
raise UnsafeMultipleRemoteRepositories(
1394+
package=project_name,
1395+
remote_repositories=invalid_sources,
1396+
)
1397+
13501398
return None

tests/unit/test_index.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import logging
4+
from typing import Optional
45

56
import pytest
67

@@ -995,7 +996,7 @@ def _make_mock_candidate_check_remote_repo(
995996
"mypackage",
996997
None,
997998
),
998-
# checks pass when ony one candidate with alt loc url
999+
# checks pass when only one candidate with alt loc url
9991000
# TODO: not making requests to repos revealed via metadata
10001001
(
10011002
[
@@ -1087,7 +1088,7 @@ def _make_mock_candidate_check_remote_repo(
10871088
],
10881089
)
10891090
def test_check_multiple_remote_repositories(
1090-
caplog, candidates: List[InstallationCandidate], project_name: str, expected
1091+
caplog, candidates: list[InstallationCandidate], project_name: str, expected
10911092
):
10921093
caplog.set_level(logging.DEBUG)
10931094
if expected:

0 commit comments

Comments
 (0)