@@ -1074,6 +1074,17 @@ def _extract_version_from_fragment(fragment: str, canonical_name: str) -> str |
10741074 return version
10751075
10761076
1077+ def _repo_base (url : str ) -> str :
1078+ """
1079+ Convert:
1080+ https://a.com/simple/mypackage
1081+ → https://a.com/simple/
1082+ """
1083+ s = urllib .parse .urlsplit (url )
1084+ path = s .path .rsplit ("/" , 1 )[0 ] + "/"
1085+ return urllib .parse .urlunsplit ((s .scheme , s .netloc , path , "" , "" ))
1086+
1087+
10771088def check_multiple_remote_repositories (
10781089 candidates : list [InstallationCandidate ], project_name : str
10791090) -> None :
@@ -1166,7 +1177,6 @@ def check_multiple_remote_repositories(
11661177 # to question that and we assume that they’ve made sure it is safe to merge
11671178 # those namespaces. If the end user has explicitly told the installer to fetch
11681179 # the project from specific repositories, filter out all other repositories.
1169- # import pytest; pytest.set_trace() # TODO: REMOVE FULLY.
11701180 # When no candidates are provided, then no checks are relevant, so just return.
11711181 if candidates is None or len (candidates ) == 0 :
11721182 logger .debug ("No candidates given to multiple remote repository checks" )
@@ -1179,8 +1189,10 @@ def check_multiple_remote_repositories(
11791189 # if they do then determine if either “Tracks” or “Alternate Locations” metadata
11801190 # allows safely merging together ALL the repositories where files were discovered.
11811191 remote_candidates = []
1182- # If every remote candidate lacks repository metadata (common in tests using raw links),
1183- # then treat them as coming from a single implicit repository and skip multi-repo checks.
1192+ # If every remote candidate lacks repository metadata
1193+ # (common in tests using raw links),
1194+ # then treat them as coming from a single implicit
1195+ # repository and skip multi-repo checks.
11841196 # all known remote repositories
11851197 known_remote_repo_urls = set ()
11861198 # all known alternate location urls
@@ -1230,7 +1242,6 @@ def check_multiple_remote_repositories(
12301242 if len (remote_candidates ) == 0 :
12311243 logger .debug ("No remote candidates for multiple remote repository checks" )
12321244 return None
1233-
12341245 if all (not rc .remote_repository_urls for rc in remote_candidates ):
12351246 logger .debug ("All remote candidates lack repository metadata." )
12361247 return None
@@ -1260,14 +1271,30 @@ def check_multiple_remote_repositories(
12601271 # This means that the known candidate metadata might agree and pass this check,
12611272 # while retrieving the metadata from additional urls would not agree and would
12621273 # fail this check.
1263- # Specification: In order for this metadata to be trusted, there MUST be agreement
1264- # between all locations where that project is found as to what the alternate
1274+ # Specification: In order for this metadata to be trusted,
1275+ # there MUST be agreement
1276+ # between all locations where that project is
1277+ # found as to what the alternate
12651278 # locations are.
1266- if len (mismatch_alternate_urls ) > 0 :
1279+ # Only validate alternate locations if any
1280+ # candidate actually declares alternate locations.
1281+ all_explicit_alt_urls = [
1282+ rc .alternate_location_urls - ({rc .url } if rc .url else set ())
1283+ for rc in remote_candidates
1284+ ]
1285+ mismatch_explicit_alt_urls : set [str ] = set ()
1286+ owner_urls = {rc .url for rc in remote_candidates if not rc .project_track_urls }
1287+ for urls in all_explicit_alt_urls :
1288+ mismatch_explicit_alt_urls .symmetric_difference_update (urls - owner_urls )
1289+
1290+ if (
1291+ any (len (urls ) > 0 for urls in all_explicit_alt_urls )
1292+ and len (mismatch_explicit_alt_urls ) > 0
1293+ ):
12671294 raise InvalidAlternativeLocationsUrl (
12681295 package = project_name ,
12691296 remote_repositories = known_remote_repo_urls ,
1270- invalid_locations = mismatch_alternate_urls ,
1297+ invalid_locations = mismatch_explicit_alt_urls ,
12711298 )
12721299
12731300 # Check the Tracks metadata.
@@ -1311,40 +1338,61 @@ def check_multiple_remote_repositories(
13111338 # TODO: Without requesting all repositories revealed by metadata, this
13121339 # check might pass with incomplete knowledge of all metadata,
13131340 # when it would fail after retrieving all metadata.
1314- if project_track_url not in known_owner_repo_urls :
1315- raise InvalidTracksUrl (
1316- package = project_name ,
1317- remote_repositories = {page_url } if page_url is not None else set (),
1318- invalid_tracks = {project_track_url },
1319- )
1341+ track_repo = _repo_base (project_track_url )
1342+
1343+ # Determine whether there is only a single real origin repository.
1344+ single_origin = len ({rc .url for rc in remote_candidates }) == 1
1345+
1346+ if not single_origin :
1347+ if track_repo not in {_repo_base (u ) for u in known_owner_repo_urls }:
1348+ raise InvalidTracksUrl (
1349+ package = project_name ,
1350+ remote_repositories = (
1351+ {page_url } if page_url is not None else set ()
1352+ ),
1353+ invalid_tracks = {project_track_url },
1354+ )
1355+
1356+ if len (remote_candidates ) == 1 :
1357+ logger .debug (
1358+ "Single remote candidate; Tracks/Alternate Locations validated — "
1359+ "skipping multi-repo namespace intersection checks."
1360+ )
1361+ return None
13201362
13211363 # Specification: If nothing tells us merging the namespaces is safe, we refuse to
13221364 # implicitly assume it is, and generate an error instead.
13231365 # Specification: If that metadata does NOT allow [merging namespaces], then
13241366 # generate an error.
13251367 # Implementation Note: If there are two or more remote candidates, and any of them
13261368 # don't have valid Alternate Locations and/or Tracks metadata, then fail.
1327- for remote_candidate in remote_candidates :
1328- candidate_alt_urls = remote_candidate .alternate_location_urls
1329-
1330- invalid_alt_urls = known_alternate_urls - candidate_alt_urls
1331- has_alts = len (candidate_alt_urls ) > 0
1332- has_tracks = len (remote_candidate .project_track_urls ) > 0
1369+ namespaces = []
13331370
1334- is_invalid = any (
1335- [
1336- not has_alts and not has_tracks ,
1337- not has_tracks and invalid_alt_urls ,
1338- ]
1371+ for remote_candidate in remote_candidates :
1372+ candidate_origin = {remote_candidate .url }
1373+ declared_sources = (
1374+ candidate_origin
1375+ | remote_candidate .project_track_urls
1376+ | remote_candidate .alternate_location_urls
13391377 )
1378+ namespaces .append (declared_sources )
13401379
1341- if is_invalid :
1342- error = UnsafeMultipleRemoteRepositories (
1380+ # Check if every namespace intersects at least one other namespace
1381+ # (otherwise we found a repository that doesn't belong to the merged set)
1382+ for i , ns in enumerate (namespaces ):
1383+ if not any (ns & other for j , other in enumerate (namespaces ) if i != j ):
1384+ raise UnsafeMultipleRemoteRepositories (
13431385 package = project_name ,
1344- remote_repositories = known_remote_repo_urls ,
1386+ remote_repositories = ns ,
13451387 )
1346- raise error
13471388
1348- # Specification: Otherwise [if metadata allows] we merge the namespaces,
1349- # and continue on.
1389+ all_declared_sources = set ().union (* namespaces )
1390+ invalid_sources = all_declared_sources - known_remote_repo_urls
1391+
1392+ if invalid_sources :
1393+ raise UnsafeMultipleRemoteRepositories (
1394+ package = project_name ,
1395+ remote_repositories = invalid_sources ,
1396+ )
1397+
13501398 return None
0 commit comments