Skip to content

Commit e320362

Browse files
committed
Fix requires_python metadata + add repair metadata command
fixes: #773
1 parent 85b3398 commit e320362

File tree

10 files changed

+250
-35
lines changed

10 files changed

+250
-35
lines changed

CHANGES/773.bugfix

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fixed `requires_python` field not being properly set on package upload.
2+
3+
Run the new `pulpcore-manager repair-python-metadata` command with repositories containing affected
4+
packages to repair their metadata.

pulp_python/app/management/__init__.py

Whitespace-only changes.

pulp_python/app/management/commands/__init__.py

Whitespace-only changes.
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
import re
2+
import os
3+
from django.core.management import BaseCommand, CommandError
4+
from gettext import gettext as _
5+
6+
from django.conf import settings
7+
8+
from pulpcore.plugin.util import extract_pk
9+
from pulp_python.app.models import PythonPackageContent, PythonRepository
10+
from pulp_python.app.utils import artifact_to_python_content_data
11+
12+
13+
def repair_metadata(content):
14+
"""
15+
Repairs the metadata for the passed in content queryset.
16+
:param content: The PythonPackageContent queryset.
17+
Return: number of content units that were repaired
18+
"""
19+
# TODO: Add on_demand content repair?
20+
os.chdir(settings.WORKING_DIRECTORY)
21+
content = content.select_related("pulp_domain")
22+
immediate_content = content.filter(contentartifact__artifact__isnull=False)
23+
batch = []
24+
set_of_update_fields = set()
25+
total_repaired = 0
26+
for package in immediate_content.prefetch_related('_artifacts').iterator(chunk_size=1000):
27+
new_data = artifact_to_python_content_data(
28+
package.filename, package._artifacts.get(), package.pulp_domain
29+
)
30+
changed = False
31+
for field, value in new_data.items():
32+
if getattr(package, field) != value:
33+
setattr(package, field, value)
34+
set_of_update_fields.add(field)
35+
changed = True
36+
if changed:
37+
batch.append(package)
38+
if len(batch) == 1000:
39+
total_repaired += len(batch)
40+
PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)
41+
batch = []
42+
set_of_update_fields.clear()
43+
44+
if len(batch) > 0:
45+
total_repaired += len(batch)
46+
PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)
47+
48+
return total_repaired
49+
50+
51+
def href_prn_list_handler(value):
52+
"""Common list parsing for a string of hrefs/prns."""
53+
h = rf"(?:{settings.API_ROOT}(?:[-_a-zA-Z0-9]+/)?api/v3/repositories/python/python/[-a-f0-9]+/)"
54+
p = r"(?:prn:python\.pythonrepository:[-a-f0-9]+)"
55+
r = rf"{h}|{p}"
56+
return re.findall(r, value)
57+
58+
59+
class Command(BaseCommand):
60+
"""
61+
Management command to repair metadata of PythonPackageContent.
62+
"""
63+
64+
help = _("Repair the metadata of PythonPackageContent stored in PythonRepositories")
65+
66+
def add_arguments(self, parser):
67+
"""Set up arguments."""
68+
parser.add_argument(
69+
"--repositories",
70+
type=href_prn_list_handler,
71+
required=False,
72+
help=_(
73+
"List of PythonRepository hrefs/prns whose content's metadata will be repaired. "
74+
"Leave blank to include all repositories in all domains. Mutually exclusive "
75+
"with domain."
76+
),
77+
)
78+
parser.add_argument(
79+
"--domain",
80+
default=None,
81+
required=False,
82+
help=_(
83+
"The pulp domain to gather the repositories from if specified. Mutually"
84+
" exclusive with repositories."
85+
),
86+
)
87+
88+
def handle(self, *args, **options):
89+
"""Implement the command."""
90+
domain = options.get("domain")
91+
repository_hrefs = options.get("repositories")
92+
if domain and repository_hrefs:
93+
raise CommandError(_("--domain and --repositories are mutually exclusive"))
94+
95+
repositories = PythonRepository.objects.all()
96+
if repository_hrefs:
97+
repos_ids = [extract_pk(r) for r in repository_hrefs]
98+
repositories = repositories.filter(pk__in=repos_ids)
99+
elif domain:
100+
repositories = repositories.filter(pulp_domain__name=domain)
101+
102+
content_set = set()
103+
for repository in repositories:
104+
content_set.update(repository.latest_version().content.values_list("pk", flat=True))
105+
content = PythonPackageContent.objects.filter(pk__in=content_set)
106+
num_repaired = repair_metadata(content)
107+
print(f"{len(content_set)} packages processed, {num_repaired} package metadata repaired.")

pulp_python/app/models.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,8 @@
1717

1818
from pathlib import PurePath
1919
from .utils import (
20+
artifact_to_python_content_data,
2021
canonicalize_name,
21-
get_project_metadata_from_artifact,
22-
parse_project_metadata,
2322
python_content_to_json,
2423
PYPI_LAST_SERIAL,
2524
PYPI_SERIAL_CONSTANT,
@@ -189,14 +188,7 @@ class PythonPackageContent(Content):
189188
def init_from_artifact_and_relative_path(artifact, relative_path):
190189
"""Used when downloading package from pull-through cache."""
191190
path = PurePath(relative_path)
192-
metadata = get_project_metadata_from_artifact(path.name, artifact)
193-
data = parse_project_metadata(vars(metadata))
194-
data["packagetype"] = metadata.packagetype
195-
data["version"] = metadata.version
196-
data["filename"] = path.name
197-
data["sha256"] = artifact.sha256
198-
data["pulp_domain_id"] = artifact.pulp_domain_id
199-
data["_pulp_domain_id"] = artifact.pulp_domain_id
191+
data = artifact_to_python_content_data(path.name, artifact, domain=get_domain())
200192
return PythonPackageContent(**data)
201193

202194
def __str__(self):

pulp_python/app/serializers.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from pulp_python.app import models as python_models
1111
from pulp_python.app import fields
12-
from pulp_python.app.utils import get_project_metadata_from_artifact, parse_project_metadata
12+
from pulp_python.app.utils import artifact_to_python_content_data
1313

1414

1515
class PythonRepositorySerializer(core_serializers.RepositorySerializer):
@@ -217,7 +217,7 @@ def deferred_validate(self, data):
217217

218218
artifact = data["artifact"]
219219
try:
220-
metadata = get_project_metadata_from_artifact(filename, artifact)
220+
_data = artifact_to_python_content_data(filename, artifact, domain=get_domain())
221221
except ValueError:
222222
raise serializers.ValidationError(_(
223223
"Extension on {} is not a valid python extension "
@@ -231,14 +231,6 @@ def deferred_validate(self, data):
231231
)}
232232
)
233233

234-
_data = parse_project_metadata(vars(metadata))
235-
_data['packagetype'] = metadata.packagetype
236-
_data['version'] = metadata.version
237-
_data['filename'] = filename
238-
_data['sha256'] = artifact.sha256
239-
data["pulp_domain_id"] = artifact.pulp_domain_id
240-
data["_pulp_domain_id"] = artifact.pulp_domain_id
241-
242234
data.update(_data)
243235

244236
return data

pulp_python/app/tasks/upload.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from pulpcore.plugin.util import get_domain
88

99
from pulp_python.app.models import PythonPackageContent, PythonRepository
10-
from pulp_python.app.utils import get_project_metadata_from_artifact, parse_project_metadata
10+
from pulp_python.app.utils import artifact_to_python_content_data
1111

1212

1313
def upload(artifact_sha256, filename, repository_pk=None):
@@ -76,15 +76,7 @@ def create_content(artifact_sha256, filename, domain):
7676
queryset of the new created content
7777
"""
7878
artifact = Artifact.objects.get(sha256=artifact_sha256, pulp_domain=domain)
79-
metadata = get_project_metadata_from_artifact(filename, artifact)
80-
81-
data = parse_project_metadata(vars(metadata))
82-
data['packagetype'] = metadata.packagetype
83-
data['version'] = metadata.version
84-
data['filename'] = filename
85-
data['sha256'] = artifact.sha256
86-
data['pulp_domain'] = domain
87-
data['_pulp_domain'] = domain
79+
data = artifact_to_python_content_data(filename, artifact, domain)
8880

8981
@transaction.atomic()
9082
def create():

pulp_python/app/utils.py

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import pkginfo
2+
import re
23
import shutil
34
import tempfile
45
import json
@@ -51,6 +52,20 @@
5152
".zip": "sdist",
5253
}
5354

55+
DIST_REGEXES = {
56+
# regex from https://github.com/pypa/pip/blob/18.0/src/pip/_internal/wheel.py#L569
57+
".whl": re.compile(
58+
r"""^(?P<name>.+?)-(?P<version>.*?)
59+
((-(?P<build>\d[^-]*?))?-(?P<pyver>.+?)-(?P<abi>.+?)-(?P<plat>.+?)
60+
\.whl|\.dist-info)$""",
61+
re.VERBOSE
62+
),
63+
# regex based on https://setuptools.pypa.io/en/latest/deprecated/python_eggs.html#filename-embedded-metadata # noqa: E501
64+
".egg": re.compile(r"^(?P<name>.+?)-(?P<version>.*?)(-(?P<pyver>.+?(-(?P<plat>.+?))?))?\.egg|\.egg-info$"), # noqa: E501
65+
# regex based on https://github.com/python/cpython/blob/v3.7.0/Lib/distutils/command/bdist_wininst.py#L292 # noqa: E501
66+
".exe": re.compile(r"^(?P<name>.+?)-(?P<version>.*?)\.(?P<plat>.+?)(-(?P<pyver>.+?))?\.exe$"),
67+
}
68+
5469
DIST_TYPES = {
5570
"bdist_wheel": pkginfo.Wheel,
5671
"bdist_wininst": pkginfo.Distribution,
@@ -72,6 +87,8 @@ def parse_project_metadata(project):
7287
"""
7388
package = {}
7489
package['name'] = project.get('name') or ""
90+
package['version'] = project.get('version') or ""
91+
package['packagetype'] = project.get('packagetype') or ""
7592
package['metadata_version'] = project.get('metadata_version') or ""
7693
package['summary'] = project.get('summary') or ""
7794
package['description'] = project.get('description') or ""
@@ -86,13 +103,15 @@ def parse_project_metadata(project):
86103
package['project_url'] = project.get('project_url') or ""
87104
package['platform'] = project.get('platform') or ""
88105
package['supported_platform'] = project.get('supported_platform') or ""
106+
package['requires_python'] = project.get('requires_python') or ""
89107
package['requires_dist'] = json.dumps(project.get('requires_dist', []))
90108
package['provides_dist'] = json.dumps(project.get('provides_dist', []))
91109
package['obsoletes_dist'] = json.dumps(project.get('obsoletes_dist', []))
92110
package['requires_external'] = json.dumps(project.get('requires_external', []))
93111
package['classifiers'] = json.dumps(project.get('classifiers', []))
94112
package['project_urls'] = json.dumps(project.get('project_urls', {}))
95113
package['description_content_type'] = project.get('description_content_type') or ""
114+
package['python_version'] = project.get('python_version') or ""
96115

97116
return package
98117

@@ -113,17 +132,15 @@ def parse_metadata(project, version, distribution):
113132
dictionary: of useful python metadata
114133
115134
"""
116-
package = {}
135+
package = parse_project_metadata(project)
117136

118137
package['filename'] = distribution.get('filename') or ""
119138
package['packagetype'] = distribution.get('packagetype') or ""
120139
package['version'] = version
121140
package['url'] = distribution.get('url') or ""
122141
package['sha256'] = distribution.get('digests', {}).get('sha256') or ""
123-
package['python_version'] = distribution.get('python_version') or ""
124-
package['requires_python'] = distribution.get('requires_python') or ""
125-
126-
package.update(parse_project_metadata(project))
142+
package['python_version'] = distribution.get('python_version') or package.get('python_version')
143+
package['requires_python'] = distribution.get('requires_python') or package.get('requires_python') # noqa: E501
127144

128145
return package
129146

@@ -147,9 +164,30 @@ def get_project_metadata_from_artifact(filename, artifact):
147164
temp_file.flush()
148165
metadata = DIST_TYPES[packagetype](temp_file.name)
149166
metadata.packagetype = packagetype
167+
if packagetype == "sdist":
168+
metadata.python_version = "source"
169+
else:
170+
pyver = ""
171+
regex = DIST_REGEXES[extensions[pkg_type_index]]
172+
if bdist_name := regex.match(filename):
173+
pyver = bdist_name.group("pyver") or ""
174+
metadata.python_version = pyver
150175
return metadata
151176

152177

178+
def artifact_to_python_content_data(filename, artifact, domain=None):
179+
"""
180+
Takes the artifact/filename and returns the metadata needed to create a PythonPackageContent.
181+
"""
182+
metadata = get_project_metadata_from_artifact(filename, artifact)
183+
data = parse_project_metadata(vars(metadata))
184+
data['sha256'] = artifact.sha256
185+
data['filename'] = filename
186+
data['pulp_domain'] = domain or artifact.pulp_domain
187+
data['_pulp_domain'] = data['pulp_domain']
188+
return data
189+
190+
153191
def python_content_to_json(base_path, content_query, version=None, domain=None):
154192
"""
155193
Converts a QuerySet of PythonPackageContent into the PyPi JSON format

pulp_python/tests/functional/api/test_crud_content_unit.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,3 +122,15 @@ def test_upload_metadata_23_spec(python_content_factory):
122122
content = python_content_factory(filename, url=package.url)
123123
assert content.metadata_version == "2.3"
124124
break
125+
126+
127+
@pytest.mark.parallel
128+
def test_upload_requires_python(python_content_factory):
129+
filename = "pip-24.3.1-py3-none-any.whl"
130+
with PyPISimple() as client:
131+
page = client.get_project_page("pip")
132+
for package in page.packages:
133+
if package.filename == filename:
134+
content = python_content_factory(filename, url=package.url)
135+
assert content.requires_python == ">=3.8"
136+
break
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import pytest
2+
import subprocess
3+
4+
from pulp_python.tests.functional.constants import PYTHON_EGG_FILENAME
5+
6+
@pytest.fixture
7+
def create_content_direct(python_bindings):
8+
def _create(artifact_filename, filename, content_data):
9+
commands = (
10+
"from pulpcore.plugin.models import Artifact, ContentArtifact; "
11+
"from pulpcore.plugin.util import get_url; "
12+
"from pulp_python.app.models import PythonPackageContent; "
13+
f"a = Artifact.init_and_validate('{artifact_filename}'); "
14+
"a.save(); "
15+
f"c = PythonPackageContent(sha256=a.sha256, filename={filename!r}, **{content_data!r}); " # noqa: E501
16+
"c.save(); "
17+
f"ca = ContentArtifact(artifact=a, content=c, relative_path={filename!r}); "
18+
"ca.save(); "
19+
"print(get_url(c))"
20+
)
21+
process = subprocess.run(["pulpcore-manager", "shell", "-c", commands], capture_output=True)
22+
23+
assert process.returncode == 0
24+
content_href = process.stdout.decode().strip()
25+
return python_bindings.ContentPackagesApi.read(content_href)
26+
27+
return _create
28+
29+
30+
@pytest.fixture
31+
def move_to_repository(python_bindings, monitor_task):
32+
def _move(repo_href, content_hrefs):
33+
body = {"add_content_units": content_hrefs}
34+
task = monitor_task(python_bindings.RepositoriesPythonApi.modify(repo_href, body).task)
35+
assert len(task.created_resources) == 1
36+
return python_bindings.RepositoriesPythonApi.read(repo_href)
37+
38+
return _move
39+
40+
41+
def test_metadata_repair_command(
42+
create_content_direct,
43+
python_file,
44+
python_repo,
45+
move_to_repository,
46+
python_bindings,
47+
delete_orphans_pre,
48+
):
49+
"""Test pulpcore-manager repair-python-metadata command."""
50+
data = {
51+
"name": "shelf-reader",
52+
# Wrong metadata
53+
"version": "0.2",
54+
"packagetype": "bdist",
55+
"requires_python": ">=3.8",
56+
"author": "ME",
57+
}
58+
content = create_content_direct(python_file, PYTHON_EGG_FILENAME, data)
59+
for field, wrong_value in data.items():
60+
if field == "python_version":
61+
continue
62+
assert getattr(content, field) == wrong_value
63+
64+
print(python_repo.name)
65+
move_to_repository(python_repo.pulp_href, [content.pulp_href])
66+
process = subprocess.run(
67+
["pulpcore-manager", "repair-python-metadata", "--repositories", python_repo.pulp_href],
68+
capture_output=True
69+
)
70+
assert process.returncode == 0
71+
output = process.stdout.decode().strip()
72+
assert output == f"1 packages processed, 1 package metadata repaired."
73+
74+
content = python_bindings.ContentPackagesApi.read(content.pulp_href)
75+
assert content.version == "0.1"
76+
assert content.packagetype == "sdist"
77+
assert content.requires_python == "" # technically null
78+
assert content.author == "Austin Macdonald"

0 commit comments

Comments
 (0)