11import logging
2- import uuid
32from gettext import gettext as _
3+ from uuid import UUID
44
55from django .db .models .query import QuerySet
6- from pulpcore .plugin .models import ProgressReport
7- from pulpcore .plugin .util import get_domain
8-
96from pulp_python .app .models import PythonPackageContent , PythonRepository
107from pulp_python .app .utils import artifact_to_python_content_data
8+ from pulpcore .plugin .models import ProgressReport
9+ from pulpcore .plugin .util import get_domain
1110
1211log = logging .getLogger (__name__ )
1312
1413
15- def repair (repository_pk : uuid .UUID ) -> None :
14+ BULK_SIZE = 1000
15+
16+
17+ def repair (repository_pk : UUID ) -> None :
1618 """
1719 Repairs metadata of all packages for the specified repository.
1820
1921 Args:
20- repository_pk (uuid. UUID): The primary key of the repository to repair.
22+ repository_pk (UUID): The primary key of the repository to repair.
2123
2224 Returns:
2325 None
@@ -48,7 +50,11 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> int:
4850 int: The number of packages that were repaired.
4951 """
5052 # TODO: Add on_demand content repair
51- immediate_content = content .filter (contentartifact__artifact__isnull = False )
53+ immediate_content = (
54+ content .filter (contentartifact__artifact__isnull = False )
55+ .distinct ()
56+ .prefetch_related ("_artifacts" )
57+ )
5258 domain = get_domain ()
5359
5460 batch = []
@@ -63,27 +69,55 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> int:
6369 progress_report .save ()
6470 with progress_report :
6571 for package in progress_report .iter (
66- immediate_content .prefetch_related ( "_artifacts" ). iterator (chunk_size = 1000 )
72+ immediate_content .iterator (chunk_size = BULK_SIZE )
6773 ):
6874 new_data = artifact_to_python_content_data (
6975 package .filename , package ._artifacts .get (), domain
7076 )
71- changed = False
72- for field , value in new_data .items ():
73- if getattr (package , field ) != value :
74- setattr (package , field , value )
75- set_of_update_fields .add (field )
76- changed = True
77- if changed :
78- batch .append (package )
79- if len (batch ) == 1000 :
80- total_repaired += len (batch )
81- PythonPackageContent .objects .bulk_update (batch , set_of_update_fields )
82- batch = []
83- set_of_update_fields .clear ()
77+ total_repaired += update_package_if_needed (
78+ package , new_data , batch , set_of_update_fields
79+ )
8480
8581 if batch :
8682 total_repaired += len (batch )
8783 PythonPackageContent .objects .bulk_update (batch , set_of_update_fields )
8884
8985 return total_repaired
86+
87+
def update_package_if_needed(
    package: PythonPackageContent,
    new_data: dict,
    batch: list[PythonPackageContent],
    set_of_update_fields: set[str],
) -> int:
    """
    Sync a package's attributes with freshly extracted data and queue it for saving.

    Every field in "new_data" whose value differs from the attribute currently set
    on "package" is overwritten on the package and its name is recorded in
    "set_of_update_fields". A package with at least one differing field is appended
    to "batch". Both "batch" and "set_of_update_fields" are mutated in-place.

    Once "batch" holds exactly BULK_SIZE packages, it is written with a single
    bulk_update call and both collections are emptied.

    Args:
        package: Package to compare and, if necessary, modify.
        new_data: Mapping of field name to the value the package should have.
        batch: Packages modified so far that are waiting to be saved.
        set_of_update_fields: Names of the fields modified on the queued packages.

    Returns:
        BULK_SIZE if this call flushed the batch, otherwise 0. Packages still
        waiting in "batch" are not counted here.
    """
    modified = False
    for name, fresh_value in new_data.items():
        differs = getattr(package, name) != fresh_value
        if differs:
            setattr(package, name, fresh_value)
            set_of_update_fields.add(name)
            modified = True

    if modified:
        batch.append(package)

    # Nothing to flush until the buffer reaches exactly BULK_SIZE entries.
    if len(batch) != BULK_SIZE:
        return 0

    PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)
    # Empty the shared containers in-place so the caller's references start fresh.
    batch.clear()
    set_of_update_fields.clear()
    return BULK_SIZE
0 commit comments