11import logging
2- import uuid
2+ from uuid import UUID
33from gettext import gettext as _
44
55from django .db .models .query import QuerySet
1212log = logging .getLogger (__name__ )
1313
1414
15- def repair (repository_pk : uuid .UUID ) -> None :
15+ BULK_SIZE = 1000
16+
17+
18+ def repair (repository_pk : UUID ) -> None :
1619 """
1720 Repairs metadata of all packages for the specified repository.
1821
1922 Args:
20- repository_pk (uuid. UUID): The primary key of the repository to repair.
23+ repository_pk (UUID): The primary key of the repository to repair.
2124
2225 Returns:
2326 None
@@ -48,7 +51,11 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> int:
4851 int: The number of packages that were repaired.
4952 """
5053 # TODO: Add on_demand content repair
51- immediate_content = content .filter (contentartifact__artifact__isnull = False )
54+ immediate_content = (
55+ content .filter (contentartifact__artifact__isnull = False )
56+ .distinct ()
57+ .prefetch_related ("_artifacts" )
58+ )
5259 domain = get_domain ()
5360
5461 batch = []
@@ -63,27 +70,56 @@ def repair_metadata(content: QuerySet[PythonPackageContent]) -> int:
6370 progress_report .save ()
6471 with progress_report :
6572 for package in progress_report .iter (
66- immediate_content .prefetch_related ( "_artifacts" ). iterator (chunk_size = 1000 )
73+ immediate_content .iterator (chunk_size = BULK_SIZE )
6774 ):
6875 new_data = artifact_to_python_content_data (
6976 package .filename , package ._artifacts .get (), domain
7077 )
71- changed = False
72- for field , value in new_data .items ():
73- if getattr (package , field ) != value :
74- setattr (package , field , value )
75- set_of_update_fields .add (field )
76- changed = True
77- if changed :
78- batch .append (package )
79- if len (batch ) == 1000 :
80- total_repaired += len (batch )
81- PythonPackageContent .objects .bulk_update (batch , set_of_update_fields )
82- batch = []
83- set_of_update_fields .clear ()
78+ total_repaired = update_package_if_needed (
79+ package , new_data , batch , set_of_update_fields , total_repaired
80+ )
8481
8582 if batch :
8683 total_repaired += len (batch )
8784 PythonPackageContent .objects .bulk_update (batch , set_of_update_fields )
8885
8986 return total_repaired
87+
88+
def update_package_if_needed(
    package: PythonPackageContent,
    new_data: dict,
    batch: list[PythonPackageContent],
    set_of_update_fields: set[str],
    total_repaired: int,
) -> int:
    """
    Sync a package with freshly computed field values and flush the pending
    batch once it is full.

    Every field in "new_data" whose value differs from the package's current
    attribute is overwritten on the package and its name is recorded in
    "set_of_update_fields". A package with at least one overwritten field is
    appended to "batch". Both "batch" and "set_of_update_fields" are mutated
    in-place and are emptied after a flush.

    Args:
        package: Package to compare against "new_data" and update.
        new_data: Mapping of field name to the value the package should have.
        batch: Packages modified so far and not yet written by a bulk update.
        set_of_update_fields: Names of the fields modified on the packages
            currently held in "batch".
        total_repaired: Number of packages already written by earlier flushes.

    Returns:
        "total_repaired" unchanged when no flush happened, otherwise
        "total_repaired" plus BULK_SIZE. Packages still waiting in "batch"
        are not counted here.
    """
    dirty = False
    for name, fresh_value in new_data.items():
        differs = getattr(package, name) != fresh_value
        if not differs:
            continue
        setattr(package, name, fresh_value)
        set_of_update_fields.add(name)
        dirty = True

    if dirty:
        batch.append(package)

    # The check runs on every call (not only when this package changed), so
    # the batch is flushed as soon as it holds exactly BULK_SIZE packages.
    if len(batch) != BULK_SIZE:
        return total_repaired

    PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)
    # Empty the caller's containers in-place so the same objects can be reused.
    batch.clear()
    set_of_update_fields.clear()
    return total_repaired + BULK_SIZE
0 commit comments