Commit 84c61b8

Use iterator() instead of paginated() for fetching advisories
- paginated() performs poorly when iterating over large records compared to the built-in iterator()

Signed-off-by: Keshav Priyadarshi <git@keshav.space>
1 parent 0ed58a8 commit 84c61b8
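
For context on the change: Django's built-in QuerySet.iterator(chunk_size=...) evaluates the queryset lazily and skips the queryset result cache, and on backends with server-side cursor support (such as PostgreSQL) it fetches rows in chunk_size batches, so memory use stays roughly flat over a full-table scan. The paginated() call it replaces is a project-specific queryset helper, which the commit message reports as noticeably slower for large record sets. Below is a minimal sketch of the adopted pattern, assuming only the stock Django ORM API; the Advisory import path and the per-record work are placeholders, not the pipeline's actual code:

# Hedged sketch: stream a large table with Django's built-in iterator().
# The import path is assumed for illustration; per-record work is a stub.
from vulnerabilities.models import Advisory

chunk_size = 5000  # rows fetched per batch on the server-side cursor

seen = 0
for advisory in Advisory.objects.all().order_by("id").iterator(chunk_size=chunk_size):
    seen += 1  # per-record work (e.g. recomputing a content id) would go here

If paginated() pages with OFFSET/LIMIT under the hood, each successive page forces the database to skip an ever-larger prefix of the table, which is the usual reason such loops degrade on big tables; iterator() avoids that by streaming from a single open cursor.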

File tree: 1 file changed (+4, -4 lines changed)


vulnerabilities/pipelines/remove_duplicate_advisories.py

Lines changed: 4 additions & 4 deletions
@@ -15,7 +15,7 @@


 class RemoveDuplicateAdvisoriesPipeline(VulnerableCodePipeline):
-    """Pipeline to remove duplicate advisories based on their content."""
+    """Pipeline to compute new advisory content id and remove duplicate advisories based on their content."""

     pipeline_id = "remove_duplicate_advisories"

@@ -25,22 +25,22 @@ def steps(cls):

     def remove_duplicates(self):
         """
-        Recompute content id and remove advisories with the same content and keep only the latest one.
+        Recompute the content ID and remove duplicate advisories, keeping the oldest one.
         """

         advisories_count = Advisory.objects.all().count()
         self.log(f"Computing new content id for {advisories_count} and removing duplicates.")

         update_batch_size = 500
         delete_batch_size = 5000
-        chunk_size = 50000
+        chunk_size = 5000
         deleted_advisories_count = 0
         updated_advisories_count = 0
         duplicate_advisory_ids = []
         advisories_to_update = []
         content_ids = set()

-        advisories = Advisory.objects.all().order_by("id").paginated(per_page=chunk_size)
+        advisories = Advisory.objects.all().order_by("id").iterator(chunk_size=chunk_size)

         progress = LoopProgress(
             total_iterations=advisories_count,
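
The batch-size variables visible in the context lines (update_batch_size, delete_batch_size) and the LoopProgress wrapper suggest a buffer-and-flush loop around this iterator. The following is a hedged sketch of that general pattern using plain Django bulk operations; compute_content_id() and the unique_content_id field are hypothetical stand-ins, and this is not the pipeline's actual implementation:

# Illustrative pattern only: stream with iterator(), buffer changes,
# and flush in fixed-size batches. Helper and field names are assumptions.
advisories_to_update = []
duplicate_advisory_ids = []
content_ids = set()

for advisory in Advisory.objects.all().order_by("id").iterator(chunk_size=5000):
    content_id = compute_content_id(advisory)  # hypothetical helper
    if content_id in content_ids:
        # Later duplicates are queued for deletion; iterating by ascending
        # id means the oldest advisory with a given content id is kept.
        duplicate_advisory_ids.append(advisory.id)
    else:
        content_ids.add(content_id)
        advisory.unique_content_id = content_id  # assumed field name
        advisories_to_update.append(advisory)

    if len(advisories_to_update) >= 500:  # update_batch_size
        Advisory.objects.bulk_update(advisories_to_update, ["unique_content_id"])
        advisories_to_update.clear()
    if len(duplicate_advisory_ids) >= 5000:  # delete_batch_size
        Advisory.objects.filter(id__in=duplicate_advisory_ids).delete()
        duplicate_advisory_ids.clear()

# Flush whatever remains after the loop.
if advisories_to_update:
    Advisory.objects.bulk_update(advisories_to_update, ["unique_content_id"])
if duplicate_advisory_ids:
    Advisory.objects.filter(id__in=duplicate_advisory_ids).delete()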
