77from packaging .version import parse as parse_version
88
99from sentry .preprod .models import PreprodArtifactSizeMetrics
10+ from sentry .preprod .size_analysis .insight_models import (
11+ BaseInsightResult ,
12+ FileSavingsResult ,
13+ FileSavingsResultGroup ,
14+ FilesInsightResult ,
15+ GroupsInsightResult ,
16+ )
1017from sentry .preprod .size_analysis .models import (
1118 ComparisonResults ,
1219 DiffItem ,
1320 DiffType ,
1421 FileAnalysis ,
1522 FileInfo ,
23+ InsightDiffItem ,
1624 SizeAnalysisResults ,
1725 SizeMetricDiffItem ,
1826 TreemapElement ,
@@ -79,6 +87,7 @@ def compare_size_analysis(
7987 path = path ,
8088 item_type = item_type ,
8189 type = diff_type ,
90+ diff_items = None ,
8291 )
8392 )
8493
@@ -101,6 +110,7 @@ def compare_size_analysis(
101110 path = path ,
102111 item_type = head_element .type ,
103112 type = DiffType .ADDED ,
113+ diff_items = None ,
104114 )
105115 )
106116
@@ -123,6 +133,7 @@ def compare_size_analysis(
123133 path = path ,
124134 item_type = base_element .type ,
125135 type = DiffType .REMOVED ,
136+ diff_items = None ,
126137 )
127138 )
128139 else :
@@ -144,9 +155,26 @@ def compare_size_analysis(
144155 base_download_size = base_size_analysis .max_download_size ,
145156 )
146157
158+ # Compare insights only if we're not skipping the comparison
159+ insight_diff_items = []
160+ if not skip_diff_item_comparison :
161+ insight_diff_items = _compare_insights (
162+ head_size_analysis_results , base_size_analysis_results
163+ )
164+ else :
165+ logger .info (
166+ "preprod.size_analysis.compare.skipped_insight_comparison" ,
167+ extra = {
168+ "head_analysis_version" : head_size_analysis_results .analysis_version ,
169+ "base_analysis_version" : base_size_analysis_results .analysis_version ,
170+ "preprod_artifact_id" : head_size_analysis .preprod_artifact_id ,
171+ },
172+ )
173+
147174 return ComparisonResults (
148175 diff_items = diff_items ,
149176 size_metric_diff_item = size_metric_diff_item ,
177+ insight_diff_items = insight_diff_items ,
150178 skipped_diff_item_comparison = skip_diff_item_comparison ,
151179 head_analysis_version = head_size_analysis_results .analysis_version ,
152180 base_analysis_version = base_size_analysis_results .analysis_version ,
@@ -336,3 +364,299 @@ def _collect_file_hashes(
336364 # Asset catalogs can have children
337365 for child in file_info .children :
338366 _collect_file_hashes (child , hash_to_paths , full_path )
367+
368+
369+ def _compare_files (
370+ head_files : list [FileSavingsResult ], base_files : list [FileSavingsResult ]
371+ ) -> list [DiffItem ]:
372+ diff_items = []
373+
374+ head_files_dict = {f .file_path : f for f in head_files }
375+ base_files_dict = {f .file_path : f for f in base_files }
376+
377+ all_paths = set (head_files_dict .keys ()) | set (base_files_dict .keys ())
378+
379+ for path in sorted (all_paths ):
380+ head_file = head_files_dict .get (path )
381+ base_file = base_files_dict .get (path )
382+
383+ if head_file and base_file :
384+ logger .info (
385+ "preprod.size_analysis.compare.file_exists_in_both" ,
386+ extra = {
387+ "head_file" : head_file ,
388+ "base_file" : base_file ,
389+ },
390+ )
391+ # File exists in both - check for size change
392+ size_diff = head_file .total_savings - base_file .total_savings
393+
394+ # Skip files with no size change
395+ if size_diff == 0 :
396+ continue
397+
398+ diff_type = DiffType .INCREASED if size_diff > 0 else DiffType .DECREASED
399+
400+ diff_items .append (
401+ DiffItem (
402+ size_diff = size_diff ,
403+ head_size = head_file .total_savings ,
404+ base_size = base_file .total_savings ,
405+ path = path ,
406+ item_type = None ,
407+ type = diff_type ,
408+ diff_items = None ,
409+ )
410+ )
411+ elif head_file :
412+ logger .info (
413+ "preprod.size_analysis.compare.file_added_in_head" ,
414+ extra = {
415+ "head_file" : head_file ,
416+ },
417+ )
418+ # File added in head
419+ diff_items .append (
420+ DiffItem (
421+ size_diff = head_file .total_savings ,
422+ head_size = head_file .total_savings ,
423+ base_size = None ,
424+ path = path ,
425+ item_type = None ,
426+ type = DiffType .ADDED ,
427+ diff_items = None ,
428+ )
429+ )
430+ elif base_file :
431+ logger .info (
432+ "preprod.size_analysis.compare.file_removed_from_head" ,
433+ extra = {
434+ "base_file" : base_file ,
435+ },
436+ )
437+ # File removed from head
438+ diff_items .append (
439+ DiffItem (
440+ size_diff = - base_file .total_savings ,
441+ head_size = None ,
442+ base_size = base_file .total_savings ,
443+ path = path ,
444+ item_type = None ,
445+ type = DiffType .REMOVED ,
446+ diff_items = None ,
447+ )
448+ )
449+
450+ return diff_items
451+
452+
453+ def _compare_groups (
454+ head_groups : list [FileSavingsResultGroup ], base_groups : list [FileSavingsResultGroup ]
455+ ) -> list [DiffItem ]:
456+ diff_items = []
457+
458+ # Create dictionaries for easier lookup
459+ head_groups_dict = {g .name : g for g in head_groups }
460+ base_groups_dict = {g .name : g for g in base_groups }
461+
462+ all_names = set (head_groups_dict .keys ()) | set (base_groups_dict .keys ())
463+
464+ for name in sorted (all_names ):
465+ head_group = head_groups_dict .get (name )
466+ base_group = base_groups_dict .get (name )
467+
468+ if head_group and base_group :
469+ # Group exists in both - compare individual files within the group
470+ size_diff = head_group .total_savings - base_group .total_savings
471+ file_diffs = _compare_files (head_group .files , base_group .files )
472+
473+ # For unchanged groups, we need to show all files as unchanged
474+ if size_diff == 0 and len (file_diffs ) == 0 :
475+ # No size change and no file diffs, so the group is unchanged
476+ continue
477+
478+ # Always add group-level diff for groups that exist in both
479+ diff_type = DiffType .INCREASED if size_diff > 0 else DiffType .DECREASED
480+ diff_items .append (
481+ DiffItem (
482+ size_diff = size_diff ,
483+ head_size = head_group .total_savings ,
484+ base_size = base_group .total_savings ,
485+ path = name ,
486+ item_type = None ,
487+ type = diff_type ,
488+ diff_items = file_diffs if file_diffs else None ,
489+ )
490+ )
491+
492+ elif head_group :
493+ # Group added in head - include individual files as nested diff items
494+ file_diffs = [
495+ DiffItem (
496+ size_diff = file .total_savings ,
497+ head_size = file .total_savings ,
498+ base_size = None ,
499+ path = file .file_path ,
500+ item_type = None ,
501+ type = DiffType .ADDED ,
502+ diff_items = None ,
503+ )
504+ for file in head_group .files
505+ ]
506+
507+ diff_items .append (
508+ DiffItem (
509+ size_diff = head_group .total_savings ,
510+ head_size = head_group .total_savings ,
511+ base_size = None ,
512+ path = name ,
513+ item_type = None ,
514+ type = DiffType .ADDED ,
515+ diff_items = file_diffs if file_diffs else None ,
516+ )
517+ )
518+
519+ elif base_group :
520+ # Group removed from head - include individual files as nested diff items
521+ file_diffs = [
522+ DiffItem (
523+ size_diff = - file .total_savings ,
524+ head_size = None ,
525+ base_size = file .total_savings ,
526+ path = file .file_path ,
527+ item_type = None ,
528+ type = DiffType .REMOVED ,
529+ diff_items = None ,
530+ )
531+ for file in base_group .files
532+ ]
533+
534+ diff_items .append (
535+ DiffItem (
536+ size_diff = - base_group .total_savings ,
537+ head_size = None ,
538+ base_size = base_group .total_savings ,
539+ path = name ,
540+ item_type = None ,
541+ type = DiffType .REMOVED ,
542+ diff_items = file_diffs if file_diffs else None ,
543+ )
544+ )
545+
546+ return diff_items
547+
548+
549+ def _diff_insight (
550+ insight_type : str ,
551+ head_insight : BaseInsightResult | None ,
552+ base_insight : BaseInsightResult | None ,
553+ ) -> InsightDiffItem | None :
554+ if head_insight is None and base_insight is None :
555+ raise ValueError ("Both head and base insights are None" )
556+
557+ if head_insight is None :
558+ # Should never happen, but here for mypy passing
559+ assert base_insight is not None
560+ status = "resolved"
561+ # Resolved insight - all items removed
562+ total_savings_change = - base_insight .total_savings
563+ head_files = []
564+ base_files = base_insight .files if isinstance (base_insight , FilesInsightResult ) else []
565+ head_groups = []
566+ base_groups = base_insight .groups if isinstance (base_insight , GroupsInsightResult ) else []
567+ elif base_insight is None :
568+ # Should never happen, but here for mypy passing
569+ assert head_insight is not None
570+ status = "new"
571+ # New insight - all items added
572+ total_savings_change = head_insight .total_savings
573+ head_files = head_insight .files if isinstance (head_insight , FilesInsightResult ) else []
574+ base_files = []
575+ head_groups = head_insight .groups if isinstance (head_insight , GroupsInsightResult ) else []
576+ base_groups = []
577+ else :
578+ status = "unresolved"
579+ # Unresolved insight - compare both
580+ total_savings_change = head_insight .total_savings - base_insight .total_savings
581+ head_files = head_insight .files if isinstance (head_insight , FilesInsightResult ) else []
582+ base_files = base_insight .files if isinstance (base_insight , FilesInsightResult ) else []
583+ head_groups = head_insight .groups if isinstance (head_insight , GroupsInsightResult ) else []
584+ base_groups = base_insight .groups if isinstance (base_insight , GroupsInsightResult ) else []
585+
586+ file_diffs = []
587+ group_diffs = []
588+
589+ if head_files or base_files :
590+ file_diffs = _compare_files (head_files , base_files )
591+ # If no file diffs, we don't want to show an irrelevant insight diff item
592+ if len (file_diffs ) == 0 :
593+ return None
594+ elif head_groups or base_groups :
595+ group_diffs = _compare_groups (head_groups , base_groups )
596+ # If no group diffs, we don't want to show an irrelevant insight diff item
597+ if len (group_diffs ) == 0 :
598+ return None
599+
600+ # TODO(EME-678) Implement non-File/GroupinsightsResult insight diffs in future
601+
602+ return InsightDiffItem (
603+ insight_type = insight_type ,
604+ status = status ,
605+ total_savings_change = total_savings_change ,
606+ file_diffs = file_diffs ,
607+ group_diffs = group_diffs ,
608+ )
609+
610+
611+ def _compare_insights (
612+ head_size_analysis_results : SizeAnalysisResults ,
613+ base_size_analysis_results : SizeAnalysisResults ,
614+ ) -> list [InsightDiffItem ]:
615+ insight_diff_items : list [InsightDiffItem ] = []
616+
617+ head_insights = head_size_analysis_results .insights
618+ base_insights = base_size_analysis_results .insights
619+
620+ if head_insights is None and base_insights is None :
621+ return insight_diff_items
622+
623+ # Convert insights to dictionaries for easier comparison
624+ head_insight_dict = (
625+ {
626+ field_name : value
627+ for field_name , value in vars (head_insights ).items ()
628+ if value is not None
629+ }
630+ if head_insights
631+ else {}
632+ )
633+ base_insight_dict = (
634+ {
635+ field_name : value
636+ for field_name , value in vars (base_insights ).items ()
637+ if value is not None
638+ }
639+ if base_insights
640+ else {}
641+ )
642+
643+ # Get all insight types from both head and base
644+ all_insight_types = set (head_insight_dict .keys ()) | set (base_insight_dict .keys ())
645+
646+ for insight_type in sorted (all_insight_types ):
647+ head_insight = head_insight_dict .get (insight_type )
648+ base_insight = base_insight_dict .get (insight_type )
649+
650+ # Determine if insight has actionable savings (exclude zero-savings insights)
651+ head_has_savings = head_insight is not None and head_insight .total_savings > 0
652+ base_has_savings = base_insight is not None and base_insight .total_savings > 0
653+
654+ if not head_has_savings and not base_has_savings :
655+ # Skip insights with zero savings in both head and base
656+ continue
657+
658+ insight_diff_item = _diff_insight (insight_type , head_insight , base_insight )
659+ if insight_diff_item :
660+ insight_diff_items .append (insight_diff_item )
661+
662+ return insight_diff_items
0 commit comments