Skip to content

Commit c176899

Browse files
feat(preprod): Add insight comparison (#103774)
Adds backend code for insight comparison. Only produces diffs of insights or content within insights (files/groups), per offline chat with Trevor we're going against the designs to show "unresolved" files/insights. --------- Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com>
1 parent 905a502 commit c176899

File tree

4 files changed

+1441
-0
lines changed

4 files changed

+1441
-0
lines changed

src/sentry/preprod/size_analysis/compare.py

Lines changed: 324 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,20 @@
77
from packaging.version import parse as parse_version
88

99
from sentry.preprod.models import PreprodArtifactSizeMetrics
10+
from sentry.preprod.size_analysis.insight_models import (
11+
BaseInsightResult,
12+
FileSavingsResult,
13+
FileSavingsResultGroup,
14+
FilesInsightResult,
15+
GroupsInsightResult,
16+
)
1017
from sentry.preprod.size_analysis.models import (
1118
ComparisonResults,
1219
DiffItem,
1320
DiffType,
1421
FileAnalysis,
1522
FileInfo,
23+
InsightDiffItem,
1624
SizeAnalysisResults,
1725
SizeMetricDiffItem,
1826
TreemapElement,
@@ -79,6 +87,7 @@ def compare_size_analysis(
7987
path=path,
8088
item_type=item_type,
8189
type=diff_type,
90+
diff_items=None,
8291
)
8392
)
8493

@@ -101,6 +110,7 @@ def compare_size_analysis(
101110
path=path,
102111
item_type=head_element.type,
103112
type=DiffType.ADDED,
113+
diff_items=None,
104114
)
105115
)
106116

@@ -123,6 +133,7 @@ def compare_size_analysis(
123133
path=path,
124134
item_type=base_element.type,
125135
type=DiffType.REMOVED,
136+
diff_items=None,
126137
)
127138
)
128139
else:
@@ -144,9 +155,26 @@ def compare_size_analysis(
144155
base_download_size=base_size_analysis.max_download_size,
145156
)
146157

158+
# Compare insights only if we're not skipping the comparison
159+
insight_diff_items = []
160+
if not skip_diff_item_comparison:
161+
insight_diff_items = _compare_insights(
162+
head_size_analysis_results, base_size_analysis_results
163+
)
164+
else:
165+
logger.info(
166+
"preprod.size_analysis.compare.skipped_insight_comparison",
167+
extra={
168+
"head_analysis_version": head_size_analysis_results.analysis_version,
169+
"base_analysis_version": base_size_analysis_results.analysis_version,
170+
"preprod_artifact_id": head_size_analysis.preprod_artifact_id,
171+
},
172+
)
173+
147174
return ComparisonResults(
148175
diff_items=diff_items,
149176
size_metric_diff_item=size_metric_diff_item,
177+
insight_diff_items=insight_diff_items,
150178
skipped_diff_item_comparison=skip_diff_item_comparison,
151179
head_analysis_version=head_size_analysis_results.analysis_version,
152180
base_analysis_version=base_size_analysis_results.analysis_version,
@@ -336,3 +364,299 @@ def _collect_file_hashes(
336364
# Asset catalogs can have children
337365
for child in file_info.children:
338366
_collect_file_hashes(child, hash_to_paths, full_path)
367+
368+
369+
def _compare_files(
370+
head_files: list[FileSavingsResult], base_files: list[FileSavingsResult]
371+
) -> list[DiffItem]:
372+
diff_items = []
373+
374+
head_files_dict = {f.file_path: f for f in head_files}
375+
base_files_dict = {f.file_path: f for f in base_files}
376+
377+
all_paths = set(head_files_dict.keys()) | set(base_files_dict.keys())
378+
379+
for path in sorted(all_paths):
380+
head_file = head_files_dict.get(path)
381+
base_file = base_files_dict.get(path)
382+
383+
if head_file and base_file:
384+
logger.info(
385+
"preprod.size_analysis.compare.file_exists_in_both",
386+
extra={
387+
"head_file": head_file,
388+
"base_file": base_file,
389+
},
390+
)
391+
# File exists in both - check for size change
392+
size_diff = head_file.total_savings - base_file.total_savings
393+
394+
# Skip files with no size change
395+
if size_diff == 0:
396+
continue
397+
398+
diff_type = DiffType.INCREASED if size_diff > 0 else DiffType.DECREASED
399+
400+
diff_items.append(
401+
DiffItem(
402+
size_diff=size_diff,
403+
head_size=head_file.total_savings,
404+
base_size=base_file.total_savings,
405+
path=path,
406+
item_type=None,
407+
type=diff_type,
408+
diff_items=None,
409+
)
410+
)
411+
elif head_file:
412+
logger.info(
413+
"preprod.size_analysis.compare.file_added_in_head",
414+
extra={
415+
"head_file": head_file,
416+
},
417+
)
418+
# File added in head
419+
diff_items.append(
420+
DiffItem(
421+
size_diff=head_file.total_savings,
422+
head_size=head_file.total_savings,
423+
base_size=None,
424+
path=path,
425+
item_type=None,
426+
type=DiffType.ADDED,
427+
diff_items=None,
428+
)
429+
)
430+
elif base_file:
431+
logger.info(
432+
"preprod.size_analysis.compare.file_removed_from_head",
433+
extra={
434+
"base_file": base_file,
435+
},
436+
)
437+
# File removed from head
438+
diff_items.append(
439+
DiffItem(
440+
size_diff=-base_file.total_savings,
441+
head_size=None,
442+
base_size=base_file.total_savings,
443+
path=path,
444+
item_type=None,
445+
type=DiffType.REMOVED,
446+
diff_items=None,
447+
)
448+
)
449+
450+
return diff_items
451+
452+
453+
def _compare_groups(
454+
head_groups: list[FileSavingsResultGroup], base_groups: list[FileSavingsResultGroup]
455+
) -> list[DiffItem]:
456+
diff_items = []
457+
458+
# Create dictionaries for easier lookup
459+
head_groups_dict = {g.name: g for g in head_groups}
460+
base_groups_dict = {g.name: g for g in base_groups}
461+
462+
all_names = set(head_groups_dict.keys()) | set(base_groups_dict.keys())
463+
464+
for name in sorted(all_names):
465+
head_group = head_groups_dict.get(name)
466+
base_group = base_groups_dict.get(name)
467+
468+
if head_group and base_group:
469+
# Group exists in both - compare individual files within the group
470+
size_diff = head_group.total_savings - base_group.total_savings
471+
file_diffs = _compare_files(head_group.files, base_group.files)
472+
473+
# For unchanged groups, we need to show all files as unchanged
474+
if size_diff == 0 and len(file_diffs) == 0:
475+
# No size change and no file diffs, so the group is unchanged
476+
continue
477+
478+
# Always add group-level diff for groups that exist in both
479+
diff_type = DiffType.INCREASED if size_diff > 0 else DiffType.DECREASED
480+
diff_items.append(
481+
DiffItem(
482+
size_diff=size_diff,
483+
head_size=head_group.total_savings,
484+
base_size=base_group.total_savings,
485+
path=name,
486+
item_type=None,
487+
type=diff_type,
488+
diff_items=file_diffs if file_diffs else None,
489+
)
490+
)
491+
492+
elif head_group:
493+
# Group added in head - include individual files as nested diff items
494+
file_diffs = [
495+
DiffItem(
496+
size_diff=file.total_savings,
497+
head_size=file.total_savings,
498+
base_size=None,
499+
path=file.file_path,
500+
item_type=None,
501+
type=DiffType.ADDED,
502+
diff_items=None,
503+
)
504+
for file in head_group.files
505+
]
506+
507+
diff_items.append(
508+
DiffItem(
509+
size_diff=head_group.total_savings,
510+
head_size=head_group.total_savings,
511+
base_size=None,
512+
path=name,
513+
item_type=None,
514+
type=DiffType.ADDED,
515+
diff_items=file_diffs if file_diffs else None,
516+
)
517+
)
518+
519+
elif base_group:
520+
# Group removed from head - include individual files as nested diff items
521+
file_diffs = [
522+
DiffItem(
523+
size_diff=-file.total_savings,
524+
head_size=None,
525+
base_size=file.total_savings,
526+
path=file.file_path,
527+
item_type=None,
528+
type=DiffType.REMOVED,
529+
diff_items=None,
530+
)
531+
for file in base_group.files
532+
]
533+
534+
diff_items.append(
535+
DiffItem(
536+
size_diff=-base_group.total_savings,
537+
head_size=None,
538+
base_size=base_group.total_savings,
539+
path=name,
540+
item_type=None,
541+
type=DiffType.REMOVED,
542+
diff_items=file_diffs if file_diffs else None,
543+
)
544+
)
545+
546+
return diff_items
547+
548+
549+
def _diff_insight(
550+
insight_type: str,
551+
head_insight: BaseInsightResult | None,
552+
base_insight: BaseInsightResult | None,
553+
) -> InsightDiffItem | None:
554+
if head_insight is None and base_insight is None:
555+
raise ValueError("Both head and base insights are None")
556+
557+
if head_insight is None:
558+
# Should never happen, but here for mypy passing
559+
assert base_insight is not None
560+
status = "resolved"
561+
# Resolved insight - all items removed
562+
total_savings_change = -base_insight.total_savings
563+
head_files = []
564+
base_files = base_insight.files if isinstance(base_insight, FilesInsightResult) else []
565+
head_groups = []
566+
base_groups = base_insight.groups if isinstance(base_insight, GroupsInsightResult) else []
567+
elif base_insight is None:
568+
# Should never happen, but here for mypy passing
569+
assert head_insight is not None
570+
status = "new"
571+
# New insight - all items added
572+
total_savings_change = head_insight.total_savings
573+
head_files = head_insight.files if isinstance(head_insight, FilesInsightResult) else []
574+
base_files = []
575+
head_groups = head_insight.groups if isinstance(head_insight, GroupsInsightResult) else []
576+
base_groups = []
577+
else:
578+
status = "unresolved"
579+
# Unresolved insight - compare both
580+
total_savings_change = head_insight.total_savings - base_insight.total_savings
581+
head_files = head_insight.files if isinstance(head_insight, FilesInsightResult) else []
582+
base_files = base_insight.files if isinstance(base_insight, FilesInsightResult) else []
583+
head_groups = head_insight.groups if isinstance(head_insight, GroupsInsightResult) else []
584+
base_groups = base_insight.groups if isinstance(base_insight, GroupsInsightResult) else []
585+
586+
file_diffs = []
587+
group_diffs = []
588+
589+
if head_files or base_files:
590+
file_diffs = _compare_files(head_files, base_files)
591+
# If no file diffs, we don't want to show an irrelevant insight diff item
592+
if len(file_diffs) == 0:
593+
return None
594+
elif head_groups or base_groups:
595+
group_diffs = _compare_groups(head_groups, base_groups)
596+
# If no group diffs, we don't want to show an irrelevant insight diff item
597+
if len(group_diffs) == 0:
598+
return None
599+
600+
# TODO(EME-678) Implement non-File/GroupinsightsResult insight diffs in future
601+
602+
return InsightDiffItem(
603+
insight_type=insight_type,
604+
status=status,
605+
total_savings_change=total_savings_change,
606+
file_diffs=file_diffs,
607+
group_diffs=group_diffs,
608+
)
609+
610+
611+
def _compare_insights(
612+
head_size_analysis_results: SizeAnalysisResults,
613+
base_size_analysis_results: SizeAnalysisResults,
614+
) -> list[InsightDiffItem]:
615+
insight_diff_items: list[InsightDiffItem] = []
616+
617+
head_insights = head_size_analysis_results.insights
618+
base_insights = base_size_analysis_results.insights
619+
620+
if head_insights is None and base_insights is None:
621+
return insight_diff_items
622+
623+
# Convert insights to dictionaries for easier comparison
624+
head_insight_dict = (
625+
{
626+
field_name: value
627+
for field_name, value in vars(head_insights).items()
628+
if value is not None
629+
}
630+
if head_insights
631+
else {}
632+
)
633+
base_insight_dict = (
634+
{
635+
field_name: value
636+
for field_name, value in vars(base_insights).items()
637+
if value is not None
638+
}
639+
if base_insights
640+
else {}
641+
)
642+
643+
# Get all insight types from both head and base
644+
all_insight_types = set(head_insight_dict.keys()) | set(base_insight_dict.keys())
645+
646+
for insight_type in sorted(all_insight_types):
647+
head_insight = head_insight_dict.get(insight_type)
648+
base_insight = base_insight_dict.get(insight_type)
649+
650+
# Determine if insight has actionable savings (exclude zero-savings insights)
651+
head_has_savings = head_insight is not None and head_insight.total_savings > 0
652+
base_has_savings = base_insight is not None and base_insight.total_savings > 0
653+
654+
if not head_has_savings and not base_has_savings:
655+
# Skip insights with zero savings in both head and base
656+
continue
657+
658+
insight_diff_item = _diff_insight(insight_type, head_insight, base_insight)
659+
if insight_diff_item:
660+
insight_diff_items.append(insight_diff_item)
661+
662+
return insight_diff_items

0 commit comments

Comments
 (0)