elementary-data · arbiv · Nov 28, 2025 · Nov 13, 2025 · Nov 13, 2025 · Nov 20, 2025
diff --git a/integration_tests/tests/test_dimension_anomalies.py b/integration_tests/tests/test_dimension_anomalies.py
@@ -218,3 +218,104 @@ def test_dimension_anomalies_with_timestamp_exclude_final_results(
     test_result = dbt_project.test(test_id, DBT_TEST_NAME, test_args, data=data)
     assert test_result["status"] == "fail"
     assert test_result["failures"] == 1
+
+
+# Test for exclude_detection_period_from_training functionality
+# This test demonstrates the use case where:
+# 1. Detection period contains anomalous distribution data that would normally be included in training
+# 2. With exclude_detection=False: anomaly is missed (test passes) because training includes the anomaly
+# 3. With exclude_detection=True: anomaly is detected (test fails) because training excludes the anomaly
+@pytest.mark.skip_targets(["clickhouse"])
+@pytest.mark.parametrize(
+    "exclude_detection,expected_status",
+    [
+        (False, "pass"),  # include detection in training → anomaly absorbed
+        (True, "fail"),  # exclude detection from training → anomaly detected
+    ],
+    ids=[
+        "exclude_false",
+        "exclude_true",
+    ],  # Shortened to stay under Postgres 63-char limit
+)
+def test_anomaly_in_detection_period(
+    test_id: str,
+    dbt_project: DbtProject,
+    exclude_detection: bool,
+    expected_status: str,
+):
+    """
+    Test the exclude_detection_period_from_training flag functionality for dimension anomalies.
+
+    Scenario:
+    - 30 days of normal data with variance (45/50/55 Superman, 55/50/45 Spiderman pattern)
+    - 7 days of anomalous data (72 Superman, 28 Spiderman per day) in detection period
+    - Without exclusion: anomaly gets included in training baseline, test passes (misses anomaly)
+    - With exclusion: anomaly excluded from training, test fails (detects anomaly)
+
+    Note: Parametrize IDs are shortened to avoid Postgres 63-character identifier limit.
+    """
+    utc_now = datetime.utcnow().date()
+
+    # Generate 30 days of normal data with variance (45/50/55 pattern for Superman)
+    normal_pattern = [45, 50, 55]
+    normal_data = []
+    for i in range(30):
+        date = utc_now - timedelta(days=37 - i)
+        superman_count = normal_pattern[i % 3]
+        spiderman_count = 100 - superman_count
+        normal_data.extend(
+            [
+                {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": "Superman"}
+                for _ in range(superman_count)
+            ]
+        )
+        normal_data.extend(
+            [
+                {
+                    TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT),
+                    "superhero": "Spiderman",
+                }
+                for _ in range(spiderman_count)
+            ]
+        )
+
+    # Generate 7 days of anomalous data (72 Superman, 28 Spiderman per day) - this will be in detection period
+    anomalous_data = []
+    for i in range(7):
+        date = utc_now - timedelta(days=7 - i)
+        anomalous_data.extend(
+            [
+                {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": "Superman"}
+                for _ in range(72)
+            ]
+        )
+        anomalous_data.extend(
+            [
+                {
+                    TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT),
+                    "superhero": "Spiderman",
+                }
+                for _ in range(28)
+            ]
+        )
+
+    all_data = normal_data + anomalous_data
+
+    test_args = {
+        **DBT_TEST_ARGS,
+        "training_period": {"period": "day", "count": 30},
+        "detection_period": {"period": "day", "count": 7},
+        "time_bucket": {"period": "day", "count": 1},
+        "sensitivity": 5,
+    }
+    if exclude_detection:
+        test_args["exclude_detection_period_from_training"] = True
+
+    test_result = dbt_project.test(
+        test_id,
+        DBT_TEST_NAME,
+        test_args,
+        data=all_data,
+    )
+
+    assert test_result["status"] == expected_status
diff --git a/macros/edr/tests/test_dimension_anomalies.sql b/macros/edr/tests/test_dimension_anomalies.sql
@@ -1,4 +1,4 @@
-{% test dimension_anomalies(model, dimensions, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_final_results) %}
+{% test dimension_anomalies(model, dimensions, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_final_results, exclude_detection_period_from_training=false) %}
     {{ config(tags = ['elementary-tests']) }}
     {%- if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %}
         {% set model_relation = elementary.get_model_relation_for_test(model, elementary.get_test_model()) %}
@@ -39,7 +39,8 @@
                                                                                                    anomaly_exclude_metrics=anomaly_exclude_metrics,
                                                                                                    detection_period=detection_period,
                                                                                                    training_period=training_period,
-                                                                                                   exclude_final_results=exclude_final_results) %}
+                                                                                                   exclude_final_results=exclude_final_results,
+                                                                                                   exclude_detection_period_from_training=exclude_detection_period_from_training) %}
 
         {%- if not test_configuration %}
             {{ exceptions.raise_compiler_error("Failed to create test configuration dict for test `{}`".format(test_table_name)) }}