From f83265cae6549d9640819ebb10e19e04f1d366fd Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Thu, 13 Nov 2025 13:10:06 +0000
Subject: [PATCH 1/6] Add exclude_detection_period_from_training flag to
 dimension anomaly test

- Added exclude_detection_period_from_training parameter to test_dimension_anomalies macro signature with default value false
- Passed the parameter through to get_anomalies_test_configuration
- This brings dimension anomalies in line with table/volume anomalies which already support this flag
- The underlying logic in get_anomaly_scores_query.sql already handles this parameter for all anomaly types

Co-Authored-By: Yosef Arbiv <yosef.arbiv@gmail.com>
---
 macros/edr/tests/test_dimension_anomalies.sql | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/macros/edr/tests/test_dimension_anomalies.sql b/macros/edr/tests/test_dimension_anomalies.sql
index 6412973a2..350e6e925 100644
--- a/macros/edr/tests/test_dimension_anomalies.sql
+++ b/macros/edr/tests/test_dimension_anomalies.sql
@@ -1,4 +1,4 @@
-{% test dimension_anomalies(model, dimensions, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_final_results) %}
+{% test dimension_anomalies(model, dimensions, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_final_results, exclude_detection_period_from_training=false) %}
     {{ config(tags = ['elementary-tests']) }}
     {%- if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %}
         {% set model_relation = elementary.get_model_relation_for_test(model, elementary.get_test_model()) %}
@@ -39,7 +39,8 @@
                                                                                                    anomaly_exclude_metrics=anomaly_exclude_metrics,
                                                                                                    detection_period=detection_period,
                                                                                                    training_period=training_period,
-                                                                                                   exclude_final_results=exclude_final_results) %}
+                                                                                                   exclude_final_results=exclude_final_results,
+                                                                                                   exclude_detection_period_from_training=exclude_detection_period_from_training) %}
 
         {%- if not test_configuration %}
             {{ exceptions.raise_compiler_error("Failed to create test configuration dict for test `{}`".format(test_table_name)) }}

From 9872781e735042eec778fbe9f8ba212f1bb30fbd Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Thu, 13 Nov 2025 13:24:03 +0000
Subject: [PATCH 2/6] Add integration test for
 exclude_detection_period_from_training in dimension anomalies

- Added test_dimension_exclude_detection_from_training to demonstrate the flag's behavior
- Test shows that without exclusion, anomaly is missed (test passes) because training includes the detection period
- Test shows that with exclusion, anomaly is detected (test fails) because training excludes the detection period
- Uses 30 days of normal data with variance (45/50/55 pattern) and 7 days of anomalous data (72/28 distribution)
- Follows the same pattern as test_exclude_detection_from_training in test_volume_anomalies.py

Co-Authored-By: Yosef Arbiv <yosef.arbiv@gmail.com>
---
 .../tests/test_dimension_anomalies.py         | 106 ++++++++++++++++++
 1 file changed, 106 insertions(+)

diff --git a/integration_tests/tests/test_dimension_anomalies.py b/integration_tests/tests/test_dimension_anomalies.py
index 69e9e8637..7cd7c18fd 100644
--- a/integration_tests/tests/test_dimension_anomalies.py
+++ b/integration_tests/tests/test_dimension_anomalies.py
@@ -218,3 +218,109 @@ def test_dimension_anomalies_with_timestamp_exclude_final_results(
     test_result = dbt_project.test(test_id, DBT_TEST_NAME, test_args, data=data)
     assert test_result["status"] == "fail"
     assert test_result["failures"] == 1
+
+
+# Test for exclude_detection_period_from_training functionality
+# This test demonstrates the use case where:
+# 1. Detection period contains anomalous distribution data that would normally be included in training
+# 2. With exclude_detection_period_from_training=False: anomaly is missed (test passes) because training includes the anomaly
+# 3. With exclude_detection_period_from_training=True: anomaly is detected (test fails) because training excludes the anomaly
+@pytest.mark.skip_targets(["clickhouse"])
+def test_dimension_exclude_detection_from_training(
+    test_id: str, dbt_project: DbtProject
+):
+    """
+    Test the exclude_detection_period_from_training flag functionality for dimension anomalies.
+
+    Scenario:
+    - 30 days of normal data with variance (45/50/55 Superman, 55/50/45 Spiderman pattern)
+    - 7 days of anomalous data (72 Superman, 28 Spiderman per day) in detection period
+    - Without exclusion: anomaly gets included in training baseline, test passes (misses anomaly)
+    - With exclusion: anomaly excluded from training, test fails (detects anomaly)
+    """
+    utc_now = datetime.utcnow()
+
+    # Generate 30 days of normal data with variance (45/50/55 pattern for Superman)
+    normal_pattern = [45, 50, 55]
+    normal_data = []
+    for i in range(30):
+        date = utc_now - timedelta(days=37 - i)
+        superman_count = normal_pattern[i % 3]
+        spiderman_count = 100 - superman_count
+        normal_data.extend(
+            [
+                {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": "Superman"}
+                for _ in range(superman_count)
+            ]
+        )
+        normal_data.extend(
+            [
+                {
+                    TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT),
+                    "superhero": "Spiderman",
+                }
+                for _ in range(spiderman_count)
+            ]
+        )
+
+    # Generate 7 days of anomalous data (72 Superman, 28 Spiderman per day) - this will be in detection period
+    anomalous_data = []
+    for i in range(7):
+        date = utc_now - timedelta(days=7 - i)
+        anomalous_data.extend(
+            [
+                {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": "Superman"}
+                for _ in range(72)
+            ]
+        )
+        anomalous_data.extend(
+            [
+                {
+                    TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT),
+                    "superhero": "Spiderman",
+                }
+                for _ in range(28)
+            ]
+        )
+
+    all_data = normal_data + anomalous_data
+
+    # Test 1: WITHOUT exclusion (should pass - misses the anomaly because it's included in training)
+    test_args_without_exclusion = {
+        **DBT_TEST_ARGS,
+        "training_period": {"period": "day", "count": 30},
+        "detection_period": {"period": "day", "count": 7},
+        "time_bucket": {"period": "day", "count": 1},
+        "sensitivity": 5,
+        # exclude_detection_period_from_training is not set (defaults to False/None)
+    }
+
+    test_result_without_exclusion = dbt_project.test(
+        test_id + "_without_exclusion",
+        DBT_TEST_NAME,
+        test_args_without_exclusion,
+        data=all_data,
+    )
+
+    # This should PASS because the anomaly is included in training, making it part of the baseline
+    assert (
+        test_result_without_exclusion["status"] == "pass"
+    ), "Test should pass when anomaly is included in training"
+
+    # Test 2: WITH exclusion (should fail - detects the anomaly because it's excluded from training)
+    test_args_with_exclusion = {
+        **test_args_without_exclusion,
+        "exclude_detection_period_from_training": True,
+    }
+
+    test_result_with_exclusion = dbt_project.test(
+        test_id + "_with_exclusion",
+        DBT_TEST_NAME,
+        test_args_with_exclusion,
+        data=all_data,
+    )
+
+    # This should FAIL because the anomaly is excluded from training, so it's detected as anomalous
+    assert (
+        test_result_with_exclusion["status"] == "fail"
+    ), "Test should fail when anomaly is excluded from training"

From b8b837acf30464ad43bf436c5281ce90d51c6357 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Thu, 20 Nov 2025 10:52:47 +0000
Subject: [PATCH 3/6] Fix test_dimension_exclude_detection_from_training:
 shorten test ID suffixes to avoid Postgres 63-char limit

Co-Authored-By: Yosef Arbiv <yosef.arbiv@gmail.com>
---
 integration_tests/tests/test_dimension_anomalies.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/integration_tests/tests/test_dimension_anomalies.py b/integration_tests/tests/test_dimension_anomalies.py
index 7cd7c18fd..af586a947 100644
--- a/integration_tests/tests/test_dimension_anomalies.py
+++ b/integration_tests/tests/test_dimension_anomalies.py
@@ -296,7 +296,7 @@ def test_dimension_exclude_detection_from_training(
     }
 
     test_result_without_exclusion = dbt_project.test(
-        test_id + "_without_exclusion",
+        test_id + "_f",
         DBT_TEST_NAME,
         test_args_without_exclusion,
         data=all_data,
@@ -314,7 +314,7 @@ def test_dimension_exclude_detection_from_training(
     }
 
     test_result_with_exclusion = dbt_project.test(
-        test_id + "_with_exclusion",
+        test_id + "_t",
         DBT_TEST_NAME,
         test_args_with_exclusion,
         data=all_data,

From 370a4cb768b97e0c93eddf980fcac702da20955f Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Thu, 20 Nov 2025 13:04:19 +0000
Subject: [PATCH 4/6] Refactor test to use parametrization: rename to
 test_anomaly_in_detection_period

- Rename test_dimension_exclude_detection_from_training to test_anomaly_in_detection_period
- Add @pytest.mark.parametrize decorator with exclude_detection and expected_status parameters
- Use descriptive IDs: include_detection_in_training and exclude_detection_from_training
- Consolidate two test cases into one parametrized test for better maintainability
- Addresses reviewer feedback on PR #890

Co-Authored-By: Yosef Arbiv <yosef.arbiv@gmail.com>
---
 .../tests/test_dimension_anomalies.py         | 55 ++++++++-----------
 1 file changed, 23 insertions(+), 32 deletions(-)

diff --git a/integration_tests/tests/test_dimension_anomalies.py b/integration_tests/tests/test_dimension_anomalies.py
index af586a947..f82298fde 100644
--- a/integration_tests/tests/test_dimension_anomalies.py
+++ b/integration_tests/tests/test_dimension_anomalies.py
@@ -223,11 +223,22 @@ def test_dimension_anomalies_with_timestamp_exclude_final_results(
 # Test for exclude_detection_period_from_training functionality
 # This test demonstrates the use case where:
 # 1. Detection period contains anomalous distribution data that would normally be included in training
-# 2. With exclude_detection_period_from_training=False: anomaly is missed (test passes) because training includes the anomaly
-# 3. With exclude_detection_period_from_training=True: anomaly is detected (test fails) because training excludes the anomaly
+# 2. With exclude_detection=False: anomaly is missed (test passes) because training includes the anomaly
+# 3. With exclude_detection=True: anomaly is detected (test fails) because training excludes the anomaly
 @pytest.mark.skip_targets(["clickhouse"])
-def test_dimension_exclude_detection_from_training(
-    test_id: str, dbt_project: DbtProject
+@pytest.mark.parametrize(
+    "exclude_detection,expected_status",
+    [
+        (False, "pass"),  # include detection in training → anomaly absorbed
+        (True, "fail"),  # exclude detection from training → anomaly detected
+    ],
+    ids=["include_detection_in_training", "exclude_detection_from_training"],
+)
+def test_anomaly_in_detection_period(
+    test_id: str,
+    dbt_project: DbtProject,
+    exclude_detection: bool,
+    expected_status: str,
 ):
     """
     Test the exclude_detection_period_from_training flag functionality for dimension anomalies.
@@ -285,42 +296,22 @@ def test_dimension_exclude_detection_from_training(
 
     all_data = normal_data + anomalous_data
 
-    # Test 1: WITHOUT exclusion (should pass - misses the anomaly because it's included in training)
-    test_args_without_exclusion = {
+    test_args = {
         **DBT_TEST_ARGS,
         "training_period": {"period": "day", "count": 30},
         "detection_period": {"period": "day", "count": 7},
         "time_bucket": {"period": "day", "count": 1},
         "sensitivity": 5,
-        # exclude_detection_period_from_training is not set (defaults to False/None)
-    }
-
-    test_result_without_exclusion = dbt_project.test(
-        test_id + "_f",
-        DBT_TEST_NAME,
-        test_args_without_exclusion,
-        data=all_data,
-    )
-
-    # This should PASS because the anomaly is included in training, making it part of the baseline
-    assert (
-        test_result_without_exclusion["status"] == "pass"
-    ), "Test should pass when anomaly is included in training"
-
-    # Test 2: WITH exclusion (should fail - detects the anomaly because it's excluded from training)
-    test_args_with_exclusion = {
-        **test_args_without_exclusion,
-        "exclude_detection_period_from_training": True,
     }
+    if exclude_detection:
+        test_args["exclude_detection_period_from_training"] = True
 
-    test_result_with_exclusion = dbt_project.test(
-        test_id + "_t",
+    suffix = "_excl" if exclude_detection else "_incl"
+    test_result = dbt_project.test(
+        test_id + suffix,
         DBT_TEST_NAME,
-        test_args_with_exclusion,
+        test_args,
         data=all_data,
     )
 
-    # This should FAIL because the anomaly is excluded from training, so it's detected as anomalous
-    assert (
-        test_result_with_exclusion["status"] == "fail"
-    ), "Test should fail when anomaly is excluded from training"
+    assert test_result["status"] == expected_status

From fd75902f9438a0c2a12e4ed335c2212821406f19 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Thu, 20 Nov 2025 13:24:37 +0000
Subject: [PATCH 5/6] Fix Postgres 63-char identifier limit: shorten
 parametrize IDs and remove redundant suffix

- Change parametrize IDs from 'include_detection_in_training'/'exclude_detection_from_training' to 'exclude_false'/'exclude_true'
- Remove redundant suffix (_incl/_excl) since pytest parametrize IDs already differentiate test cases
- New table names: test_anomaly_in_detection_period_exclude_false (46 chars) and test_anomaly_in_detection_period_exclude_true (45 chars)
- Both are well under the Postgres 63-character identifier limit
- Fixes CI failures on Postgres (latest_official and latest_pre)

Co-Authored-By: Yosef Arbiv <yosef.arbiv@gmail.com>
---
 integration_tests/tests/test_dimension_anomalies.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/integration_tests/tests/test_dimension_anomalies.py b/integration_tests/tests/test_dimension_anomalies.py
index f82298fde..57f3ccf2c 100644
--- a/integration_tests/tests/test_dimension_anomalies.py
+++ b/integration_tests/tests/test_dimension_anomalies.py
@@ -232,7 +232,10 @@ def test_dimension_anomalies_with_timestamp_exclude_final_results(
         (False, "pass"),  # include detection in training → anomaly absorbed
         (True, "fail"),  # exclude detection from training → anomaly detected
     ],
-    ids=["include_detection_in_training", "exclude_detection_from_training"],
+    ids=[
+        "exclude_false",
+        "exclude_true",
+    ],  # Shortened to stay under Postgres 63-char limit
 )
 def test_anomaly_in_detection_period(
     test_id: str,
@@ -248,6 +251,8 @@ def test_anomaly_in_detection_period(
     - 7 days of anomalous data (72 Superman, 28 Spiderman per day) in detection period
     - Without exclusion: anomaly gets included in training baseline, test passes (misses anomaly)
     - With exclusion: anomaly excluded from training, test fails (detects anomaly)
+
+    Note: Parametrize IDs are shortened to avoid Postgres 63-character identifier limit.
     """
     utc_now = datetime.utcnow()
 
@@ -306,9 +311,8 @@ def test_anomaly_in_detection_period(
     if exclude_detection:
         test_args["exclude_detection_period_from_training"] = True
 
-    suffix = "_excl" if exclude_detection else "_incl"
     test_result = dbt_project.test(
-        test_id + suffix,
+        test_id,
         DBT_TEST_NAME,
         test_args,
         data=all_data,

From 311d543ad8e02e55699c89cb57a3cb9595b51656 Mon Sep 17 00:00:00 2001
From: arbiv <yosef@elementary-data.com>
Date: Mon, 24 Nov 2025 14:06:58 +0200
Subject: [PATCH 6/6] Fix test_anomaly_in_detection_period to use date object
 instead of datetime

Change utc_now from datetime.utcnow() to datetime.utcnow().date() to match
the pattern used in other tests. Date arithmetic already works correctly
with date objects.
---
 integration_tests/tests/test_dimension_anomalies.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integration_tests/tests/test_dimension_anomalies.py b/integration_tests/tests/test_dimension_anomalies.py
index 57f3ccf2c..d55354a56 100644
--- a/integration_tests/tests/test_dimension_anomalies.py
+++ b/integration_tests/tests/test_dimension_anomalies.py
@@ -254,7 +254,7 @@ def test_anomaly_in_detection_period(
 
     Note: Parametrize IDs are shortened to avoid Postgres 63-character identifier limit.
     """
-    utc_now = datetime.utcnow()
+    utc_now = datetime.utcnow().date()
 
     # Generate 30 days of normal data with variance (45/50/55 pattern for Superman)
     normal_pattern = [45, 50, 55]