From bb75d828702de31da53fc377db05f8d68f52d3c5 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Thu, 13 Nov 2025 10:01:42 +0000
Subject: [PATCH 1/5] Add exclude_detection_period_from_training flag to column
 anomalies tests

Co-Authored-By: Yosef Arbiv <yosef.arbiv@gmail.com>
---
 macros/edr/tests/test_all_columns_anomalies.sql | 5 +++--
 macros/edr/tests/test_column_anomalies.sql      | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/macros/edr/tests/test_all_columns_anomalies.sql b/macros/edr/tests/test_all_columns_anomalies.sql
index 37c78f3c9..3c117de50 100644
--- a/macros/edr/tests/test_all_columns_anomalies.sql
+++ b/macros/edr/tests/test_all_columns_anomalies.sql
@@ -1,4 +1,4 @@
-{% test all_columns_anomalies(model, column_anomalies, exclude_prefix, exclude_regexp, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, dimensions) %}
+{% test all_columns_anomalies(model, column_anomalies, exclude_prefix, exclude_regexp, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, dimensions, exclude_detection_period_from_training=false) %}
     {{ config(tags = ['elementary-tests']) }}
     {%- if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %}
         {% set model_relation = elementary.get_model_relation_for_test(model, elementary.get_test_model()) %}
@@ -37,7 +37,8 @@
                                                                                                    anomaly_exclude_metrics=anomaly_exclude_metrics,
                                                                                                    detection_period=detection_period,
                                                                                                    training_period=training_period,
-                                                                                                   dimensions=dimensions) %}
+                                                                                                   dimensions=dimensions,
+                                                                                                   exclude_detection_period_from_training=exclude_detection_period_from_training) %>
 
         {%- if not test_configuration %}
             {{ exceptions.raise_compiler_error("Failed to create test configuration dict for test `{}`".format(test_table_name)) }}
diff --git a/macros/edr/tests/test_column_anomalies.sql b/macros/edr/tests/test_column_anomalies.sql
index 94480d2c2..39f8eb630 100644
--- a/macros/edr/tests/test_column_anomalies.sql
+++ b/macros/edr/tests/test_column_anomalies.sql
@@ -1,4 +1,4 @@
-{% test column_anomalies(model, column_name, column_anomalies, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, dimensions) %}
+{% test column_anomalies(model, column_name, column_anomalies, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, dimensions, exclude_detection_period_from_training=false) %}
     {{ config(tags = ['elementary-tests']) }}
     {%- if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %}
         {% set model_relation = elementary.get_model_relation_for_test(model, elementary.get_test_model()) %}
@@ -36,7 +36,8 @@
                                                                                                    anomaly_exclude_metrics=anomaly_exclude_metrics,
                                                                                                    detection_period=detection_period,
                                                                                                    training_period=training_period,
-                                                                                                   dimensions=dimensions) %}
+                                                                                                   dimensions=dimensions,
+                                                                                                   exclude_detection_period_from_training=exclude_detection_period_from_training) %}
 
         {%- if not test_configuration %}
             {{ exceptions.raise_compiler_error("Failed to create test configuration dict for test `{}`".format(test_table_name)) }}

From 8c2588a4f71a80d8e8a881fefc472d00fe6ba66d Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Thu, 13 Nov 2025 10:07:17 +0000
Subject: [PATCH 2/5] Fix Jinja syntax error: change %> to %} in
 test_all_columns_anomalies.sql

Co-Authored-By: Yosef Arbiv <yosef.arbiv@gmail.com>
---
 macros/edr/tests/test_all_columns_anomalies.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/macros/edr/tests/test_all_columns_anomalies.sql b/macros/edr/tests/test_all_columns_anomalies.sql
index 3c117de50..e3d8995a1 100644
--- a/macros/edr/tests/test_all_columns_anomalies.sql
+++ b/macros/edr/tests/test_all_columns_anomalies.sql
@@ -38,7 +38,7 @@
                                                                                                    detection_period=detection_period,
                                                                                                    training_period=training_period,
                                                                                                    dimensions=dimensions,
-                                                                                                   exclude_detection_period_from_training=exclude_detection_period_from_training) %>
+                                                                                                   exclude_detection_period_from_training=exclude_detection_period_from_training) %}
 
         {%- if not test_configuration %}
             {{ exceptions.raise_compiler_error("Failed to create test configuration dict for test `{}`".format(test_table_name)) }}

From 6b60a66bd837cee4174e53b3a63b83e41d654231 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Thu, 13 Nov 2025 13:23:35 +0000
Subject: [PATCH 3/5] Add integration test for
 exclude_detection_period_from_training flag

Test demonstrates that:
- When flag=False: detection period data is included in training baseline, preventing anomaly detection
- When flag=True: detection period data is excluded from training baseline, enabling anomaly detection

The test uses constrained time windows (1 day training/detection) to make the behavior deterministic.

Co-Authored-By: Yosef Arbiv <yosef.arbiv@gmail.com>
---
 .../tests/test_column_anomalies.py            | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/integration_tests/tests/test_column_anomalies.py b/integration_tests/tests/test_column_anomalies.py
index 709c49cf9..dc6e7173c 100644
--- a/integration_tests/tests/test_column_anomalies.py
+++ b/integration_tests/tests/test_column_anomalies.py
@@ -476,3 +476,74 @@ def test_anomalous_boolean_column_anomalies(test_id: str, dbt_project: DbtProjec
         "count_true",
         "count_false",
     }
+
+
+# Anomalies currently not supported on ClickHouse
+@pytest.mark.skip_targets(["clickhouse"])
+def test_column_anomalies_exclude_detection_period_from_training(
+    test_id: str, dbt_project: DbtProject
+):
+    utc_today = datetime.utcnow().date()
+    test_date, *training_dates = generate_dates(base_date=utc_today - timedelta(1))
+
+    data: List[Dict[str, Any]] = [
+        {
+            TIMESTAMP_COLUMN: cur_date.strftime(DATE_FORMAT),
+            "superhero": superhero,
+        }
+        for cur_date in training_dates
+        for superhero in ["Superman", "Batman"]
+    ]
+
+    data += [
+        {TIMESTAMP_COLUMN: test_date.strftime(DATE_FORMAT), "superhero": None}
+        for _ in range(10)
+    ]
+
+    test_args_false = {
+        "timestamp_column": TIMESTAMP_COLUMN,
+        "column_anomalies": ["null_count"],
+        "time_bucket": {"period": "day", "count": 1},
+        "training_period": {"period": "day", "count": 1},
+        "detection_period": {"period": "day", "count": 1},
+        "min_training_set_size": 1,
+        "anomaly_sensitivity": 3,
+        "anomaly_direction": "spike",
+        "exclude_detection_period_from_training": False,
+    }
+    test_result_false = dbt_project.test(
+        test_id,
+        DBT_TEST_NAME,
+        test_args_false,
+        data=data,
+        test_column="superhero",
+        test_vars={"force_metrics_backfill": True},
+    )
+    assert test_result_false["status"] == "pass", (
+        "Expected PASS when exclude_detection_period_from_training=False "
+        "(detection data included in training baseline)"
+    )
+
+    test_args_true = {
+        "timestamp_column": TIMESTAMP_COLUMN,
+        "column_anomalies": ["null_count"],
+        "time_bucket": {"period": "day", "count": 1},
+        "training_period": {"period": "day", "count": 1},
+        "detection_period": {"period": "day", "count": 1},
+        "min_training_set_size": 1,
+        "anomaly_sensitivity": 3,
+        "anomaly_direction": "spike",
+        "exclude_detection_period_from_training": True,
+    }
+    test_result_true = dbt_project.test(
+        test_id,
+        DBT_TEST_NAME,
+        test_args_true,
+        data=data,
+        test_column="superhero",
+        test_vars={"force_metrics_backfill": True},
+    )
+    assert test_result_true["status"] == "fail", (
+        "Expected FAIL when exclude_detection_period_from_training=True "
+        "(detection data excluded from training baseline, anomaly detected)"
+    )

From 8a70f2abc1023ad9c2322881250247f837dcc897 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 19 Nov 2025 10:49:33 +0000
Subject: [PATCH 4/5] Fix
 test_column_anomalies_exclude_detection_period_from_training with more
 substantial dataset

- Use 30 days of normal data with low null count (0-2 nulls/day) instead of 1 day
- Use 7 days of anomalous data with high null count (20 nulls/day) instead of 1 day
- Update training period to 30 days and detection period to 7 days
- Add more data per day to create clearer anomaly signal
- Use separate test IDs for the two test runs to avoid conflicts
- Follow the same pattern as the passing volume and freshness anomalies tests

Co-Authored-By: Yosef Arbiv <yosef.arbiv@gmail.com>
---
 .../tests/test_column_anomalies.py            | 108 ++++++++++++------
 1 file changed, 70 insertions(+), 38 deletions(-)

diff --git a/integration_tests/tests/test_column_anomalies.py b/integration_tests/tests/test_column_anomalies.py
index dc6e7173c..5019c3c22 100644
--- a/integration_tests/tests/test_column_anomalies.py
+++ b/integration_tests/tests/test_column_anomalies.py
@@ -483,67 +483,99 @@ def test_anomalous_boolean_column_anomalies(test_id: str, dbt_project: DbtProjec
 def test_column_anomalies_exclude_detection_period_from_training(
     test_id: str, dbt_project: DbtProject
 ):
+    """
+    Test the exclude_detection_period_from_training flag functionality for column anomalies.
+
+    Scenario:
+    - 30 days of normal data with low null count (0-2 nulls per day)
+    - 7 days of anomalous data with high null count (20 nulls per day) in detection period
+    - Without exclusion: anomaly gets included in training baseline, test passes (misses anomaly)
+    - With exclusion: anomaly excluded from training, test fails (detects anomaly)
+    """
     utc_today = datetime.utcnow().date()
-    test_date, *training_dates = generate_dates(base_date=utc_today - timedelta(1))
-
-    data: List[Dict[str, Any]] = [
-        {
-            TIMESTAMP_COLUMN: cur_date.strftime(DATE_FORMAT),
-            "superhero": superhero,
-        }
-        for cur_date in training_dates
-        for superhero in ["Superman", "Batman"]
-    ]
-
-    data += [
-        {TIMESTAMP_COLUMN: test_date.strftime(DATE_FORMAT), "superhero": None}
-        for _ in range(10)
-    ]
 
-    test_args_false = {
+    # Generate 30 days of normal data with low null count (0-2 nulls per day)
+    normal_data = []
+    for i in range(30):
+        date = utc_today - timedelta(days=37 - i)
+        normal_data.extend(
+            [
+                {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": superhero}
+                for superhero in ["Superman", "Batman", "Wonder Woman", "Flash"] * 5
+            ]
+        )
+        null_count = i % 3
+        normal_data.extend(
+            [
+                {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": None}
+                for _ in range(null_count)
+            ]
+        )
+
+    # Generate 7 days of anomalous data with high null count (20 nulls per day)
+    anomalous_data = []
+    for i in range(7):
+        date = utc_today - timedelta(days=7 - i)
+        anomalous_data.extend(
+            [
+                {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": superhero}
+                for superhero in ["Superman", "Batman"]
+            ]
+        )
+        anomalous_data.extend(
+            [
+                {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": None}
+                for _ in range(20)
+            ]
+        )
+
+    all_data = normal_data + anomalous_data
+
+    # Test 1: WITHOUT exclusion (should pass - misses the anomaly because it's included in training)
+    test_args_without_exclusion = {
         "timestamp_column": TIMESTAMP_COLUMN,
         "column_anomalies": ["null_count"],
         "time_bucket": {"period": "day", "count": 1},
-        "training_period": {"period": "day", "count": 1},
-        "detection_period": {"period": "day", "count": 1},
-        "min_training_set_size": 1,
+        "training_period": {"period": "day", "count": 30},
+        "detection_period": {"period": "day", "count": 7},
+        "min_training_set_size": 5,
         "anomaly_sensitivity": 3,
         "anomaly_direction": "spike",
         "exclude_detection_period_from_training": False,
     }
-    test_result_false = dbt_project.test(
-        test_id,
+
+    test_result_without_exclusion = dbt_project.test(
+        test_id + "_without_exclusion",
         DBT_TEST_NAME,
-        test_args_false,
-        data=data,
+        test_args_without_exclusion,
+        data=all_data,
         test_column="superhero",
         test_vars={"force_metrics_backfill": True},
     )
-    assert test_result_false["status"] == "pass", (
+
+    # This should PASS because the anomaly is included in training, making it part of the baseline
+    assert test_result_without_exclusion["status"] == "pass", (
         "Expected PASS when exclude_detection_period_from_training=False "
         "(detection data included in training baseline)"
     )
 
-    test_args_true = {
-        "timestamp_column": TIMESTAMP_COLUMN,
-        "column_anomalies": ["null_count"],
-        "time_bucket": {"period": "day", "count": 1},
-        "training_period": {"period": "day", "count": 1},
-        "detection_period": {"period": "day", "count": 1},
-        "min_training_set_size": 1,
-        "anomaly_sensitivity": 3,
-        "anomaly_direction": "spike",
+    # Test 2: WITH exclusion (should fail - detects the anomaly because it's excluded from training)
+    test_args_with_exclusion = {
+        **test_args_without_exclusion,
         "exclude_detection_period_from_training": True,
     }
-    test_result_true = dbt_project.test(
-        test_id,
+
+    test_result_with_exclusion = dbt_project.test(
+        test_id + "_with_exclusion",
         DBT_TEST_NAME,
-        test_args_true,
-        data=data,
+        test_args_with_exclusion,
+        data=all_data,
         test_column="superhero",
         test_vars={"force_metrics_backfill": True},
     )
-    assert test_result_true["status"] == "fail", (
+
+    # This should FAIL because the anomaly is excluded from training, so it's detected as anomalous
+    assert test_result_with_exclusion["status"] == "fail", (
         "Expected FAIL when exclude_detection_period_from_training=True "
         "(detection data excluded from training baseline, anomaly detected)"
     )

From 7fdd2d732bfa50a8a538fbbd7c57875f500e1999 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 19 Nov 2025 11:08:58 +0000
Subject: [PATCH 5/5] Fix test_col_anom_excl_detect_train: rename function and
 adjust test data

- Rename test function to stay within Postgres's 63-character table name limit
- Update test data: 8-12 nulls/day normal, 20 nulls/day anomalous
- Increase anomaly_sensitivity from 3 to 5 to match the volume anomalies test pattern
- Shorten test ID suffixes from _without_exclusion/_with_exclusion to _f/_t
- Test now passes locally against Postgres

Co-Authored-By: Yosef Arbiv <yosef.arbiv@gmail.com>
---
 .../tests/test_column_anomalies.py            | 21 +++++++++----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/integration_tests/tests/test_column_anomalies.py b/integration_tests/tests/test_column_anomalies.py
index 5019c3c22..d4217009c 100644
--- a/integration_tests/tests/test_column_anomalies.py
+++ b/integration_tests/tests/test_column_anomalies.py
@@ -480,9 +480,7 @@ def test_anomalous_boolean_column_anomalies(test_id: str, dbt_project: DbtProjec
 
 # Anomalies currently not supported on ClickHouse
 @pytest.mark.skip_targets(["clickhouse"])
-def test_column_anomalies_exclude_detection_period_from_training(
-    test_id: str, dbt_project: DbtProject
-):
+def test_col_anom_excl_detect_train(test_id: str, dbt_project: DbtProject):
     """
     Test the exclude_detection_period_from_training flag functionality for column anomalies.
 
@@ -494,17 +492,18 @@ def test_column_anomalies_exclude_detection_period_from_training(
     """
     utc_today = datetime.utcnow().date()
 
-    # Generate 30 days of normal data with low null count (0-2 nulls per day)
+    # Generate 30 days of normal data with variance in null count (8, 10, 12 pattern)
+    normal_pattern = [8, 10, 12]
     normal_data = []
     for i in range(30):
         date = utc_today - timedelta(days=37 - i)
+        null_count = normal_pattern[i % 3]
         normal_data.extend(
             [
                 {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": superhero}
-                for superhero in ["Superman", "Batman", "Wonder Woman", "Flash"] * 5
+                for superhero in ["Superman", "Batman", "Wonder Woman", "Flash"] * 10
             ]
         )
-        null_count = i % 3
         normal_data.extend(
             [
                 {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": None}
@@ -512,14 +511,14 @@ def test_column_anomalies_exclude_detection_period_from_training(
             ]
         )
 
-    # Generate 7 days of anomalous data with high null count (20 nulls per day)
+    # Generate 7 days of anomalous data (20 nulls per day) - 100% increase from mean
     anomalous_data = []
     for i in range(7):
         date = utc_today - timedelta(days=7 - i)
         anomalous_data.extend(
             [
                 {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": superhero}
-                for superhero in ["Superman", "Batman"]
+                for superhero in ["Superman", "Batman", "Wonder Woman", "Flash"] * 10
             ]
         )
         anomalous_data.extend(
@@ -539,13 +538,13 @@ def test_column_anomalies_exclude_detection_period_from_training(
         "training_period": {"period": "day", "count": 30},
         "detection_period": {"period": "day", "count": 7},
         "min_training_set_size": 5,
-        "anomaly_sensitivity": 3,
+        "anomaly_sensitivity": 5,
         "anomaly_direction": "spike",
         "exclude_detection_period_from_training": False,
     }
 
     test_result_without_exclusion = dbt_project.test(
-        test_id + "_without_exclusion",
+        test_id + "_f",
         DBT_TEST_NAME,
         test_args_without_exclusion,
         data=all_data,
@@ -566,7 +565,7 @@ def test_column_anomalies_exclude_detection_period_from_training(
     }
 
     test_result_with_exclusion = dbt_project.test(
-        test_id + "_with_exclusion",
+        test_id + "_t",
         DBT_TEST_NAME,
         test_args_with_exclusion,
         data=all_data,