From 02779f61bfc8aadbc18d7b435d57bef9f22c7748 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 13 Nov 2025 13:10:47 +0000 Subject: [PATCH 1/5] Add exclude_detection_period_from_training flag to column and dimension anomaly tests Co-Authored-By: Yosef Arbiv --- macros/edr/tests/test_all_columns_anomalies.sql | 5 +++-- macros/edr/tests/test_column_anomalies.sql | 5 +++-- macros/edr/tests/test_dimension_anomalies.sql | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/macros/edr/tests/test_all_columns_anomalies.sql b/macros/edr/tests/test_all_columns_anomalies.sql index 37c78f3c9..e3d8995a1 100644 --- a/macros/edr/tests/test_all_columns_anomalies.sql +++ b/macros/edr/tests/test_all_columns_anomalies.sql @@ -1,4 +1,4 @@ -{% test all_columns_anomalies(model, column_anomalies, exclude_prefix, exclude_regexp, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, dimensions) %} +{% test all_columns_anomalies(model, column_anomalies, exclude_prefix, exclude_regexp, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, dimensions, exclude_detection_period_from_training=false) %} {{ config(tags = ['elementary-tests']) }} {%- if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %} {% set model_relation = elementary.get_model_relation_for_test(model, elementary.get_test_model()) %} @@ -37,7 +37,8 @@ anomaly_exclude_metrics=anomaly_exclude_metrics, detection_period=detection_period, training_period=training_period, - dimensions=dimensions) %} + dimensions=dimensions, + exclude_detection_period_from_training=exclude_detection_period_from_training) %} {%- if not test_configuration %} {{ exceptions.raise_compiler_error("Failed to create test configuration dict for test `{}`".format(test_table_name)) }} diff --git a/macros/edr/tests/test_column_anomalies.sql b/macros/edr/tests/test_column_anomalies.sql index 94480d2c2..39f8eb630 100644 --- a/macros/edr/tests/test_column_anomalies.sql +++ b/macros/edr/tests/test_column_anomalies.sql @@ -1,4 +1,4 @@ -{% test column_anomalies(model, column_name, column_anomalies, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, dimensions) %} +{% test column_anomalies(model, column_name, column_anomalies, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, dimensions, exclude_detection_period_from_training=false) %} {{ config(tags = ['elementary-tests']) }} {%- if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %} {% set model_relation = elementary.get_model_relation_for_test(model, elementary.get_test_model()) %} @@ -36,7 +36,8 @@ anomaly_exclude_metrics=anomaly_exclude_metrics, detection_period=detection_period, training_period=training_period, - dimensions=dimensions) %} + dimensions=dimensions, + exclude_detection_period_from_training=exclude_detection_period_from_training) %} {%- if not test_configuration %} {{ exceptions.raise_compiler_error("Failed to create test configuration dict for test `{}`".format(test_table_name)) }} diff --git a/macros/edr/tests/test_dimension_anomalies.sql b/macros/edr/tests/test_dimension_anomalies.sql index 6412973a2..350e6e925 100644 --- a/macros/edr/tests/test_dimension_anomalies.sql +++ b/macros/edr/tests/test_dimension_anomalies.sql @@ -1,4 +1,4 @@ -{% test dimension_anomalies(model, dimensions, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_final_results) %} +{% test dimension_anomalies(model, dimensions, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_final_results, exclude_detection_period_from_training=false) %} {{ config(tags = ['elementary-tests']) }} {%- if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %} {% set model_relation = elementary.get_model_relation_for_test(model, elementary.get_test_model()) %} @@ -39,7 +39,8 @@ anomaly_exclude_metrics=anomaly_exclude_metrics, detection_period=detection_period, training_period=training_period, - exclude_final_results=exclude_final_results) %} + exclude_final_results=exclude_final_results, + exclude_detection_period_from_training=exclude_detection_period_from_training) %} {%- if not test_configuration %} {{ exceptions.raise_compiler_error("Failed to create test configuration dict for test `{}`".format(test_table_name)) }} From 36878be7ff7c1e0a5f7325054d879be5ef0c04b7 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 13 Nov 2025 13:21:10 +0000 Subject: [PATCH 2/5] Add test for exclude_detection_period_from_training flag in column anomaly tests Co-Authored-By: Yosef Arbiv --- .../tests/test_all_columns_anomalies.py | 118 ++++++++++++++++++ 1 file changed, 118 insertions(+) diff --git a/integration_tests/tests/test_all_columns_anomalies.py b/integration_tests/tests/test_all_columns_anomalies.py index 83c8fb38b..47089462e 100644 --- a/integration_tests/tests/test_all_columns_anomalies.py +++ b/integration_tests/tests/test_all_columns_anomalies.py @@ -153,3 +153,121 @@ def test_anomalyless_all_columns_anomalies_all_monitors_sanity( test_id, DBT_TEST_NAME, test_args, data=data, multiple_results=True ) assert all([res["status"] == "pass" for res in test_results]) + + +# Anomalies currently not supported on ClickHouse +@pytest.mark.skip_targets(["clickhouse"]) +def test_exclude_detection_from_training_all_columns( + test_id: str, dbt_project: DbtProject +): + """ + Test the exclude_detection_period_from_training flag functionality for column anomalies. + + Scenario: + - 30 days of normal data with consistent null_count pattern (2 nulls per day) + - 7 days of anomalous data (10 nulls per day) in detection period + - Without exclusion: anomaly gets included in training baseline, test passes (misses anomaly) + - With exclusion: anomaly excluded from training, test fails (detects anomaly) + """ + utc_now = datetime.utcnow() + + # Generate 30 days of normal data with consistent null_count (2 nulls per day) + normal_data = [] + for i in range(30): + date = utc_now - timedelta(days=37 - i) + normal_data.extend( + [ + {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": None} + for _ in range(2) + ] + ) + normal_data.extend( + [ + { + TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), + "superhero": "Superman" if i % 2 == 0 else "Batman", + } + for _ in range(8) + ] + ) + + # Generate 7 days of anomalous data (10 nulls per day) - this will be in detection period + anomalous_data = [] + for i in range(7): + date = utc_now - timedelta(days=7 - i) + anomalous_data.extend( + [ + {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": None} + for _ in range(10) + ] + ) + anomalous_data.extend( + [ + { + TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), + "superhero": "Superman" if i % 2 == 0 else "Batman", + } + for _ in range(0) # No non-null values to keep total similar + ] + ) + + all_data = normal_data + anomalous_data + + # Test 1: WITHOUT exclusion (should pass - misses the anomaly because it's included in training) + test_args_without_exclusion = { + "timestamp_column": TIMESTAMP_COLUMN, + "column_anomalies": ["null_count"], + "training_period": {"period": "day", "count": 30}, + "detection_period": {"period": "day", "count": 7}, + "time_bucket": {"period": "day", "count": 1}, + "sensitivity": 5, # Higher sensitivity to allow anomaly to be absorbed + # exclude_detection_period_from_training is not set (defaults to False/None) + } + + test_results_without_exclusion = dbt_project.test( + test_id + "_without_exclusion", + DBT_TEST_NAME, + test_args_without_exclusion, + data=all_data, + multiple_results=True, + ) + + # This should PASS because the anomaly is included in training, making it part of the baseline + superhero_result = next( + ( + res + for res in test_results_without_exclusion + if res["column_name"].lower() == "superhero" + ), + None, + ) + assert ( + superhero_result and superhero_result["status"] == "pass" + ), "Test should pass when anomaly is included in training" + + # Test 2: WITH exclusion (should fail - detects the anomaly because it's excluded from training) + test_args_with_exclusion = { + **test_args_without_exclusion, + "exclude_detection_period_from_training": True, + } + + test_results_with_exclusion = dbt_project.test( + test_id + "_with_exclusion", + DBT_TEST_NAME, + test_args_with_exclusion, + data=all_data, + multiple_results=True, + ) + + # This should FAIL because the anomaly is excluded from training, so it's detected as anomalous + superhero_result = next( + ( + res + for res in test_results_with_exclusion + if res["column_name"].lower() == "superhero" + ), + None, + ) + assert ( + superhero_result and superhero_result["status"] == "fail" + ), "Test should fail when anomaly is excluded from training" From 80380a8f722b18d9b6f5d1a4250a3934c4552a12 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 20 Nov 2025 11:56:11 +0000 Subject: [PATCH 3/5] Fix test_exclude_detection_from_training_all_columns: shorten test ID suffixes and adjust test data for proper anomaly detection Co-Authored-By: Yosef Arbiv --- .../tests/test_all_columns_anomalies.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/integration_tests/tests/test_all_columns_anomalies.py b/integration_tests/tests/test_all_columns_anomalies.py index 47089462e..7de0d8e9a 100644 --- a/integration_tests/tests/test_all_columns_anomalies.py +++ b/integration_tests/tests/test_all_columns_anomalies.py @@ -164,21 +164,23 @@ def test_exclude_detection_from_training_all_columns( Test the exclude_detection_period_from_training flag functionality for column anomalies. Scenario: - - 30 days of normal data with consistent null_count pattern (2 nulls per day) - - 7 days of anomalous data (10 nulls per day) in detection period + - 30 days of normal data with variance in null_count pattern (8, 10, 12 nulls per day) + - 7 days of anomalous data (20 nulls per day) in detection period - Without exclusion: anomaly gets included in training baseline, test passes (misses anomaly) - With exclusion: anomaly excluded from training, test fails (detects anomaly) """ utc_now = datetime.utcnow() - # Generate 30 days of normal data with consistent null_count (2 nulls per day) + # Generate 30 days of normal data with variance in null_count (8, 10, 12 pattern) + normal_pattern = [8, 10, 12] normal_data = [] for i in range(30): date = utc_now - timedelta(days=37 - i) + null_count = normal_pattern[i % 3] normal_data.extend( [ {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": None} - for _ in range(2) + for _ in range(null_count) ] ) normal_data.extend( @@ -187,18 +189,18 @@ def test_exclude_detection_from_training_all_columns( TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": "Superman" if i % 2 == 0 else "Batman", } - for _ in range(8) + for _ in range(40 - null_count) ] ) - # Generate 7 days of anomalous data (10 nulls per day) - this will be in detection period + # Generate 7 days of anomalous data (20 nulls per day) - 100% increase from mean anomalous_data = [] for i in range(7): date = utc_now - timedelta(days=7 - i) anomalous_data.extend( [ {TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": None} - for _ in range(10) + for _ in range(20) ] ) anomalous_data.extend( @@ -207,7 +209,7 @@ def test_exclude_detection_from_training_all_columns( TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": "Superman" if i % 2 == 0 else "Batman", } - for _ in range(0) # No non-null values to keep total similar + for _ in range(20) # Keep total rows similar ] ) @@ -225,7 +227,7 @@ def test_exclude_detection_from_training_all_columns( } test_results_without_exclusion = dbt_project.test( - test_id + "_without_exclusion", + test_id + "_f", DBT_TEST_NAME, test_args_without_exclusion, data=all_data, @@ -252,7 +254,7 @@ def test_exclude_detection_from_training_all_columns( } test_results_with_exclusion = dbt_project.test( - test_id + "_with_exclusion", + test_id + "_t", DBT_TEST_NAME, test_args_with_exclusion, data=all_data, From 745bf1c1aa5bcc505c7a43aa517031d7ee7469a1 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 20 Nov 2025 13:09:46 +0000 Subject: [PATCH 4/5] Fix datetime.utcnow() deprecation: use datetime.now(timezone.utc) instead Co-Authored-By: Yosef Arbiv --- integration_tests/tests/test_all_columns_anomalies.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration_tests/tests/test_all_columns_anomalies.py b/integration_tests/tests/test_all_columns_anomalies.py index 7de0d8e9a..f7e890ad0 100644 --- a/integration_tests/tests/test_all_columns_anomalies.py +++ b/integration_tests/tests/test_all_columns_anomalies.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from typing import Any, Dict, List import pytest @@ -169,7 +169,7 @@ def test_exclude_detection_from_training_all_columns( - Without exclusion: anomaly gets included in training baseline, test passes (misses anomaly) - With exclusion: anomaly excluded from training, test fails (detects anomaly) """ - utc_now = datetime.utcnow() + utc_now = datetime.now(timezone.utc) # Generate 30 days of normal data with variance in null_count (8, 10, 12 pattern) normal_pattern = [8, 10, 12] From 3e2a0685553d34080cc20eedf2d4f3319f8bc396 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 20 Nov 2025 13:16:20 +0000 Subject: [PATCH 5/5] Address PR feedback: revert dimension anomalies change and refactor test with parametrization Co-Authored-By: Yosef Arbiv --- .../tests/test_all_columns_anomalies.py | 73 +++++++------------ macros/edr/tests/test_dimension_anomalies.sql | 5 +- 2 files changed, 29 insertions(+), 49 deletions(-) diff --git a/integration_tests/tests/test_all_columns_anomalies.py b/integration_tests/tests/test_all_columns_anomalies.py index 47089462e..aa974830b 100644 --- a/integration_tests/tests/test_all_columns_anomalies.py +++ b/integration_tests/tests/test_all_columns_anomalies.py @@ -157,8 +157,19 @@ def test_anomalyless_all_columns_anomalies_all_monitors_sanity( # Anomalies currently not supported on ClickHouse @pytest.mark.skip_targets(["clickhouse"]) -def test_exclude_detection_from_training_all_columns( - test_id: str, dbt_project: DbtProject +@pytest.mark.parametrize( + "exclude_detection,expected_status", + [ + (False, "pass"), + (True, "fail"), + ], + ids=["without_exclusion", "with_exclusion"], +) +def test_anomaly_in_detection_period( + test_id: str, + dbt_project: DbtProject, + exclude_detection: bool, + expected_status: str, ): """ Test the exclude_detection_period_from_training flag functionality for column anomalies. @@ -166,8 +177,8 @@ def test_exclude_detection_from_training_all_columns( Scenario: - 30 days of normal data with consistent null_count pattern (2 nulls per day) - 7 days of anomalous data (10 nulls per day) in detection period - - Without exclusion: anomaly gets included in training baseline, test passes (misses anomaly) - - With exclusion: anomaly excluded from training, test fails (detects anomaly) + - Without exclusion (exclude_detection=False): anomaly gets included in training baseline, test passes + - With exclusion (exclude_detection=True): anomaly excluded from training, test fails (detects anomaly) """ utc_now = datetime.utcnow() @@ -207,67 +218,37 @@ def test_exclude_detection_from_training_all_columns( TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": "Superman" if i % 2 == 0 else "Batman", } - for _ in range(0) # No non-null values to keep total similar + for _ in range(0) ] ) all_data = normal_data + anomalous_data - # Test 1: WITHOUT exclusion (should pass - misses the anomaly because it's included in training) - test_args_without_exclusion = { + test_args = { "timestamp_column": TIMESTAMP_COLUMN, "column_anomalies": ["null_count"], "training_period": {"period": "day", "count": 30}, "detection_period": {"period": "day", "count": 7}, "time_bucket": {"period": "day", "count": 1}, - "sensitivity": 5, # Higher sensitivity to allow anomaly to be absorbed - # exclude_detection_period_from_training is not set (defaults to False/None) + "sensitivity": 5, } - test_results_without_exclusion = dbt_project.test( - test_id + "_without_exclusion", - DBT_TEST_NAME, - test_args_without_exclusion, - data=all_data, - multiple_results=True, - ) - - # This should PASS because the anomaly is included in training, making it part of the baseline - superhero_result = next( - ( - res - for res in test_results_without_exclusion - if res["column_name"].lower() == "superhero" - ), - None, - ) - assert ( - superhero_result and superhero_result["status"] == "pass" - ), "Test should pass when anomaly is included in training" - - # Test 2: WITH exclusion (should fail - detects the anomaly because it's excluded from training) - test_args_with_exclusion = { - **test_args_without_exclusion, - "exclude_detection_period_from_training": True, - } + if exclude_detection: + test_args["exclude_detection_period_from_training"] = True - test_results_with_exclusion = dbt_project.test( - test_id + "_with_exclusion", + test_results = dbt_project.test( + test_id, DBT_TEST_NAME, - test_args_with_exclusion, + test_args, data=all_data, multiple_results=True, ) - # This should FAIL because the anomaly is excluded from training, so it's detected as anomalous superhero_result = next( - ( - res - for res in test_results_with_exclusion - if res["column_name"].lower() == "superhero" - ), + (res for res in test_results if res["column_name"].lower() == "superhero"), None, ) + assert superhero_result is not None, "superhero column result not found" assert ( - superhero_result and superhero_result["status"] == "fail" - ), "Test should fail when anomaly is excluded from training" + superhero_result["status"] == expected_status + ), f"Expected status '{expected_status}' but got '{superhero_result['status']}' (exclude_detection={exclude_detection})" diff --git a/macros/edr/tests/test_dimension_anomalies.sql b/macros/edr/tests/test_dimension_anomalies.sql index 350e6e925..6412973a2 100644 --- a/macros/edr/tests/test_dimension_anomalies.sql +++ b/macros/edr/tests/test_dimension_anomalies.sql @@ -1,4 +1,4 @@ -{% test dimension_anomalies(model, dimensions, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_final_results, exclude_detection_period_from_training=false) %} +{% test dimension_anomalies(model, dimensions, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_final_results) %} {{ config(tags = ['elementary-tests']) }} {%- if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %} {% set model_relation = elementary.get_model_relation_for_test(model, elementary.get_test_model()) %} @@ -39,8 +39,7 @@ anomaly_exclude_metrics=anomaly_exclude_metrics, detection_period=detection_period, training_period=training_period, - exclude_final_results=exclude_final_results, - exclude_detection_period_from_training=exclude_detection_period_from_training) %} + exclude_final_results=exclude_final_results) %} {%- if not test_configuration %} {{ exceptions.raise_compiler_error("Failed to create test configuration dict for test `{}`".format(test_table_name)) }}