Skip to content
101 changes: 101 additions & 0 deletions integration_tests/tests/test_dimension_anomalies.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,3 +218,104 @@ def test_dimension_anomalies_with_timestamp_exclude_final_results(
test_result = dbt_project.test(test_id, DBT_TEST_NAME, test_args, data=data)
assert test_result["status"] == "fail"
assert test_result["failures"] == 1


# Test for exclude_detection_period_from_training functionality
# This test demonstrates the use case where:
# 1. Detection period contains anomalous distribution data that would normally be included in training
# 2. With exclude_detection=False: anomaly is missed (test passes) because training includes the anomaly
# 3. With exclude_detection=True: anomaly is detected (test fails) because training excludes the anomaly
@pytest.mark.skip_targets(["clickhouse"])
@pytest.mark.parametrize(
"exclude_detection,expected_status",
[
(False, "pass"), # include detection in training → anomaly absorbed
(True, "fail"), # exclude detection from training → anomaly detected
],
ids=[
"exclude_false",
"exclude_true",
], # Shortened to stay under Postgres 63-char limit
)
def test_anomaly_in_detection_period(
test_id: str,
dbt_project: DbtProject,
exclude_detection: bool,
expected_status: str,
):
"""
Test the exclude_detection_period_from_training flag functionality for dimension anomalies.

Scenario:
- 30 days of normal data with variance (45/50/55 Superman, 55/50/45 Spiderman pattern)
- 7 days of anomalous data (72 Superman, 28 Spiderman per day) in detection period
- Without exclusion: anomaly gets included in training baseline, test passes (misses anomaly)
- With exclusion: anomaly excluded from training, test fails (detects anomaly)

Note: Parametrize IDs are shortened to avoid Postgres 63-character identifier limit.
"""
utc_now = datetime.utcnow().date()

# Generate 30 days of normal data with variance (45/50/55 pattern for Superman)
normal_pattern = [45, 50, 55]
normal_data = []
for i in range(30):
date = utc_now - timedelta(days=37 - i)
superman_count = normal_pattern[i % 3]
spiderman_count = 100 - superman_count
normal_data.extend(
[
{TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": "Superman"}
for _ in range(superman_count)
]
)
normal_data.extend(
[
{
TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT),
"superhero": "Spiderman",
}
for _ in range(spiderman_count)
]
)

# Generate 7 days of anomalous data (72 Superman, 28 Spiderman per day) - this will be in detection period
anomalous_data = []
for i in range(7):
date = utc_now - timedelta(days=7 - i)
anomalous_data.extend(
[
{TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT), "superhero": "Superman"}
for _ in range(72)
]
)
anomalous_data.extend(
[
{
TIMESTAMP_COLUMN: date.strftime(DATE_FORMAT),
"superhero": "Spiderman",
}
for _ in range(28)
]
)

all_data = normal_data + anomalous_data

test_args = {
**DBT_TEST_ARGS,
"training_period": {"period": "day", "count": 30},
"detection_period": {"period": "day", "count": 7},
"time_bucket": {"period": "day", "count": 1},
"sensitivity": 5,
}
if exclude_detection:
test_args["exclude_detection_period_from_training"] = True

test_result = dbt_project.test(
test_id,
DBT_TEST_NAME,
test_args,
data=all_data,
)

assert test_result["status"] == expected_status
5 changes: 3 additions & 2 deletions macros/edr/tests/test_dimension_anomalies.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{% test dimension_anomalies(model, dimensions, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_final_results) %}
{% test dimension_anomalies(model, dimensions, timestamp_column, where_expression, anomaly_sensitivity, anomaly_direction, min_training_set_size, time_bucket, days_back, backfill_days, seasonality, sensitivity,ignore_small_changes, fail_on_zero, detection_delay, anomaly_exclude_metrics, detection_period, training_period, exclude_final_results, exclude_detection_period_from_training=false) %}
{{ config(tags = ['elementary-tests']) }}
{%- if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %}
{% set model_relation = elementary.get_model_relation_for_test(model, elementary.get_test_model()) %}
Expand Down Expand Up @@ -39,7 +39,8 @@
anomaly_exclude_metrics=anomaly_exclude_metrics,
detection_period=detection_period,
training_period=training_period,
exclude_final_results=exclude_final_results) %}
exclude_final_results=exclude_final_results,
exclude_detection_period_from_training=exclude_detection_period_from_training) %}

{%- if not test_configuration %}
{{ exceptions.raise_compiler_error("Failed to create test configuration dict for test `{}`".format(test_table_name)) }}
Expand Down