@@ -88,3 +88,105 @@ def test_slower_rate_event_freshness(test_id: str, dbt_project: DbtProject):
         test_vars={"custom_run_started_at": test_started_at.isoformat()},
     )
     assert result["status"] == "fail"
+
+
+# Anomalies currently not supported on ClickHouse
+@pytest.mark.skip_targets(["clickhouse"])
+def test_exclude_detection_from_training(test_id: str, dbt_project: DbtProject):
+    """
+    Test the exclude_detection_period_from_training flag for event freshness anomalies.
+
+    Scenario:
+    - 7 days of normal data (~5 minute lag between event and update) - training period
+    - 7 days of anomalous data (5 hour lag) - detection period
+    - Without exclusion: the anomaly is included in the training baseline, so the test passes (misses the anomaly)
+    - With exclusion: the anomaly is excluded from training, so the test fails (detects the anomaly)
+
+    """
+    utc_now = datetime.utcnow()
+    test_started_at = (utc_now + timedelta(days=1)).replace(
+        hour=0, minute=0, second=0, microsecond=0
+    )
+
+    # Generate 7 days of normal data with varying lag (2-8 minutes) to ensure training_stddev > 0
+    training_lags_minutes = [2, 3, 4, 5, 6, 7, 8]
+    normal_data = []
+    for i in range(7):
+        event_date = test_started_at - timedelta(days=14 - i)
+        event_time = event_date.replace(hour=12, minute=0, second=0, microsecond=0)
+        update_time = event_time + timedelta(minutes=training_lags_minutes[i])
+        normal_data.append(
+            {
+                EVENT_TIMESTAMP_COLUMN: event_time.strftime(DATE_FORMAT),
+                UPDATE_TIMESTAMP_COLUMN: update_time.strftime(DATE_FORMAT),
+            }
+        )
+
+    # Generate 7 days of anomalous data with a 5-hour lag (detection period)
+    anomalous_data = []
+    for i in range(7):
+        event_date = test_started_at - timedelta(days=7 - i)
+        event_time = event_date.replace(hour=12, minute=0, second=0, microsecond=0)
+        update_time = event_time + timedelta(hours=5)
+        anomalous_data.append(
+            {
+                EVENT_TIMESTAMP_COLUMN: event_time.strftime(DATE_FORMAT),
+                UPDATE_TIMESTAMP_COLUMN: update_time.strftime(DATE_FORMAT),
+            }
+        )
+
+    all_data = normal_data + anomalous_data
+
+    # Test 1: WITHOUT exclusion (should pass - misses the anomaly because it is included in training)
+    test_args_without_exclusion = {
+        "event_timestamp_column": EVENT_TIMESTAMP_COLUMN,
+        "update_timestamp_column": UPDATE_TIMESTAMP_COLUMN,
+        "days_back": 14,  # Scoring window: 14 days to include both training and detection
+        "backfill_days": 7,  # Detection period: last 7 days (days 7-1 before test_started_at)
+        "time_bucket": {
+            "period": "day",
+            "count": 1,
+        },  # Daily buckets to avoid boundary issues
+        "sensitivity": 3,
+        "anomaly_direction": "spike",  # Explicit direction since we're testing increased lag
+        "min_training_set_size": 5,  # Explicit minimum to avoid threshold issues
+        # exclude_detection_period_from_training is not set (defaults to False/None)
+    }
+
+    test_result_without_exclusion = dbt_project.test(
+        test_id + "_without_exclusion",
+        TEST_NAME,
+        test_args_without_exclusion,
+        data=all_data,
+        test_vars={
+            "custom_run_started_at": test_started_at.isoformat(),
+            "force_metrics_backfill": True,
+        },
+    )
+
+    # This should PASS because the anomaly is included in training, making it part of the baseline
+    assert (
+        test_result_without_exclusion["status"] == "pass"
+    ), "Test should pass when anomaly is included in training"
+
+    # Test 2: WITH exclusion (should fail - detects the anomaly because it is excluded from training)
+    test_args_with_exclusion = {
+        **test_args_without_exclusion,
+        "exclude_detection_period_from_training": True,
+    }
+
+    test_result_with_exclusion = dbt_project.test(
+        test_id + "_with_exclusion",
+        TEST_NAME,
+        test_args_with_exclusion,
+        data=all_data,
+        test_vars={
+            "custom_run_started_at": test_started_at.isoformat(),
+            "force_metrics_backfill": True,
+        },
+    )
+
+    # This should FAIL because the anomaly is excluded from training, so it is detected as anomalous
+    assert (
+        test_result_with_exclusion["status"] == "fail"
+    ), "Test should fail when anomaly is excluded from training"
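
Why the two assertions go opposite ways: a rough back-of-the-envelope check using the lags generated above, assuming a simple z-score against the training mean and standard deviation (an illustrative sketch only; Elementary's actual freshness metric and anomaly scoring may differ):

import statistics

training_lags = [2, 3, 4, 5, 6, 7, 8]  # minutes, the 7 "normal" training days
detection_lag = 5 * 60                 # minutes, each of the 7 anomalous detection days
sensitivity = 3

def z_score(value, baseline):
    # Distance of `value` from the baseline mean, in baseline standard deviations
    return (value - statistics.mean(baseline)) / statistics.stdev(baseline)

# Without exclusion: the detection days leak into the baseline and mask the spike
print(z_score(detection_lag, training_lags + [detection_lag] * 7))  # ~1.0, below sensitivity 3 -> "pass"

# With exclusion: the baseline is the normal week only, so the spike stands out
print(z_score(detection_lag, training_lags))  # ~136, far above sensitivity 3 -> "fail"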