Skip to content

Commit 11fe645

Browse files
Add data_freshness_sla and volume_threshold tests
Add two new Elementary tests: - data_freshness_sla: checks if data was updated before a specified SLA deadline - volume_threshold: monitors row count changes with configurable warn/error thresholds, using Elementary's metric caching to avoid redundant computation Fixes applied: - volume_threshold: union historical metrics with new metrics for comparison - volume_threshold: deterministic dedup with source_priority tiebreaker - volume_threshold: let get_time_bucket handle defaults - data_freshness_sla: treat future-dated data as fresh (remove upper bound) - data_freshness_sla: escape single quotes in where_expression for result_description - data_freshness_sla: simplify deadline_passed logic - data_freshness_sla: document UTC assumption, add ephemeral model check Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent d33eb04 commit 11fe645

File tree

2 files changed

+438
-0
lines changed

2 files changed

+438
-0
lines changed
Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,243 @@
1+
{#
    Test: data_freshness_sla

    Verifies that data in a model was updated before a specified SLA deadline time.
    Checks the max timestamp value of a specified column in the data itself.

    Use case: "Is the data fresh?" / "Was the data updated on time?"

    Parameters:
        timestamp_column (required): Column name containing timestamps to check for freshness
        sla_time (required): Deadline time. Supports formats like "07:00", "7am", "2:30pm", "14:30"
        timezone (required): IANA timezone name (e.g., "America/Los_Angeles", "Europe/London")
        day_of_week (optional): Day(s) to check. String or list: "Monday", ["Monday", "Wednesday"]
        day_of_month (optional): Day(s) of month to check. Integer or list: 1, [1, 15]
        where_expression (optional): Additional WHERE clause filter for the data query

    Schedule behavior:
        - If neither day_of_week nor day_of_month is set: check every day (default)
        - If day_of_week is set: only check on those days
        - If day_of_month is set: only check on those days
        - If both are set: check if today matches EITHER filter (OR logic)

    Example usage:
        models:
          - name: my_model
            tests:
              - elementary.data_freshness_sla:
                  timestamp_column: updated_at
                  sla_time: "07:00"
                  timezone: "America/Los_Angeles"

          - name: daily_events
            tests:
              - elementary.data_freshness_sla:
                  timestamp_column: event_timestamp
                  sla_time: "6am"
                  timezone: "Europe/Amsterdam"
                  where_expression: "event_type = 'completed'"

          - name: weekly_report_data
            tests:
              - elementary.data_freshness_sla:
                  timestamp_column: report_date
                  sla_time: "09:00"
                  timezone: "Asia/Tokyo"
                  day_of_week: ["Monday"]

    Test passes if:
        - Today is not a scheduled check day (based on day_of_week/day_of_month)
        - OR the max timestamp in the data is from today or later (before or after
          the deadline; future-dated rows also count as fresh)
        - OR the SLA deadline for today hasn't passed yet

    Test fails if:
        - Today is a scheduled check day AND the deadline has passed AND:
            - No data exists in the table
            - The max timestamp is from a previous day (data not updated today)

    Important:
        - The timestamp_column values are assumed to be in UTC (or timezone-naive timestamps
          that represent UTC). If your data stores local timestamps, the comparison against
          the SLA deadline (converted to UTC) will be incorrect.
        - Outside of an executing test command, or when Elementary is disabled, the test
          renders elementary.no_results_query() and therefore passes without validation.
#}

{% test data_freshness_sla(model, timestamp_column, sla_time, timezone, day_of_week=none, day_of_month=none, where_expression=none) %}
    {{ config(tags=['elementary-tests']) }}

    {%- if execute and elementary.is_test_command() and elementary.is_elementary_enabled() %}

        {# Validate required parameters #}
        {% if not timestamp_column %}
            {{ exceptions.raise_compiler_error("The 'timestamp_column' parameter is required. Example: timestamp_column: 'updated_at'") }}
        {% endif %}

        {% if not sla_time %}
            {{ exceptions.raise_compiler_error("The 'sla_time' parameter is required. Example: sla_time: '07:00'") }}
        {% endif %}

        {# timezone is documented as required; fail with the same style of message as the other required parameters #}
        {% if not timezone %}
            {{ exceptions.raise_compiler_error("The 'timezone' parameter is required. Example: timezone: 'America/Los_Angeles'") }}
        {% endif %}

        {# Validate timezone #}
        {% do elementary.validate_timezone(timezone) %}

        {# Normalize and validate day filters #}
        {% set day_of_week_filter = elementary.normalize_day_of_week(day_of_week) %}
        {% set day_of_month_filter = elementary.normalize_day_of_month(day_of_month) %}

        {# Get model relation and validate #}
        {% set model_relation = elementary.get_model_relation_for_test(model, elementary.get_test_model()) %}
        {% if not model_relation %}
            {{ exceptions.raise_compiler_error("Unsupported model: " ~ model ~ " (this might happen if you override 'ref' or 'source')") }}
        {% endif %}

        {%- if elementary.is_ephemeral_model(model_relation) %}
            {{ exceptions.raise_compiler_error("Test not supported for ephemeral models: " ~ model_relation.identifier) }}
        {%- endif %}

        {# Validate timestamp column exists and is a timestamp type #}
        {% set timestamp_column_data_type = elementary.find_normalized_data_type_for_column(model_relation, timestamp_column) %}
        {% if not elementary.is_column_timestamp(model_relation, timestamp_column, timestamp_column_data_type) %}
            {{ exceptions.raise_compiler_error("Column '" ~ timestamp_column ~ "' is not a timestamp type. The timestamp_column must be a timestamp or datetime column.") }}
        {% endif %}

        {# Parse the SLA time #}
        {% set parsed_time = elementary.parse_sla_time(sla_time) %}
        {% set formatted_sla_time = elementary.format_sla_time(parsed_time) %}

        {# Calculate SLA deadline in UTC (also returns current day info) #}
        {% set sla_info = elementary.calculate_sla_deadline_utc(parsed_time.hour, parsed_time.minute, timezone) %}

        {# Check if today is a scheduled check day #}
        {% set should_check = elementary.should_check_sla_today(
            sla_info.day_of_week,
            sla_info.day_of_month,
            day_of_week_filter,
            day_of_month_filter
        ) %}

        {# If today is not a scheduled check day, skip (pass) #}
        {% if not should_check %}
            {{ elementary.edr_log('Skipping data_freshness_sla test for ' ~ model_relation.identifier ~ ' - not a scheduled check day (' ~ sla_info.day_of_week ~ ', day ' ~ sla_info.day_of_month ~ ')') }}
            {{ elementary.no_results_query() }}
        {% else %}

            {{ elementary.edr_log('Running data_freshness_sla test for ' ~ model_relation.identifier ~ ' with SLA ' ~ formatted_sla_time ~ ' ' ~ timezone) }}

            {# Build the query; it returns rows only when the SLA was missed #}
            {{ elementary.get_data_freshness_sla_query(
                model_relation=model_relation,
                timestamp_column=timestamp_column,
                sla_deadline_utc=sla_info.sla_deadline_utc,
                target_date=sla_info.target_date,
                target_date_start_utc=sla_info.target_date_start_utc,
                target_date_end_utc=sla_info.target_date_end_utc,
                deadline_passed=sla_info.deadline_passed,
                formatted_sla_time=formatted_sla_time,
                timezone=timezone,
                where_expression=where_expression
            ) }}

        {% endif %}

    {%- else %}
        {{ elementary.no_results_query() }}
    {%- endif %}

{% endtest %}
145+
146+
147+
{#
    Build the SQL query that reports a missed data-freshness SLA.

    The query returns at most one row, and only when the SLA was missed
    (is_failure = true); an empty result means the test passes.

    Logic:
        - Query the model table to get MAX(timestamp_column), optionally restricted by
          where_expression.
        - This macro only casts the column via edr_cast_as_timestamp; it performs no
          explicit timezone conversion of its own, so the column values are compared
          as-is against the UTC boundaries passed in.
        - No rows / null max timestamp            -> NO_DATA
        - Max timestamp >= target_date_start_utc  -> DATA_FRESH (no upper bound, so
          future-dated rows also count as fresh)
        - Otherwise                               -> DATA_STALE
        - NO_DATA / DATA_STALE are failures only when deadline_passed is truthy.

    Parameters:
        model_relation: Relation of the model to query.
        timestamp_column: Column (or expression) whose max value is checked.
        sla_deadline_utc: SLA deadline, rendered into a quoted timestamp literal.
        target_date: Date being checked, rendered into a quoted string literal.
        target_date_start_utc: Start of the target date, rendered into a quoted timestamp literal.
        target_date_end_utc: Kept for backward compatibility with existing callers;
            not used by the query because no upper bound is applied.
        deadline_passed: Evaluated at render time; decides whether non-fresh data fails.
        formatted_sla_time: SLA time as shown in the output and messages.
        timezone: Timezone name as shown in the output and messages.
        where_expression: Optional SQL predicate applied to the model table.
#}
{% macro get_data_freshness_sla_query(model_relation, timestamp_column, sla_deadline_utc, target_date, target_date_start_utc, target_date_end_utc, deadline_passed, formatted_sla_time, timezone, where_expression) %}

    {# Escape single quotes so the identifier cannot break out of the SQL string literals below
       (same quote-doubling that is applied to where_expression in the result description) #}
    {% set model_name_literal = model_relation.identifier | replace("'", "''") %}

    with

    sla_deadline as (
        select
            {{ elementary.edr_cast_as_timestamp("'" ~ sla_deadline_utc ~ "'") }} as deadline_utc,
            {{ elementary.edr_cast_as_timestamp("'" ~ target_date_start_utc ~ "'") }} as target_date_start_utc,
            '{{ target_date }}' as target_date
    ),

    {# Get the max timestamp from the data #}
    max_data_timestamp as (
        select
            max({{ elementary.edr_cast_as_timestamp(timestamp_column) }}) as max_timestamp_utc
        from {{ model_relation }}
        {% if where_expression %}
        where {{ where_expression }}
        {% endif %}
    ),

    {# Determine freshness status; both inputs are single-row, so the cross join yields one row #}
    freshness_result as (
        select
            sd.target_date,
            sd.deadline_utc as sla_deadline_utc,
            mdt.max_timestamp_utc,
            case
                when mdt.max_timestamp_utc is null then 'NO_DATA'
                {# Data from today or future (e.g. pre-loaded records) counts as fresh #}
                when mdt.max_timestamp_utc >= sd.target_date_start_utc then 'DATA_FRESH'
                else 'DATA_STALE'
            end as freshness_status
        from sla_deadline sd
        cross join max_data_timestamp mdt
    ),

    final_result as (
        select
            '{{ model_name_literal }}' as model_name,
            target_date,
            '{{ formatted_sla_time }}' as sla_time,
            '{{ timezone }}' as timezone,
            cast(sla_deadline_utc as {{ elementary.edr_type_string() }}) as sla_deadline_utc,
            freshness_status,
            cast(max_timestamp_utc as {{ elementary.edr_type_string() }}) as max_timestamp_utc,
            {# Fresh data never fails; anything else fails only once the deadline has passed #}
            case
                when freshness_status = 'DATA_FRESH' then false
                {% if deadline_passed %}
                else true
                {% else %}
                else false
                {% endif %}
            end as is_failure,
            case
                when freshness_status = 'NO_DATA' then
                    'No data found in "{{ model_name_literal }}"' ||
                    {% if where_expression %}
                    ' (with filter: {{ where_expression | replace("'", "''") }})' ||
                    {% endif %}
                    '. Expected data to be updated before {{ formatted_sla_time }} {{ timezone }}.'
                when freshness_status = 'DATA_STALE' then
                    'Data in "{{ model_name_literal }}" is stale. Last update was at ' ||
                    cast(max_timestamp_utc as {{ elementary.edr_type_string() }}) ||
                    ' UTC, which is before today. Expected fresh data before {{ formatted_sla_time }} {{ timezone }}.'
                else
                    {# DATA_FRESH rows are filtered out below, so this text is never returned #}
                    'Data in "{{ model_name_literal }}" is fresh - last update at ' ||
                    cast(max_timestamp_utc as {{ elementary.edr_type_string() }}) ||
                    ' UTC (before SLA deadline {{ formatted_sla_time }} {{ timezone }}).'
            end as result_description
        from freshness_result
    )

    select
        model_name,
        target_date,
        sla_time,
        timezone,
        sla_deadline_utc,
        freshness_status,
        max_timestamp_utc,
        result_description
    from final_result
    where is_failure = true

{% endmacro %}

0 commit comments

Comments
 (0)