2024-11-26 10:33:03 +01:00
|
|
|
{% test kpis_daily_outlier_detector(
    model, date_column, column_name, sigma_threshold=3, days_to_consider=14
) %}

    {#
        Generic dbt test: fails when yesterday's daily total of `column_name`
        lies more than `sigma_threshold` standard deviations away from the
        average daily total over the recent window.

        Arguments:
            model: relation under test.
            date_column: column (or expression) compared against
                current_date-based bounds and used as the per-day grouping
                key. Presumably day-granular; confirm against callers.
            column_name: numeric column; it is summed per `date_column`
                value before any statistics are computed.
            sigma_threshold: z-score above which yesterday is flagged
                (default 3).
            days_to_consider: how far the window reaches back from today;
                the lower bound is current_date - (days_to_consider + 1)
                days, the upper bound is yesterday (default 14).

        Returns:
            One row (value, absolute_deviation_z_score, is_outlier) when
            yesterday's total is an outlier; no rows otherwise. No rows are
            returned either when yesterday has no data or when the window
            has zero / undefined standard deviation.
    #}

    with

        -- Daily totals over the window. Both bounds are inclusive, so
        -- yesterday's total is part of the window it is later scored
        -- against, which dampens its z-score.
        recent_data as (
            select
                {{ date_column }} as metric_date,
                sum({{ column_name }}) as daily_value
            from {{ model }}
            where
                {{ date_column }} between (
                    current_date - interval '{{ days_to_consider + 1 }} days'
                ) and (current_date - interval '1 day')
            group by {{ date_column }}
        ),

        -- Mean and standard deviation of the daily totals (single row).
        metrics_stats as (
            select
                avg(daily_value) as avg_value,
                stddev(daily_value) as stddev_value
            from recent_data
        ),

        -- Score yesterday's *daily total* (not the raw model rows) so the
        -- value is compared at the same grain as the statistics.
        yesterday_scored as (
            select
                daily_value as value,
                -- nullif guards against division by zero when every daily
                -- total in the window is identical; the z-score is then
                -- NULL and the row is not reported as a failure.
                abs(daily_value - avg_value)
                / nullif(stddev_value, 0) as absolute_deviation_z_score
            from recent_data
            cross join metrics_stats
            where metric_date = current_date - interval '1 day'
        ),

        outliers as (
            select
                value,
                absolute_deviation_z_score,
                absolute_deviation_z_score > {{ sigma_threshold }} as is_outlier
            from yesterday_scored
        )

    -- Return failing rows if any values are flagged as outliers
    select
        value,
        absolute_deviation_z_score,
        is_outlier
    from outliers
    where is_outlier = true

{% endtest %}
|