data-dwh-dbt-project/macros/tests/kpis_daily_outlier_detector.sql

36 lines
1.4 KiB
SQL

{% test kpis_daily_outlier_detector(
model, date_column, column_name, sigma_threshold=3, days_to_consider=14
) %}
with
-- Retrieve recent data based on the defined days_to_consider
recent_data as (
select {{ date_column }}, {{ column_name }}
from {{ model }}
where
{{ date_column }} between (
current_date - interval '{{ days_to_consider + 1 }} days'
) and (current_date - interval '1 day')
),
metrics_stats as (
select
avg({{ column_name }}) as avg_value,
stddev({{ column_name }}) as stddev_value
from recent_data
),
outliers as (
select
{{ column_name }} as value,
(abs({{ column_name }} - metrics_stats.avg_value))
/ (metrics_stats.stddev_value) as absolute_deviation_z_score,
(abs({{ column_name }} - metrics_stats.avg_value))
/ (metrics_stats.stddev_value)
> {{ sigma_threshold }} as is_outlier
from {{ model }}
cross join metrics_stats
where {{ date_column }} = current_date - interval '1 day'
)
-- Return failing rows if any values are flagged as outliers
select *
from outliers
where is_outlier = true
{% endtest %}