79 lines
2.6 KiB
SQL
79 lines
2.6 KiB
SQL
/*
|
|
This is a test to detect outlier in Guest KPI metrics.
|
|
It uses the standard deviation of the values in a
|
|
set number of days for all specified metrics
|
|
*/
|
|
-- Add here the metrics you want to check for outliers
|
|
-- (metrics that are cumulative or count-based)
|
|
{% set metric_names = (
|
|
"created_guest_journeys_not_cancelled",
|
|
"started_guest_journeys_not_cancelled",
|
|
"completed_guest_journeys_not_cancelled",
|
|
"created_guest_journeys",
|
|
"started_guest_journeys",
|
|
"completed_guest_journeys",
|
|
"total_csat_score_count",
|
|
"average_csat_score",
|
|
"deposit_fees_in_gbp",
|
|
"waiver_payments_in_gbp",
|
|
"checkin_cover_fees_in_gbp",
|
|
"total_guest_payments_in_gbp",
|
|
) %}
|
|
-- Set the number of standard deviations (sigma) allowed
|
|
-- for a value to be considered an outlier.
|
|
-- Values beyond this threshold from the average are flagged
|
|
-- as outliers.
|
|
{% set sigma_threshold = var("sigma_threshold", 3) %}
|
|
-- Set the number of previous days' data (e.g., 14 days) to
|
|
-- calculate the average and standard deviation for comparison.
|
|
{% set days_to_consider = 14 %}
|
|
|
|
with
|
|
recent_data as (
|
|
select *
|
|
from {{ ref("kpis__product_guest_daily_metrics") }}
|
|
where
|
|
date_day
|
|
between (current_date - interval '{{ days_to_consider + 1 }} days') and (
|
|
current_date - interval '1 day'
|
|
)
|
|
|
|
),
|
|
metrics_recent_data as (
|
|
select
|
|
{% for metric in metric_names %}
|
|
avg({{ metric }}) as avg_{{ metric }},
|
|
stddev({{ metric }}) as stddev_{{ metric }}
|
|
{% if not loop.last %},{% endif %}
|
|
{% endfor %}
|
|
from recent_data
|
|
),
|
|
outliers as (
|
|
select
|
|
rd.date_day,
|
|
{% for metric in metric_names %}
|
|
case
|
|
when
|
|
abs(rd.{{ metric }} - metrics_recent_data.avg_{{ metric }}) > (
|
|
metrics_recent_data.stddev_{{ metric }}
|
|
* {{ sigma_threshold }}
|
|
)
|
|
then 1
|
|
else 0
|
|
end as is_outlier_{{ metric }}
|
|
{% if not loop.last %},{% endif %}
|
|
{% endfor %}
|
|
from recent_data rd
|
|
cross join metrics_recent_data
|
|
where rd.date_day = current_date - interval '1 day'
|
|
)
|
|
select
|
|
{% for metric in metric_names %}
|
|
sum(is_outlier_{{ metric }}) as outlier_count_{{ metric }}
|
|
{% if not loop.last %},{% endif %}
|
|
{% endfor %}
|
|
from outliers
|
|
having
|
|
{% for metric in metric_names %}
|
|
sum(is_outlier_{{ metric }}) > 0 {% if not loop.last %} or {% endif %}
|
|
{% endfor %}
|