KPIs outlier detector

This commit is contained in:
Joaquin Ossa 2024-11-22 16:55:13 +01:00
parent 059c92b345
commit e44a9c19fc
2 changed files with 99 additions and 16 deletions

View file

@ -1,16 +1,22 @@
with {% set sigma_threshold = var("sigma_threshold", 3) %}
stg_core__country as (select * from {{ ref("stg_core__country") }}), {% set days_to_consider = 14 %}
stg_core__currency as (select * from {{ ref("stg_core__currency") }}) {% set yesterday = "(current_date - interval '1 day')" %}
select
co.id_country, -- with
co.iso_2, -- stg_core__country as (select * from {{ ref("stg_core__country") }}),
co.iso_3, -- stg_core__currency as (select * from {{ ref("stg_core__currency") }})
co.country_name, -- select
co.iso_num_code, -- co.id_country,
co.phone_code, -- co.iso_2,
co.id_preferred_currency, -- co.iso_3,
cu.currency_name as preferred_currency_name, -- co.country_name,
cu.iso4217_code as preferred_iso4217_code, -- co.iso_num_code,
co.dwh_extracted_at_utc -- co.phone_code,
from stg_core__country as co -- co.id_preferred_currency,
left join stg_core__currency cu on cu.id_currency = co.id_preferred_currency -- cu.currency_name as preferred_currency_name,
-- cu.iso4217_code as preferred_iso4217_code,
-- co.dwh_extracted_at_utc
-- from stg_core__country as co
-- left join stg_core__currency cu on cu.id_currency = co.id_preferred_currency
select {{ yesterday }} - interval '{{ days_to_consider }} "days"'

View file

@ -0,0 +1,77 @@
{#
    KPI outlier detector.

    For every metric in `metric_names`, compares yesterday's value with the
    mean and standard deviation of that metric over the trailing window and
    counts the rows whose absolute deviation from the mean exceeds
    sigma_threshold * stddev.

    Output: a single row of `outlier_count_<metric>` columns when at least one
    metric is flagged, and no rows otherwise (presumably consumed as a dbt
    test, where any returned row is a failure -- confirm).

    Settings:
      - sigma_threshold: dbt var `sigma_threshold`, defaults to 3.
      - days_to_consider: the window starts this many days before yesterday.
        Both ends are inclusive, so the window spans days_to_consider + 1
        calendar days and contains yesterday itself.

    NOTE(review): because yesterday belongs to the sample, its own value
    inflates the stddev it is compared against. With a sample standard
    deviation over n rows, a single value can deviate by at most
    (n - 1) / sqrt(n) sigmas (about 3.6 for n = 15, assuming one row per day
    and that stddev is the sample form -- confirm both). A sigma_threshold
    above that ceiling could never be exceeded.

    NOTE(review): if the upstream model has no row for yesterday, every sum is
    NULL, the having clause is not satisfied and nothing is returned, so
    missing data is indistinguishable from "no outliers".
#}
{% set metric_names = [
    "created_guest_journeys_not_cancelled",
    "started_guest_journeys_not_cancelled",
    "completed_guest_journeys_not_cancelled",
    "created_guest_journeys",
    "started_guest_journeys",
    "completed_guest_journeys",
    "total_csat_score_count",
    "average_csat_score",
    "deposit_fees_in_gbp",
    "waiver_payments_in_gbp",
    "checkin_cover_fees_in_gbp",
    "total_guest_payments_in_gbp",
    "py_created_guest_journeys_not_cancelled",
    "py_started_guest_journeys_not_cancelled",
    "py_completed_guest_journeys_not_cancelled",
    "py_created_guest_journeys",
    "py_started_guest_journeys",
    "py_completed_guest_journeys",
    "py_total_csat_score_count",
    "py_average_csat_score",
    "py_deposit_fees_in_gbp",
    "py_waiver_payments_in_gbp",
    "py_checkin_cover_fees_in_gbp",
    "py_total_guest_payments_in_gbp",
] %}
{% set sigma_threshold = var("sigma_threshold", 3) %}
{% set days_to_consider = 14 %}
{% set yesterday = "(current_date - interval '1 day')" %}
with
    -- Daily metric rows inside the trailing window (both bounds inclusive).
    -- Only the columns used further down are selected.
    recent_data as (
        select
            date_day,
            {% for metric in metric_names %}
                {{ metric }}{% if not loop.last %},{% endif %}
            {% endfor %}
        from {{ ref("kpis__product_guest_daily_metrics") }}
        where
            date_day >= {{ yesterday }} - interval '{{ days_to_consider }} days'
            and date_day <= {{ yesterday }}
    ),
    -- One row holding the mean and standard deviation of each metric over the
    -- whole window.
    metrics_recent_data as (
        select
            {% for metric in metric_names %}
                avg({{ metric }}) as avg_{{ metric }},
                stddev({{ metric }}) as stddev_{{ metric }}
                {% if not loop.last %},{% endif %}
            {% endfor %}
        from recent_data
    ),
    -- Yesterday's rows with a 0/1 flag per metric. A NULL metric, mean or
    -- stddev makes the comparison NULL, which falls through to 0.
    outliers as (
        select
            rd.date_day,
            {% for metric in metric_names %}
                case
                    when
                        abs(rd.{{ metric }} - stats.avg_{{ metric }}) > (
                            stats.stddev_{{ metric }} * {{ sigma_threshold }}
                        )
                    then 1
                    else 0
                end as is_outlier_{{ metric }}
                {% if not loop.last %},{% endif %}
            {% endfor %}
        from recent_data as rd
        cross join metrics_recent_data as stats
        where rd.date_day = {{ yesterday }}
    )
select
    {% for metric in metric_names %}
        sum(is_outlier_{{ metric }}) as outlier_count_{{ metric }}
        {% if not loop.last %},{% endif %}
    {% endfor %}
from outliers
-- No group by: the whole result is one group, kept only if any metric fired.
having
    {% for metric in metric_names %}
        sum(is_outlier_{{ metric }}) > 0 {% if not loop.last %} or {% endif %}
    {% endfor %}