generalized test
This commit is contained in:
parent
aaf512a288
commit
bede588062
5 changed files with 66 additions and 92 deletions
33
macros/tests/kpis_outlier_detector.sql
Normal file
33
macros/tests/kpis_outlier_detector.sql
Normal file
|
|
@ -0,0 +1,33 @@
|
||||||
|
{% test kpis_outlier_detector(
|
||||||
|
model, column_name, sigma_threshold=3, days_to_consider=14
|
||||||
|
) %}
|
||||||
|
with
|
||||||
|
-- Retrieve recent data based on the defined days_to_consider
|
||||||
|
recent_data as (
|
||||||
|
select *
|
||||||
|
from {{ model }}
|
||||||
|
where
|
||||||
|
date_day between (
|
||||||
|
current_date - interval '{{ days_to_consider + 1 }} days'
|
||||||
|
) and (current_date - interval '1 day')
|
||||||
|
),
|
||||||
|
metrics_stats as (
|
||||||
|
select
|
||||||
|
avg({{ column_name }}) as avg_value,
|
||||||
|
stddev({{ column_name }}) as stddev_value
|
||||||
|
from recent_data
|
||||||
|
),
|
||||||
|
outliers as (
|
||||||
|
select
|
||||||
|
{{ column_name }} as value,
|
||||||
|
abs({{ column_name }} - metrics_stats.avg_value)
|
||||||
|
> (metrics_stats.stddev_value * {{ sigma_threshold }}) as is_outlier
|
||||||
|
from {{ model }}
|
||||||
|
cross join metrics_stats
|
||||||
|
where date_day = current_date - interval '1 day'
|
||||||
|
)
|
||||||
|
-- Return failing rows if any values are flagged as outliers
|
||||||
|
select *
|
||||||
|
from outliers
|
||||||
|
where is_outlier = true
|
||||||
|
{% endtest %}
|
||||||
|
|
@ -18,7 +18,7 @@ with
|
||||||
sum(gj.created_guest_journeys) as created_guest_journeys,
|
sum(gj.created_guest_journeys) as created_guest_journeys,
|
||||||
sum(gj.started_guest_journeys) as started_guest_journeys,
|
sum(gj.started_guest_journeys) as started_guest_journeys,
|
||||||
sum(gj.completed_guest_journeys) as completed_guest_journeys,
|
sum(gj.completed_guest_journeys) as completed_guest_journeys,
|
||||||
sum(gj.count_csat_score) as total_csat_score_count,
|
sum(gj.count_csat_score) as count_csat_score,
|
||||||
case
|
case
|
||||||
when sum(gj.count_csat_score) > 0
|
when sum(gj.count_csat_score) > 0
|
||||||
then
|
then
|
||||||
|
|
@ -53,7 +53,7 @@ with
|
||||||
gj.created_guest_journeys,
|
gj.created_guest_journeys,
|
||||||
gj.started_guest_journeys,
|
gj.started_guest_journeys,
|
||||||
gj.completed_guest_journeys,
|
gj.completed_guest_journeys,
|
||||||
gj.total_csat_score_count,
|
gj.count_csat_score,
|
||||||
gj.average_csat_score,
|
gj.average_csat_score,
|
||||||
gp.deposit_fees_in_gbp,
|
gp.deposit_fees_in_gbp,
|
||||||
gp.waiver_payments_in_gbp,
|
gp.waiver_payments_in_gbp,
|
||||||
|
|
@ -93,12 +93,10 @@ select
|
||||||
sum(gk.created_guest_journeys) as created_guest_journeys,
|
sum(gk.created_guest_journeys) as created_guest_journeys,
|
||||||
sum(gk.started_guest_journeys) as started_guest_journeys,
|
sum(gk.started_guest_journeys) as started_guest_journeys,
|
||||||
sum(gk.completed_guest_journeys) as completed_guest_journeys,
|
sum(gk.completed_guest_journeys) as completed_guest_journeys,
|
||||||
sum(gk.total_csat_score_count) as total_csat_score_count,
|
sum(gk.count_csat_score) as count_csat_score,
|
||||||
case
|
case
|
||||||
when sum(gk.total_csat_score_count) > 0
|
when sum(gk.count_csat_score) > 0
|
||||||
then
|
then sum(gk.average_csat_score * gk.count_csat_score) / sum(gk.count_csat_score)
|
||||||
sum(gk.average_csat_score * gk.total_csat_score_count)
|
|
||||||
/ sum(gk.total_csat_score_count)
|
|
||||||
else null
|
else null
|
||||||
end as average_csat_score,
|
end as average_csat_score,
|
||||||
sum(gk.deposit_fees_in_gbp) as deposit_fees_in_gbp,
|
sum(gk.deposit_fees_in_gbp) as deposit_fees_in_gbp,
|
||||||
|
|
@ -117,12 +115,12 @@ select
|
||||||
sum(gk_py.created_guest_journeys) as py_created_guest_journeys,
|
sum(gk_py.created_guest_journeys) as py_created_guest_journeys,
|
||||||
sum(gk_py.started_guest_journeys) as py_started_guest_journeys,
|
sum(gk_py.started_guest_journeys) as py_started_guest_journeys,
|
||||||
sum(gk_py.completed_guest_journeys) as py_completed_guest_journeys,
|
sum(gk_py.completed_guest_journeys) as py_completed_guest_journeys,
|
||||||
sum(gk_py.total_csat_score_count) as py_total_csat_score_count,
|
sum(gk_py.count_csat_score) as py_count_csat_score,
|
||||||
case
|
case
|
||||||
when sum(gk_py.total_csat_score_count) > 0
|
when sum(gk_py.count_csat_score) > 0
|
||||||
then
|
then
|
||||||
sum(gk_py.average_csat_score * gk_py.total_csat_score_count)
|
sum(gk_py.average_csat_score * gk_py.count_csat_score)
|
||||||
/ sum(gk_py.total_csat_score_count)
|
/ sum(gk_py.count_csat_score)
|
||||||
else null
|
else null
|
||||||
end as py_average_csat_score,
|
end as py_average_csat_score,
|
||||||
sum(gk_py.deposit_fees_in_gbp) as py_deposit_fees_in_gbp,
|
sum(gk_py.deposit_fees_in_gbp) as py_deposit_fees_in_gbp,
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ select
|
||||||
created_guest_journeys as created_guest_journeys,
|
created_guest_journeys as created_guest_journeys,
|
||||||
started_guest_journeys as started_guest_journeys,
|
started_guest_journeys as started_guest_journeys,
|
||||||
completed_guest_journeys as completed_guest_journeys,
|
completed_guest_journeys as completed_guest_journeys,
|
||||||
total_csat_score_count as total_csat_score_count,
|
count_csat_score as count_csat_score,
|
||||||
average_csat_score as average_csat_score,
|
average_csat_score as average_csat_score,
|
||||||
deposit_fees_in_gbp as deposit_fees_in_gbp,
|
deposit_fees_in_gbp as deposit_fees_in_gbp,
|
||||||
waiver_payments_in_gbp as waiver_payments_in_gbp,
|
waiver_payments_in_gbp as waiver_payments_in_gbp,
|
||||||
|
|
@ -28,7 +28,7 @@ select
|
||||||
py_created_guest_journeys as py_created_guest_journeys,
|
py_created_guest_journeys as py_created_guest_journeys,
|
||||||
py_started_guest_journeys as py_started_guest_journeys,
|
py_started_guest_journeys as py_started_guest_journeys,
|
||||||
py_completed_guest_journeys as py_completed_guest_journeys,
|
py_completed_guest_journeys as py_completed_guest_journeys,
|
||||||
py_total_csat_score_count as py_total_csat_score_count,
|
py_count_csat_score as py_count_csat_score,
|
||||||
py_average_csat_score as py_average_csat_score,
|
py_average_csat_score as py_average_csat_score,
|
||||||
py_deposit_fees_in_gbp as py_deposit_fees_in_gbp,
|
py_deposit_fees_in_gbp as py_deposit_fees_in_gbp,
|
||||||
py_waiver_payments_in_gbp as py_waiver_payments_in_gbp,
|
py_waiver_payments_in_gbp as py_waiver_payments_in_gbp,
|
||||||
|
|
|
||||||
|
|
@ -66,42 +66,56 @@ models:
|
||||||
description: |
|
description: |
|
||||||
Count of daily guest journeys created, excluding cancelled bookings,
|
Count of daily guest journeys created, excluding cancelled bookings,
|
||||||
in a given date and per specified dimension.
|
in a given date and per specified dimension.
|
||||||
|
tests:
|
||||||
|
- kpis_outlier_detector
|
||||||
|
|
||||||
- name: started_guest_journeys_not_cancelled
|
- name: started_guest_journeys_not_cancelled
|
||||||
data_type: bigint
|
data_type: bigint
|
||||||
description: |
|
description: |
|
||||||
Count of daily guest journeys started, excluding cancelled bookings,
|
Count of daily guest journeys started, excluding cancelled bookings,
|
||||||
in a given date and per specified dimension.
|
in a given date and per specified dimension.
|
||||||
|
tests:
|
||||||
|
- kpis_outlier_detector
|
||||||
|
|
||||||
- name: completed_guest_journeys_not_cancelled
|
- name: completed_guest_journeys_not_cancelled
|
||||||
data_type: bigint
|
data_type: bigint
|
||||||
description: |
|
description: |
|
||||||
Count of daily guest journeys completed, excluding cancelled bookings,
|
Count of daily guest journeys completed, excluding cancelled bookings,
|
||||||
in a given date and per specified dimension.
|
in a given date and per specified dimension.
|
||||||
|
tests:
|
||||||
|
- kpis_outlier_detector
|
||||||
|
|
||||||
- name: created_guest_journeys
|
- name: created_guest_journeys
|
||||||
data_type: bigint
|
data_type: bigint
|
||||||
description: |
|
description: |
|
||||||
Count of daily guest journeys created in a given date and
|
Count of daily guest journeys created in a given date and
|
||||||
per specified dimension.
|
per specified dimension.
|
||||||
|
tests:
|
||||||
|
- kpis_outlier_detector
|
||||||
|
|
||||||
- name: started_guest_journeys
|
- name: started_guest_journeys
|
||||||
data_type: bigint
|
data_type: bigint
|
||||||
description: |
|
description: |
|
||||||
Count of daily guest journeys started in a given date and
|
Count of daily guest journeys started in a given date and
|
||||||
per specified dimension.
|
per specified dimension.
|
||||||
|
tests:
|
||||||
|
- kpis_outlier_detector
|
||||||
|
|
||||||
- name: completed_guest_journeys
|
- name: completed_guest_journeys
|
||||||
data_type: bigint
|
data_type: bigint
|
||||||
description: |
|
description: |
|
||||||
Count of daily guest journeys completed in a given date and
|
Count of daily guest journeys completed in a given date and
|
||||||
per specified dimension.
|
per specified dimension.
|
||||||
|
tests:
|
||||||
|
- kpis_outlier_detector
|
||||||
|
|
||||||
- name: count_csat_score
|
- name: count_csat_score
|
||||||
data_type: bigint
|
data_type: bigint
|
||||||
description: |
|
description: |
|
||||||
Count of daily guest journeys with CSAT (customer satisfaction score)
|
Count of daily guest journeys with CSAT (customer satisfaction score)
|
||||||
in a given date and per specified dimension.
|
in a given date and per specified dimension.
|
||||||
|
tests:
|
||||||
|
- kpis_outlier_detector
|
||||||
|
|
||||||
- name: average_csat_score
|
- name: average_csat_score
|
||||||
data_type: bigint
|
data_type: bigint
|
||||||
|
|
@ -113,24 +127,32 @@ models:
|
||||||
description: |
|
description: |
|
||||||
Sum of deposit fees paid by guests, without taxes, in GBP
|
Sum of deposit fees paid by guests, without taxes, in GBP
|
||||||
in a given date and per specified dimension.
|
in a given date and per specified dimension.
|
||||||
|
tests:
|
||||||
|
- kpis_outlier_detector
|
||||||
|
|
||||||
- name: waiver_payments_in_gbp
|
- name: waiver_payments_in_gbp
|
||||||
data_type: decimal
|
data_type: decimal
|
||||||
description: |
|
description: |
|
||||||
Sum of waiver payments paid by guests, without taxes, in GBP
|
Sum of waiver payments paid by guests, without taxes, in GBP
|
||||||
in a given date and per specified dimension.
|
in a given date and per specified dimension.
|
||||||
|
tests:
|
||||||
|
- kpis_outlier_detector
|
||||||
|
|
||||||
- name: checkin_cover_fees_in_gbp
|
- name: checkin_cover_fees_in_gbp
|
||||||
data_type: decimal
|
data_type: decimal
|
||||||
description: |
|
description: |
|
||||||
Sum of checkin cover fees paid by guests, without taxes, in GBP
|
Sum of checkin cover fees paid by guests, without taxes, in GBP
|
||||||
in a given date and per specified dimension.
|
in a given date and per specified dimension.
|
||||||
|
tests:
|
||||||
|
- kpis_outlier_detector
|
||||||
|
|
||||||
- name: total_guest_payments_in_gbp
|
- name: total_guest_payments_in_gbp
|
||||||
data_type: decimal
|
data_type: decimal
|
||||||
description: |
|
description: |
|
||||||
Sum of total payments paid by guests, without taxes, in GBP
|
Sum of total payments paid by guests, without taxes, in GBP
|
||||||
in a given date and per specified dimension.
|
in a given date and per specified dimension.
|
||||||
|
tests:
|
||||||
|
- kpis_outlier_detector
|
||||||
|
|
||||||
- name: py_created_guest_journeys_not_cancelled
|
- name: py_created_guest_journeys_not_cancelled
|
||||||
data_type: bigint
|
data_type: bigint
|
||||||
|
|
|
||||||
|
|
@ -1,79 +0,0 @@
|
||||||
/*
|
|
||||||
This is a test to detect outlier in Guest KPI metrics.
|
|
||||||
It uses the standard deviation of the values in a
|
|
||||||
set number of days for all specified metrics
|
|
||||||
*/
|
|
||||||
-- Add here the metrics you want to check for outliers
|
|
||||||
-- (metrics that are cumulative or count-based)
|
|
||||||
{% set metric_names = (
|
|
||||||
"created_guest_journeys_not_cancelled",
|
|
||||||
"started_guest_journeys_not_cancelled",
|
|
||||||
"completed_guest_journeys_not_cancelled",
|
|
||||||
"created_guest_journeys",
|
|
||||||
"started_guest_journeys",
|
|
||||||
"completed_guest_journeys",
|
|
||||||
"total_csat_score_count",
|
|
||||||
"average_csat_score",
|
|
||||||
"deposit_fees_in_gbp",
|
|
||||||
"waiver_payments_in_gbp",
|
|
||||||
"checkin_cover_fees_in_gbp",
|
|
||||||
"total_guest_payments_in_gbp",
|
|
||||||
) %}
|
|
||||||
-- Set the number of standard deviations (sigma) allowed
|
|
||||||
-- for a value to be considered an outlier.
|
|
||||||
-- Values beyond this threshold from the average are flagged
|
|
||||||
-- as outliers.
|
|
||||||
{% set sigma_threshold = var("sigma_threshold", 3) %}
|
|
||||||
-- Set the number of previous days' data (e.g., 14 days) to
|
|
||||||
-- calculate the average and standard deviation for comparison.
|
|
||||||
{% set days_to_consider = 14 %}
|
|
||||||
|
|
||||||
with
|
|
||||||
recent_data as (
|
|
||||||
select *
|
|
||||||
from {{ ref("kpis__product_guest_daily_metrics") }}
|
|
||||||
where
|
|
||||||
date_day
|
|
||||||
between (current_date - interval '{{ days_to_consider + 1 }} days') and (
|
|
||||||
current_date - interval '1 day'
|
|
||||||
)
|
|
||||||
|
|
||||||
),
|
|
||||||
metrics_recent_data as (
|
|
||||||
select
|
|
||||||
{% for metric in metric_names %}
|
|
||||||
avg({{ metric }}) as avg_{{ metric }},
|
|
||||||
stddev({{ metric }}) as stddev_{{ metric }}
|
|
||||||
{% if not loop.last %},{% endif %}
|
|
||||||
{% endfor %}
|
|
||||||
from recent_data
|
|
||||||
),
|
|
||||||
outliers as (
|
|
||||||
select
|
|
||||||
rd.date_day,
|
|
||||||
{% for metric in metric_names %}
|
|
||||||
case
|
|
||||||
when
|
|
||||||
abs(rd.{{ metric }} - metrics_recent_data.avg_{{ metric }}) > (
|
|
||||||
metrics_recent_data.stddev_{{ metric }}
|
|
||||||
* {{ sigma_threshold }}
|
|
||||||
)
|
|
||||||
then 1
|
|
||||||
else 0
|
|
||||||
end as is_outlier_{{ metric }}
|
|
||||||
{% if not loop.last %},{% endif %}
|
|
||||||
{% endfor %}
|
|
||||||
from recent_data rd
|
|
||||||
cross join metrics_recent_data
|
|
||||||
where rd.date_day = current_date - interval '1 day'
|
|
||||||
)
|
|
||||||
select
|
|
||||||
{% for metric in metric_names %}
|
|
||||||
sum(is_outlier_{{ metric }}) as outlier_count_{{ metric }}
|
|
||||||
{% if not loop.last %},{% endif %}
|
|
||||||
{% endfor %}
|
|
||||||
from outliers
|
|
||||||
having
|
|
||||||
{% for metric in metric_names %}
|
|
||||||
sum(is_outlier_{{ metric }}) > 0 {% if not loop.last %} or {% endif %}
|
|
||||||
{% endfor %}
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue