Merged PR 3643: KPIs outlier detector
# Description First attempt at creating a test. This test checks that the new values (from yesterday) don't vary too much from the previous days. The user can set the metrics to check on, how much tolerance to use in number of standard deviations allowed to deviate from the average and the number of days to consider in the evaluation. # Checklist - [x] The edited models and dependants run properly with production data. - [ ] The edited models are sufficiently documented. - [x] The edited models contain PK tests, and I've ran and passed them. - [ ] I have checked for DRY opportunities with other models and docs. - [ ] I've picked the right materialization for the affected models. # Other - [ ] Check if a full-refresh is required after this PR is merged. KPIs outlier detector Related work items: #24590
This commit is contained in:
commit
645958239e
4 changed files with 93 additions and 15 deletions
36
macros/tests/kpis_daily_outlier_detector.sql
Normal file
36
macros/tests/kpis_daily_outlier_detector.sql
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
{% test kpis_daily_outlier_detector(
|
||||
model, date_column, column_name, sigma_threshold=3, days_to_consider=14
|
||||
) %}
|
||||
with
|
||||
-- Retrieve recent data based on the defined days_to_consider
|
||||
recent_data as (
|
||||
select {{ date_column }}, {{ column_name }}
|
||||
from {{ model }}
|
||||
where
|
||||
{{ date_column }} between (
|
||||
current_date - interval '{{ days_to_consider + 1 }} days'
|
||||
) and (current_date - interval '1 day')
|
||||
),
|
||||
metrics_stats as (
|
||||
select
|
||||
avg({{ column_name }}) as avg_value,
|
||||
stddev({{ column_name }}) as stddev_value
|
||||
from recent_data
|
||||
),
|
||||
outliers as (
|
||||
select
|
||||
{{ column_name }} as value,
|
||||
(abs({{ column_name }} - metrics_stats.avg_value))
|
||||
/ (metrics_stats.stddev_value) as absolute_deviation_z_score,
|
||||
(abs({{ column_name }} - metrics_stats.avg_value))
|
||||
/ (metrics_stats.stddev_value)
|
||||
> {{ sigma_threshold }} as is_outlier
|
||||
from {{ model }}
|
||||
cross join metrics_stats
|
||||
where {{ date_column }} = current_date - interval '1 day'
|
||||
)
|
||||
-- Return failing rows if any values are flagged as outliers
|
||||
select *
|
||||
from outliers
|
||||
where is_outlier = true
|
||||
{% endtest %}
|
||||
|
|
@ -18,7 +18,7 @@ with
|
|||
sum(gj.created_guest_journeys) as created_guest_journeys,
|
||||
sum(gj.started_guest_journeys) as started_guest_journeys,
|
||||
sum(gj.completed_guest_journeys) as completed_guest_journeys,
|
||||
sum(gj.count_csat_score) as total_csat_score_count,
|
||||
sum(gj.count_csat_score) as count_csat_score,
|
||||
case
|
||||
when sum(gj.count_csat_score) > 0
|
||||
then
|
||||
|
|
@ -53,7 +53,7 @@ with
|
|||
gj.created_guest_journeys,
|
||||
gj.started_guest_journeys,
|
||||
gj.completed_guest_journeys,
|
||||
gj.total_csat_score_count,
|
||||
gj.count_csat_score,
|
||||
gj.average_csat_score,
|
||||
gp.deposit_fees_in_gbp,
|
||||
gp.waiver_payments_in_gbp,
|
||||
|
|
@ -93,12 +93,10 @@ select
|
|||
sum(gk.created_guest_journeys) as created_guest_journeys,
|
||||
sum(gk.started_guest_journeys) as started_guest_journeys,
|
||||
sum(gk.completed_guest_journeys) as completed_guest_journeys,
|
||||
sum(gk.total_csat_score_count) as total_csat_score_count,
|
||||
sum(gk.count_csat_score) as total_csat_score_count,
|
||||
case
|
||||
when sum(gk.total_csat_score_count) > 0
|
||||
then
|
||||
sum(gk.average_csat_score * gk.total_csat_score_count)
|
||||
/ sum(gk.total_csat_score_count)
|
||||
when sum(gk.count_csat_score) > 0
|
||||
then sum(gk.average_csat_score * gk.count_csat_score) / sum(gk.count_csat_score)
|
||||
else null
|
||||
end as average_csat_score,
|
||||
sum(gk.deposit_fees_in_gbp) as deposit_fees_in_gbp,
|
||||
|
|
@ -117,12 +115,12 @@ select
|
|||
sum(gk_py.created_guest_journeys) as py_created_guest_journeys,
|
||||
sum(gk_py.started_guest_journeys) as py_started_guest_journeys,
|
||||
sum(gk_py.completed_guest_journeys) as py_completed_guest_journeys,
|
||||
sum(gk_py.total_csat_score_count) as py_total_csat_score_count,
|
||||
sum(gk_py.count_csat_score) as py_total_csat_score_count,
|
||||
case
|
||||
when sum(gk_py.total_csat_score_count) > 0
|
||||
when sum(gk_py.count_csat_score) > 0
|
||||
then
|
||||
sum(gk_py.average_csat_score * gk_py.total_csat_score_count)
|
||||
/ sum(gk_py.total_csat_score_count)
|
||||
sum(gk_py.average_csat_score * gk_py.count_csat_score)
|
||||
/ sum(gk_py.count_csat_score)
|
||||
else null
|
||||
end as py_average_csat_score,
|
||||
sum(gk_py.deposit_fees_in_gbp) as py_deposit_fees_in_gbp,
|
||||
|
|
|
|||
|
|
@ -5180,7 +5180,7 @@ models:
|
|||
Count of daily guest journeys completed in a given date and
|
||||
per specified dimension.
|
||||
|
||||
- name: count_csat_score
|
||||
- name: total_csat_score_count
|
||||
data_type: bigint
|
||||
description: |
|
||||
Count of daily guest journeys with CSAT (customer satisfaction score)
|
||||
|
|
@ -5251,7 +5251,7 @@ models:
|
|||
Count of daily guest journeys completed on the same date in the previous year,
|
||||
segmented by the specified dimension.
|
||||
|
||||
- name: py_count_csat_score
|
||||
- name: py_total_csat_score_count
|
||||
data_type: bigint
|
||||
description: |
|
||||
Count of daily guest journeys with CSAT (customer satisfaction score)
|
||||
|
|
|
|||
|
|
@ -66,42 +66,70 @@ models:
|
|||
description: |
|
||||
Count of daily guest journeys created, excluding cancelled bookings,
|
||||
in a given date and per specified dimension.
|
||||
tests:
|
||||
- kpis_daily_outlier_detector:
|
||||
column_name: created_guest_journeys_not_cancelled
|
||||
date_column: date_day
|
||||
|
||||
- name: started_guest_journeys_not_cancelled
|
||||
data_type: bigint
|
||||
description: |
|
||||
Count of daily guest journeys started, excluding cancelled bookings,
|
||||
in a given date and per specified dimension.
|
||||
tests:
|
||||
- kpis_daily_outlier_detector:
|
||||
column_name: started_guest_journeys_not_cancelled
|
||||
date_column: date_day
|
||||
|
||||
- name: completed_guest_journeys_not_cancelled
|
||||
data_type: bigint
|
||||
description: |
|
||||
Count of daily guest journeys completed, excluding cancelled bookings,
|
||||
in a given date and per specified dimension.
|
||||
tests:
|
||||
- kpis_daily_outlier_detector:
|
||||
column_name: completed_guest_journeys_not_cancelled
|
||||
date_column: date_day
|
||||
|
||||
- name: created_guest_journeys
|
||||
data_type: bigint
|
||||
description: |
|
||||
Count of daily guest journeys created in a given date and
|
||||
per specified dimension.
|
||||
tests:
|
||||
- kpis_daily_outlier_detector:
|
||||
column_name: created_guest_journeys
|
||||
date_column: date_day
|
||||
|
||||
- name: started_guest_journeys
|
||||
data_type: bigint
|
||||
description: |
|
||||
Count of daily guest journeys started in a given date and
|
||||
per specified dimension.
|
||||
tests:
|
||||
- kpis_daily_outlier_detector:
|
||||
column_name: started_guest_journeys
|
||||
date_column: date_day
|
||||
|
||||
- name: completed_guest_journeys
|
||||
data_type: bigint
|
||||
description: |
|
||||
Count of daily guest journeys completed in a given date and
|
||||
per specified dimension.
|
||||
tests:
|
||||
- kpis_daily_outlier_detector:
|
||||
column_name: completed_guest_journeys
|
||||
date_column: date_day
|
||||
|
||||
- name: count_csat_score
|
||||
- name: total_csat_score_count
|
||||
data_type: bigint
|
||||
description: |
|
||||
Count of daily guest journeys with CSAT (customer satisfaction score)
|
||||
in a given date and per specified dimension.
|
||||
tests:
|
||||
- kpis_daily_outlier_detector:
|
||||
column_name: total_csat_score_count
|
||||
date_column: date_day
|
||||
|
||||
- name: average_csat_score
|
||||
data_type: bigint
|
||||
|
|
@ -113,24 +141,40 @@ models:
|
|||
description: |
|
||||
Sum of deposit fees paid by guests, without taxes, in GBP
|
||||
in a given date and per specified dimension.
|
||||
tests:
|
||||
- kpis_daily_outlier_detector:
|
||||
column_name: deposit_fees_in_gbp
|
||||
date_column: date_day
|
||||
|
||||
- name: waiver_payments_in_gbp
|
||||
data_type: decimal
|
||||
description: |
|
||||
Sum of waiver payments paid by guests, without taxes, in GBP
|
||||
in a given date and per specified dimension.
|
||||
tests:
|
||||
- kpis_daily_outlier_detector:
|
||||
column_name: waiver_payments_in_gbp
|
||||
date_column: date_day
|
||||
|
||||
- name: checkin_cover_fees_in_gbp
|
||||
data_type: decimal
|
||||
description: |
|
||||
Sum of checkin cover fees paid by guests, without taxes, in GBP
|
||||
in a given date and per specified dimension.
|
||||
tests:
|
||||
- kpis_daily_outlier_detector:
|
||||
column_name: checkin_cover_fees_in_gbp
|
||||
date_column: date_day
|
||||
|
||||
- name: total_guest_payments_in_gbp
|
||||
data_type: decimal
|
||||
description: |
|
||||
Sum of total payments paid by guests, without taxes, in GBP
|
||||
in a given date and per specified dimension.
|
||||
tests:
|
||||
- kpis_daily_outlier_detector:
|
||||
column_name: total_guest_payments_in_gbp
|
||||
date_column: date_day
|
||||
|
||||
- name: py_created_guest_journeys_not_cancelled
|
||||
data_type: bigint
|
||||
|
|
@ -168,7 +212,7 @@ models:
|
|||
Count of daily guest journeys completed on the same date in the previous year,
|
||||
segmented by the specified dimension.
|
||||
|
||||
- name: py_count_csat_score
|
||||
- name: py_total_csat_score_count
|
||||
data_type: bigint
|
||||
description: |
|
||||
Count of daily guest journeys with CSAT (customer satisfaction score)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue