# Description Main Changes: * Computes the total booking fees in `int_kpis__metric_daily_invoiced_revenue` * Propagates within KPIs, including schema and tests * Propagates within cross in both "by deal" and "by dimension/global" * Propagates to main kpis tests Small fix: * In `int_mtd_vs_previous_year_metrics`, manual formatting because sqlfmt is broken on this model... # Checklist - [X] The edited models and dependants run properly with production data. - [X] The edited models are sufficiently documented. - [X] The edited models contain PK tests, and I've ran and passed them. - [X] I have checked for DRY opportunities with other models and docs. - [X] I've picked the right materialization for the affected models. # Other - [ ] Check if a full-refresh is required after this PR is merged. Related work items: #28560
140 lines
5 KiB
SQL
140 lines
5 KiB
SQL
/*
|
|
This test is applied in the reporting layer for Main KPIs,
|
|
specifically on reporting.mtd_aggregated_metrics.
|
|
|
|
It's supposed to run every day for the latest upate of KPIs.
|
|
|
|
There's chances that false positives are risen by these test. If at some
|
|
point it becomes too sensitive, just adapt the following parameters.
|
|
|
|
*/
|
|
-- Add here additive metrics that you would like to check
|
|
-- Recommended to exclude metrics that represent new products,
|
|
-- since there will be no history to check against.
|
|
-- Do NOT include rates/percentages/ratios.
|
|
{% set metric_names = (
|
|
"Cancelled Check Out Bookings",
|
|
"Cancelled Created Bookings",
|
|
"Check Out Bookings (Excl. Cancelled)",
|
|
"Created Bookings (Excl. Cancelled)",
|
|
"Damage Host-Waiver Payments",
|
|
"Deposit Fees Revenue",
|
|
"Est. Billable Bookings",
|
|
"First Time Booked Deals",
|
|
"First Time Booked Listings",
|
|
"Guest Journey Completed",
|
|
"Guest Journey Created",
|
|
"Guest Journey Started",
|
|
"Guest Journey with Payment",
|
|
"Guest Revenue",
|
|
"Host Resolutions Amount Paid",
|
|
"Host Resolutions Payment Count",
|
|
"Invoiced APIs Revenue",
|
|
"Invoiced Athena Revenue",
|
|
"Invoiced Old Dashboard Booking Fees Revenue",
|
|
"Invoiced Total Booking Fees Revenue",
|
|
"Invoiced E-Deposit Revenue",
|
|
"Invoiced Listing Fees Revenue",
|
|
"Invoiced Operator Revenue",
|
|
"Invoiced Verification Fees Revenue",
|
|
"New Deals",
|
|
"New Listings",
|
|
"Revenue Retained",
|
|
"Revenue Retained Post-Resolutions",
|
|
"Total Check Out Bookings",
|
|
"Total Created Bookings",
|
|
"Total Revenue",
|
|
"Waiver Revenue",
|
|
"Waiver Retained",
|
|
) %}
|
|
|
|
-- Specify here the day of the month that will start to be considered
|
|
-- for outlier detection. Keep in mind that 1st of every month is quite
|
|
-- unreliable thus false positives could appear. Recommended minimum 2.
|
|
{% set start_validating_on_this_day_month = 2 %}
|
|
|
|
-- Specify here the strength of the detector. A higher value
|
|
-- means that this test will allow for more variance to be accepted,
|
|
-- thus it will be more tolerant.
|
|
-- A lower value means that the chances of detecting outliers
|
|
-- and false positives will be higher. Recommended around 10.
|
|
{% set detector_tolerance = 5 %}
|
|
|
|
-- Specify here the number of days in the past that will be used
|
|
-- to compare against. Keep in mind that we only keep the daily
|
|
-- information for the current month, thus having 180 days here
|
|
-- it means that we will take 1) all values of the current month
|
|
-- except the latest update and 2) the end of month figures for the
|
|
-- past 6 months max.
|
|
{% set timeline_to_compare_against = 180 %}
|
|
|
|
with
|
|
max_date as (
|
|
select max(date) as max_date
|
|
from {{ ref("mtd_aggregated_metrics") }}
|
|
-- First days of the month is usually not estable to run
|
|
where day >= {{ start_validating_on_this_day_month }}
|
|
|
|
),
|
|
metric_data as (
|
|
select
|
|
date,
|
|
metric,
|
|
value,
|
|
coalesce(abs(value), 0) / day as abs_daily_value,
|
|
case when date = max_date then 1 else 0 end as is_max_date
|
|
from {{ ref("mtd_aggregated_metrics") }}
|
|
cross join max_date
|
|
where
|
|
day >= {{ start_validating_on_this_day_month }}
|
|
and dimension = 'Global'
|
|
and date between max_date -{{ timeline_to_compare_against }} and max_date
|
|
and metric in {{ metric_names }}
|
|
),
|
|
metrics_to_validate as (
|
|
select date, metric, value, abs_daily_value
|
|
from metric_data
|
|
where is_max_date = 1
|
|
),
|
|
metrics_to_compare_against as (
|
|
select
|
|
metric,
|
|
avg(abs_daily_value) as avg_daily_value_previous_dates,
|
|
stddev(abs_daily_value) as std_daily_value_previous_dates,
|
|
greatest(
|
|
avg(abs_daily_value)
|
|
- {{ detector_tolerance }} * stddev(abs_daily_value),
|
|
0
|
|
) as lower_bound,
|
|
avg(abs_daily_value)
|
|
+ {{ detector_tolerance }} * stddev(abs_daily_value) as upper_bound
|
|
from metric_data
|
|
where is_max_date = 0
|
|
group by 1
|
|
),
|
|
metrics_comparison as (
|
|
select
|
|
mtv.date,
|
|
mtv.metric,
|
|
mtv.value,
|
|
mtv.abs_daily_value,
|
|
mtca.avg_daily_value_previous_dates,
|
|
mtca.std_daily_value_previous_dates,
|
|
mtca.lower_bound,
|
|
mtca.upper_bound,
|
|
case
|
|
when
|
|
mtv.abs_daily_value >= mtca.lower_bound
|
|
and mtv.abs_daily_value <= mtca.upper_bound
|
|
then true
|
|
else false
|
|
end as is_abs_daily_value_accepted,
|
|
abs(mtv.abs_daily_value - mtca.avg_daily_value_previous_dates)
|
|
/ mtca.std_daily_value_previous_dates as signal_to_noise_factor
|
|
from metrics_to_validate mtv
|
|
inner join metrics_to_compare_against mtca on mtv.metric = mtca.metric
|
|
)
|
|
select *
|
|
from metrics_comparison
|
|
where is_abs_daily_value_accepted = false
|
|
order by signal_to_noise_factor desc
|