144 lines
4.9 KiB
MySQL
144 lines
4.9 KiB
MySQL
|
|
/*
|
||
|
|
This test is applied in the reporting layer for Main KPIs,
|
||
|
|
specifically on reporting.mtd_aggregated_metrics.
|
||
|
|
|
||
|
|
It's supposed to run every day for the latest upate of KPIs.
|
||
|
|
|
||
|
|
There's chances that false positives are risen by these test. If at some
|
||
|
|
point it becomes too sensitive, just adapt the following parameters.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
-- Add here additive metrics that you would like to check
|
||
|
|
-- Recommended to exclude metrics that represent new products,
|
||
|
|
-- since there will be no history to check against.
|
||
|
|
-- Do NOT include rates/percentages/ratios.
|
||
|
|
{% set metric_names = (
|
||
|
|
"Cancelled Bookings",
|
||
|
|
"Checkout Bookings",
|
||
|
|
"Churning Deals",
|
||
|
|
"Churning Listings",
|
||
|
|
"Created Bookings",
|
||
|
|
"Deals Booked in 12 Months",
|
||
|
|
"Deals Booked in 6 Months",
|
||
|
|
"Deals Booked in Month",
|
||
|
|
"Deposit Fees",
|
||
|
|
"Est. Billable Bookings",
|
||
|
|
"First Time Booked Deals",
|
||
|
|
"First Time Booked Listings",
|
||
|
|
"Guest Journey Completed",
|
||
|
|
"Guest Journey Created",
|
||
|
|
"Guest Journey Started",
|
||
|
|
"Guest Journey with Payment",
|
||
|
|
"Guest Payments",
|
||
|
|
"Guest Revenue",
|
||
|
|
"Host Resolutions Amount Paid",
|
||
|
|
"Host Resolutions Payment Count",
|
||
|
|
"Invoiced APIs Revenue",
|
||
|
|
"Invoiced Booking Fees",
|
||
|
|
"Invoiced E-Deposit Fees",
|
||
|
|
"Invoiced Guesty Fees",
|
||
|
|
"Invoiced Listing Fees",
|
||
|
|
"Invoiced Operator Revenue",
|
||
|
|
"Invoiced Verification Fees",
|
||
|
|
"Listings Booked in 12 Months",
|
||
|
|
"Listings Booked in 6 Months",
|
||
|
|
"Listings Booked in Month",
|
||
|
|
"New Deals",
|
||
|
|
"New Listings",
|
||
|
|
"Total Revenue",
|
||
|
|
"Waiver Amount Paid back to Hosts",
|
||
|
|
"Waiver Amount Paid by Guests",
|
||
|
|
"Waiver Net Fees",
|
||
|
|
) %}
|
||
|
|
|
||
|
|
-- Specify here the day of the month that will start to be considered
|
||
|
|
-- for outlier detection. Keep in mind that 1st of every month is quite
|
||
|
|
-- unreliable thus false positives could appear. Recommended minimum 2.
|
||
|
|
{% set start_validating_on_this_day_month = 2 %}
|
||
|
|
|
||
|
|
-- Specify here the strength of the detector. A higher value
|
||
|
|
-- means that this test will allow for more variance to be accepted.
|
||
|
|
-- A lower value means that the chances of detecting outliers
|
||
|
|
-- and false positives will be higher. Recommended around 10.
|
||
|
|
{% set detector_strength = 10 %}
|
||
|
|
|
||
|
|
-- Specify here the number of days in the past that will be used
|
||
|
|
-- to compare against. Keep in mind that we only keep the daily
|
||
|
|
-- information for the current month, thus having 180 days here
|
||
|
|
-- it means that we will take 1) all values of the current month
|
||
|
|
-- except the latest update and 2) the end of month figures for the
|
||
|
|
-- past 6 months max.
|
||
|
|
{% set timeline_to_compare_against = 180 %}
|
||
|
|
|
||
|
|
with
|
||
|
|
max_date as (
|
||
|
|
select max(date) as max_date
|
||
|
|
from {{ ref("mtd_aggregated_metrics") }}
|
||
|
|
-- First days of the month is usually not estable to run
|
||
|
|
where day >= {{ start_validating_on_this_day_month }}
|
||
|
|
|
||
|
|
),
|
||
|
|
metric_data as (
|
||
|
|
select
|
||
|
|
date,
|
||
|
|
metric,
|
||
|
|
value,
|
||
|
|
coalesce(abs(value), 0) / day as abs_daily_value,
|
||
|
|
case when date = max_date then 1 else 0 end as is_max_date
|
||
|
|
from {{ ref("mtd_aggregated_metrics") }}
|
||
|
|
cross join max_date
|
||
|
|
where
|
||
|
|
day >= {{ start_validating_on_this_day_month }}
|
||
|
|
and dimension = 'Global'
|
||
|
|
and date between max_date -{{ timeline_to_compare_against }} and max_date
|
||
|
|
and metric in {{ metric_names }}
|
||
|
|
),
|
||
|
|
metrics_to_validate as (
|
||
|
|
select date, metric, value, abs_daily_value
|
||
|
|
from metric_data
|
||
|
|
where is_max_date = 1
|
||
|
|
),
|
||
|
|
metrics_to_compare_against as (
|
||
|
|
select
|
||
|
|
metric,
|
||
|
|
avg(abs_daily_value) as avg_daily_value_previous_dates,
|
||
|
|
stddev(abs_daily_value) as std_daily_value_previous_dates,
|
||
|
|
greatest(
|
||
|
|
avg(abs_daily_value)
|
||
|
|
- {{ detector_strength }} * stddev(abs_daily_value),
|
||
|
|
0
|
||
|
|
) as lower_bound,
|
||
|
|
avg(abs_daily_value)
|
||
|
|
+ {{ detector_strength }} * stddev(abs_daily_value) as upper_bound
|
||
|
|
from metric_data
|
||
|
|
where is_max_date = 0
|
||
|
|
group by 1
|
||
|
|
),
|
||
|
|
metrics_comparison as (
|
||
|
|
select
|
||
|
|
mtv.date,
|
||
|
|
mtv.metric,
|
||
|
|
mtv.value,
|
||
|
|
mtv.abs_daily_value,
|
||
|
|
mtca.avg_daily_value_previous_dates,
|
||
|
|
mtca.std_daily_value_previous_dates,
|
||
|
|
mtca.lower_bound,
|
||
|
|
mtca.upper_bound,
|
||
|
|
case
|
||
|
|
when
|
||
|
|
mtv.abs_daily_value >= mtca.lower_bound
|
||
|
|
and mtv.abs_daily_value <= mtca.upper_bound
|
||
|
|
then true
|
||
|
|
else false
|
||
|
|
end as is_abs_daily_value_accepted,
|
||
|
|
abs(mtv.abs_daily_value - mtca.avg_daily_value_previous_dates)
|
||
|
|
/ mtca.std_daily_value_previous_dates as signal_to_noise_factor
|
||
|
|
from metrics_to_validate mtv
|
||
|
|
inner join metrics_to_compare_against mtca on mtv.metric = mtca.metric
|
||
|
|
)
|
||
|
|
select *
|
||
|
|
from metrics_comparison
|
||
|
|
where is_abs_daily_value_accepted = false
|
||
|
|
order by signal_to_noise_factor desc
|