diff --git a/macros/tests/kpis_outlier_detector.sql b/macros/tests/kpis_outlier_detector.sql new file mode 100644 index 0000000..1c97be4 --- /dev/null +++ b/macros/tests/kpis_outlier_detector.sql @@ -0,0 +1,33 @@ +{% test kpis_outlier_detector( + model, column_name, sigma_threshold=3, days_to_consider=14 +) %} + with + -- Retrieve recent data based on the defined days_to_consider + recent_data as ( + select * + from {{ model }} + where + date_day between ( + current_date - interval '{{ days_to_consider + 1 }} days' + ) and (current_date - interval '1 day') + ), + metrics_stats as ( + select + avg({{ column_name }}) as avg_value, + stddev({{ column_name }}) as stddev_value + from recent_data + ), + outliers as ( + select + {{ column_name }} as value, + abs({{ column_name }} - metrics_stats.avg_value) + > (metrics_stats.stddev_value * {{ sigma_threshold }}) as is_outlier + from {{ model }} + cross join metrics_stats + where date_day = current_date - interval '1 day' + ) + -- Return failing rows if any values are flagged as outliers + select * + from outliers + where is_outlier = true +{% endtest %} diff --git a/models/intermediate/kpis/int_kpis__product_guest_daily_metrics.sql b/models/intermediate/kpis/int_kpis__product_guest_daily_metrics.sql index a56ed48..d30c291 100644 --- a/models/intermediate/kpis/int_kpis__product_guest_daily_metrics.sql +++ b/models/intermediate/kpis/int_kpis__product_guest_daily_metrics.sql @@ -18,7 +18,7 @@ with sum(gj.created_guest_journeys) as created_guest_journeys, sum(gj.started_guest_journeys) as started_guest_journeys, sum(gj.completed_guest_journeys) as completed_guest_journeys, - sum(gj.count_csat_score) as total_csat_score_count, + sum(gj.count_csat_score) as count_csat_score, case when sum(gj.count_csat_score) > 0 then @@ -53,7 +53,7 @@ with gj.created_guest_journeys, gj.started_guest_journeys, gj.completed_guest_journeys, - gj.total_csat_score_count, + gj.count_csat_score, gj.average_csat_score, gp.deposit_fees_in_gbp, gp.waiver_payments_in_gbp, @@ -93,12 +93,10 @@ select sum(gk.created_guest_journeys) as created_guest_journeys, sum(gk.started_guest_journeys) as started_guest_journeys, sum(gk.completed_guest_journeys) as completed_guest_journeys, - sum(gk.total_csat_score_count) as total_csat_score_count, + sum(gk.count_csat_score) as count_csat_score, case - when sum(gk.total_csat_score_count) > 0 - then - sum(gk.average_csat_score * gk.total_csat_score_count) - / sum(gk.total_csat_score_count) + when sum(gk.count_csat_score) > 0 + then sum(gk.average_csat_score * gk.count_csat_score) / sum(gk.count_csat_score) else null end as average_csat_score, sum(gk.deposit_fees_in_gbp) as deposit_fees_in_gbp, @@ -117,12 +115,12 @@ select sum(gk_py.created_guest_journeys) as py_created_guest_journeys, sum(gk_py.started_guest_journeys) as py_started_guest_journeys, sum(gk_py.completed_guest_journeys) as py_completed_guest_journeys, - sum(gk_py.total_csat_score_count) as py_total_csat_score_count, + sum(gk_py.count_csat_score) as py_count_csat_score, case - when sum(gk_py.total_csat_score_count) > 0 + when sum(gk_py.count_csat_score) > 0 then - sum(gk_py.average_csat_score * gk_py.total_csat_score_count) - / sum(gk_py.total_csat_score_count) + sum(gk_py.average_csat_score * gk_py.count_csat_score) + / sum(gk_py.count_csat_score) else null end as py_average_csat_score, sum(gk_py.deposit_fees_in_gbp) as py_deposit_fees_in_gbp, diff --git a/models/reporting/kpis/kpis__product_guest_daily_metrics.sql b/models/reporting/kpis/kpis__product_guest_daily_metrics.sql index b0878ef..b9fbe42 100644 --- a/models/reporting/kpis/kpis__product_guest_daily_metrics.sql +++ b/models/reporting/kpis/kpis__product_guest_daily_metrics.sql @@ -15,7 +15,7 @@ select created_guest_journeys as created_guest_journeys, started_guest_journeys as started_guest_journeys, completed_guest_journeys as completed_guest_journeys, - total_csat_score_count as total_csat_score_count, + count_csat_score as count_csat_score, average_csat_score as average_csat_score, deposit_fees_in_gbp as deposit_fees_in_gbp, waiver_payments_in_gbp as waiver_payments_in_gbp, @@ -28,7 +28,7 @@ select py_created_guest_journeys as py_created_guest_journeys, py_started_guest_journeys as py_started_guest_journeys, py_completed_guest_journeys as py_completed_guest_journeys, - py_total_csat_score_count as py_total_csat_score_count, + py_count_csat_score as py_count_csat_score, py_average_csat_score as py_average_csat_score, py_deposit_fees_in_gbp as py_deposit_fees_in_gbp, py_waiver_payments_in_gbp as py_waiver_payments_in_gbp, diff --git a/models/reporting/kpis/schema.yml b/models/reporting/kpis/schema.yml index 9f337bc..6acc28e 100644 --- a/models/reporting/kpis/schema.yml +++ b/models/reporting/kpis/schema.yml @@ -66,42 +66,56 @@ models: description: | Count of daily guest journeys created, excluding cancelled bookings, in a given date and per specified dimension. + tests: + - kpis_outlier_detector - name: started_guest_journeys_not_cancelled data_type: bigint description: | Count of daily guest journeys started, excluding cancelled bookings, in a given date and per specified dimension. + tests: + - kpis_outlier_detector - name: completed_guest_journeys_not_cancelled data_type: bigint description: | Count of daily guest journeys completed, excluding cancelled bookings, in a given date and per specified dimension. + tests: + - kpis_outlier_detector - name: created_guest_journeys data_type: bigint description: | Count of daily guest journeys created in a given date and per specified dimension. + tests: + - kpis_outlier_detector - name: started_guest_journeys data_type: bigint description: | Count of daily guest journeys started in a given date and per specified dimension. + tests: + - kpis_outlier_detector - name: completed_guest_journeys data_type: bigint description: | Count of daily guest journeys completed in a given date and per specified dimension. + tests: + - kpis_outlier_detector - name: count_csat_score data_type: bigint description: | Count of daily guest journeys with CSAT (customer satisfaction score) in a given date and per specified dimension. + tests: + - kpis_outlier_detector - name: average_csat_score data_type: bigint @@ -113,24 +127,32 @@ models: description: | Sum of deposit fees paid by guests, without taxes, in GBP in a given date and per specified dimension. + tests: + - kpis_outlier_detector - name: waiver_payments_in_gbp data_type: decimal description: | Sum of waiver payments paid by guests, without taxes, in GBP in a given date and per specified dimension. + tests: + - kpis_outlier_detector - name: checkin_cover_fees_in_gbp data_type: decimal description: | Sum of checkin cover fees paid by guests, without taxes, in GBP in a given date and per specified dimension. + tests: + - kpis_outlier_detector - name: total_guest_payments_in_gbp data_type: decimal description: | Sum of total payments paid by guests, without taxes, in GBP in a given date and per specified dimension. + tests: + - kpis_outlier_detector - name: py_created_guest_journeys_not_cancelled data_type: bigint diff --git a/tests/kpis_product_metrics_outlier_detection.sql b/tests/kpis_product_metrics_outlier_detection.sql deleted file mode 100644 index b218bb9..0000000 --- a/tests/kpis_product_metrics_outlier_detection.sql +++ /dev/null @@ -1,79 +0,0 @@ -/* -This is a test to detect outlier in Guest KPI metrics. -It uses the standard deviation of the values in a -set number of days for all specified metrics -*/ --- Add here the metrics you want to check for outliers --- (metrics that are cumulative or count-based) -{% set metric_names = ( - "created_guest_journeys_not_cancelled", - "started_guest_journeys_not_cancelled", - "completed_guest_journeys_not_cancelled", - "created_guest_journeys", - "started_guest_journeys", - "completed_guest_journeys", - "total_csat_score_count", - "average_csat_score", - "deposit_fees_in_gbp", - "waiver_payments_in_gbp", - "checkin_cover_fees_in_gbp", - "total_guest_payments_in_gbp", -) %} --- Set the number of standard deviations (sigma) allowed --- for a value to be considered an outlier. --- Values beyond this threshold from the average are flagged --- as outliers. -{% set sigma_threshold = var("sigma_threshold", 3) %} --- Set the number of previous days' data (e.g., 14 days) to --- calculate the average and standard deviation for comparison. -{% set days_to_consider = 14 %} - -with - recent_data as ( - select * - from {{ ref("kpis__product_guest_daily_metrics") }} - where - date_day - between (current_date - interval '{{ days_to_consider + 1 }} days') and ( - current_date - interval '1 day' - ) - - ), - metrics_recent_data as ( - select - {% for metric in metric_names %} - avg({{ metric }}) as avg_{{ metric }}, - stddev({{ metric }}) as stddev_{{ metric }} - {% if not loop.last %},{% endif %} - {% endfor %} - from recent_data - ), - outliers as ( - select - rd.date_day, - {% for metric in metric_names %} - case - when - abs(rd.{{ metric }} - metrics_recent_data.avg_{{ metric }}) > ( - metrics_recent_data.stddev_{{ metric }} - * {{ sigma_threshold }} - ) - then 1 - else 0 - end as is_outlier_{{ metric }} - {% if not loop.last %},{% endif %} - {% endfor %} - from recent_data rd - cross join metrics_recent_data - where rd.date_day = current_date - interval '1 day' - ) -select - {% for metric in metric_names %} - sum(is_outlier_{{ metric }}) as outlier_count_{{ metric }} - {% if not loop.last %},{% endif %} - {% endfor %} -from outliers -having - {% for metric in metric_names %} - sum(is_outlier_{{ metric }}) > 0 {% if not loop.last %} or {% endif %} - {% endfor %}