From 5f92cf8948de36ba5e19ccaa8973ba28833e0db4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oriol=20Roqu=C3=A9=20Paniagua?= Date: Thu, 3 Apr 2025 07:04:35 +0000 Subject: [PATCH] Merged PR 4898: Small adaptations to KPIs Outlier detector test # Description Changes: * If the metric value is null, don't compute the test. * Use 1 year of data to compare against. * For debugging purposes, also compute the number of days used in the comparison. The not-null filter on the metric value is necessary to avoid raising errors while there are no Host Resolutions in the month. Keep in mind the test will keep failing and this is EXPECTED because of a massive increase in New Listings: ![image.png](https://guardhog.visualstudio.com/4148d95f-4b6d-4205-bcff-e9c8e0d2ca65/_apis/git/repositories/54ac356f-aad7-46d2-b62c-e8c5b3bb8ebf/pullRequests/4898/attachments/image.png) # Checklist - [X] The edited models and dependants run properly with production data. - [ ] The edited models are sufficiently documented. - [ ] The edited models contain PK tests, and I've run and passed them. - [ ] I have checked for DRY opportunities with other models and docs. - [ ] I've picked the right materialization for the affected models. # Other - [ ] Check if a full-refresh is required after this PR is merged. If the metric value is null, don't compute the test. Use 1y of data to compare against. --- tests/kpis_global_metrics_outlier_detection.sql | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/kpis_global_metrics_outlier_detection.sql b/tests/kpis_global_metrics_outlier_detection.sql index 594c809..9ab2e07 100644 --- a/tests/kpis_global_metrics_outlier_detection.sql +++ b/tests/kpis_global_metrics_outlier_detection.sql @@ -57,7 +57,7 @@ point it becomes too sensitive, just adapt the following parameters. -- means that this test will allow for more variance to be accepted, -- thus it will be more tolerant. -- A lower value means that the chances of detecting outliers --- and false positives will be higher. Recommended around 10. 
+-- and false positives will be higher. Recommended around 5. {% set detector_tolerance = 5 %} -- Specify here the number of days in the past that will be used @@ -66,7 +66,7 @@ point it becomes too sensitive, just adapt the following parameters. -- it means that we will take 1) all values of the current month -- except the latest update and 2) the end of month figures for the -- past 6 months max. -{% set timeline_to_compare_against = 180 %} +{% set timeline_to_compare_against = 366 %} with max_date as ( @@ -90,6 +90,7 @@ with and dimension = 'Global' and date between max_date -{{ timeline_to_compare_against }} and max_date and metric in {{ metric_names }} + and value is not null ), metrics_to_validate as ( select date, metric, value, abs_daily_value @@ -107,7 +108,8 @@ with 0 ) as lower_bound, avg(abs_daily_value) - + {{ detector_tolerance }} * stddev(abs_daily_value) as upper_bound + + {{ detector_tolerance }} * stddev(abs_daily_value) as upper_bound, + count(1) as days_used_for_computation from metric_data where is_max_date = 0 group by 1 @@ -122,6 +124,7 @@ with mtca.std_daily_value_previous_dates, mtca.lower_bound, mtca.upper_bound, + mtca.days_used_for_computation, case when mtv.abs_daily_value >= mtca.lower_bound