From 45d822441604f30c788063d9fce0cb131c0f9d0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oriol=20Roqu=C3=A9=20Paniagua?= Date: Mon, 10 Feb 2025 13:14:31 +0000 Subject: [PATCH] Merged PR 4326: Remove cancelled Bookings. Adds index in mtd_aggregated_metrics # Description Main changes: * Remove the previous Cancelled Bookings logic. * Speed up Main KPIs by adding a couple of indexes in `mtd_aggregated_metrics`. Tested in prod; to me it looks like it's quite fast now. # Checklist - [X] The edited models and dependants run properly with production data. - [X] The edited models are sufficiently documented. - [X] The edited models contain PK tests, and I've run and passed them. - [X] I have checked for DRY opportunities with other models and docs. - [X] I've picked the right materialization for the affected models. # Other - [ ] Check if a full-refresh is required after this PR is merged. Related work items: #24637 --- ...hly_aggregated_metrics_history_by_deal.sql | 10 - .../int_mtd_vs_previous_year_metrics.sql | 20 -- ...t_kpis__agg_monthly_cancelled_bookings.sql | 24 -- .../int_kpis__agg_mtd_cancelled_bookings.sql | 24 -- ..._kpis__metric_daily_cancelled_bookings.sql | 24 -- ...pis__metric_monthly_cancelled_bookings.sql | 27 -- ...nt_kpis__metric_mtd_cancelled_bookings.sql | 28 -- models/intermediate/kpis/schema.yml | 304 ------------------ .../general/mtd_aggregated_metrics.sql | 9 + 9 files changed, 9 insertions(+), 461 deletions(-) delete mode 100644 models/intermediate/kpis/int_kpis__agg_monthly_cancelled_bookings.sql delete mode 100644 models/intermediate/kpis/int_kpis__agg_mtd_cancelled_bookings.sql delete mode 100644 models/intermediate/kpis/int_kpis__metric_daily_cancelled_bookings.sql delete mode 100644 models/intermediate/kpis/int_kpis__metric_monthly_cancelled_bookings.sql delete mode 100644 models/intermediate/kpis/int_kpis__metric_mtd_cancelled_bookings.sql diff --git a/models/intermediate/cross/int_monthly_aggregated_metrics_history_by_deal.sql 
b/models/intermediate/cross/int_monthly_aggregated_metrics_history_by_deal.sql index 3ce4dbb..368258f 100644 --- a/models/intermediate/cross/int_monthly_aggregated_metrics_history_by_deal.sql +++ b/models/intermediate/cross/int_monthly_aggregated_metrics_history_by_deal.sql @@ -31,11 +31,6 @@ with from {{ ref("int_kpis__agg_monthly_check_out_bookings") }} where dimension in ('by_deal') and dimension_value <> 'UNSET' ), - cancelled_bookings as ( - select * - from {{ ref("int_kpis__agg_monthly_cancelled_bookings") }} - where dimension in ('by_deal') and dimension_value <> 'UNSET' - ), billable_bookings as ( select * from {{ ref("int_kpis__agg_monthly_billable_bookings") }} @@ -105,7 +100,6 @@ select check_out_bookings.cancelled_check_out_bookings_rate, -- OTHER BOOKINGS -- - cancelled_bookings.cancelled_bookings, billable_bookings.billable_bookings, -- GUEST JOURNEYS -- @@ -291,10 +285,6 @@ left join check_out_bookings on d.date = check_out_bookings.end_date and d.dimension_value = check_out_bookings.dimension_value -left join - cancelled_bookings - on d.date = cancelled_bookings.end_date - and d.dimension_value = cancelled_bookings.dimension_value left join billable_bookings on d.date = billable_bookings.end_date diff --git a/models/intermediate/cross/int_mtd_vs_previous_year_metrics.sql b/models/intermediate/cross/int_mtd_vs_previous_year_metrics.sql index bd34815..1e114de 100644 --- a/models/intermediate/cross/int_mtd_vs_previous_year_metrics.sql +++ b/models/intermediate/cross/int_mtd_vs_previous_year_metrics.sql @@ -30,19 +30,6 @@ with dimension in ('global', 'by_number_of_listings', 'by_billing_country') and dimension_value <> 'UNSET' ), - cancelled_bookings as ( - select * - from {{ ref("int_kpis__agg_mtd_cancelled_bookings") }} - where - dimension in ('global', 'by_number_of_listings', 'by_billing_country') - and dimension_value <> 'UNSET' - union all - select * - from {{ ref("int_kpis__agg_monthly_cancelled_bookings") }} - where - dimension in ('global', 
'by_number_of_listings', 'by_billing_country') - and dimension_value <> 'UNSET' - ), billable_bookings as ( select * from {{ ref("int_kpis__agg_mtd_billable_bookings") }} @@ -205,7 +192,6 @@ with check_out_bookings.cancelled_check_out_bookings_rate, -- OTHER BOOKINGS -- - cancelled_bookings.cancelled_bookings, billable_bookings.billable_bookings, -- GUEST JOURNEYS -- @@ -399,11 +385,6 @@ with on d.date = check_out_bookings.end_date and d.dimension = check_out_bookings.dimension and d.dimension_value = check_out_bookings.dimension_value - left join - cancelled_bookings - on d.date = cancelled_bookings.end_date - and d.dimension = cancelled_bookings.dimension - and d.dimension_value = cancelled_bookings.dimension_value left join billable_bookings on d.date = billable_bookings.end_date @@ -497,7 +478,6 @@ select {{ calculate_safe_relative_increment("cancelled_check_out_bookings_rate") }}, -- OTHER BOOKINGS -- - {{ calculate_safe_relative_increment("cancelled_bookings") }}, {{ calculate_safe_relative_increment("billable_bookings") }}, -- GUEST JOURNEYS -- diff --git a/models/intermediate/kpis/int_kpis__agg_monthly_cancelled_bookings.sql b/models/intermediate/kpis/int_kpis__agg_monthly_cancelled_bookings.sql deleted file mode 100644 index 61c62ef..0000000 --- a/models/intermediate/kpis/int_kpis__agg_monthly_cancelled_bookings.sql +++ /dev/null @@ -1,24 +0,0 @@ -{% set dimensions = get_kpi_dimensions_per_model("CANCELLED_BOOKINGS") %} - -{{ - config( - materialized="table", unique_key=["end_date", "dimension", "dimension_value"] - ) -}} - - -{% for dimension in dimensions %} - select - -- Unique Key -- - start_date, - end_date, - {{ dimension.dimension }} as dimension, - {{ dimension.dimension_value }} as dimension_value, - -- Metrics -- - sum(cancelled_bookings) as cancelled_bookings - from {{ ref("int_kpis__metric_monthly_cancelled_bookings") }} - group by 1, 2, 3, 4 - {% if not loop.last %} - union all - {% endif %} -{% endfor %} diff --git 
a/models/intermediate/kpis/int_kpis__agg_mtd_cancelled_bookings.sql b/models/intermediate/kpis/int_kpis__agg_mtd_cancelled_bookings.sql deleted file mode 100644 index 097a20a..0000000 --- a/models/intermediate/kpis/int_kpis__agg_mtd_cancelled_bookings.sql +++ /dev/null @@ -1,24 +0,0 @@ -{% set dimensions = get_kpi_dimensions_per_model("CANCELLED_BOOKINGS") %} - -{{ - config( - materialized="table", unique_key=["end_date", "dimension", "dimension_value"] - ) -}} - - -{% for dimension in dimensions %} - select - -- Unique Key -- - start_date, - end_date, - {{ dimension.dimension }} as dimension, - {{ dimension.dimension_value }} as dimension_value, - -- Metrics -- - sum(cancelled_bookings) as cancelled_bookings - from {{ ref("int_kpis__metric_mtd_cancelled_bookings") }} - group by 1, 2, 3, 4 - {% if not loop.last %} - union all - {% endif %} -{% endfor %} diff --git a/models/intermediate/kpis/int_kpis__metric_daily_cancelled_bookings.sql b/models/intermediate/kpis/int_kpis__metric_daily_cancelled_bookings.sql deleted file mode 100644 index 6e7f509..0000000 --- a/models/intermediate/kpis/int_kpis__metric_daily_cancelled_bookings.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config(materialized="table", unique_key=["date", "id_deal"]) }} -select - -- Unique Key -- - icb.updated_date_utc as date, - coalesce(icuh.id_deal, 'UNSET') as id_deal, - -- Dimensions -- - coalesce( - icd.main_billing_country_iso_3_per_deal, 'UNSET' - ) as main_billing_country_iso_3_per_deal, - coalesce( - icmas.active_accommodations_per_deal_segmentation, 'UNSET' - ) as active_accommodations_per_deal_segmentation, - -- Metrics -- - count(distinct icb.id_booking) as cancelled_bookings -from {{ ref("int_core__bookings") }} as icb -left join - {{ ref("int_core__user_host") }} as icuh on icb.id_user_host = icuh.id_user_host -left join {{ ref("int_core__deal") }} as icd on icuh.id_deal = icd.id_deal -left join - {{ ref("int_kpis__dimension_daily_accommodation") }} as icmas - on icuh.id_deal = icmas.id_deal - 
and icb.updated_date_utc = icmas.date -where upper(icb.booking_state) = {{ var("cancelled_booking_state") }} -group by 1, 2, 3, 4 diff --git a/models/intermediate/kpis/int_kpis__metric_monthly_cancelled_bookings.sql b/models/intermediate/kpis/int_kpis__metric_monthly_cancelled_bookings.sql deleted file mode 100644 index 84e1623..0000000 --- a/models/intermediate/kpis/int_kpis__metric_monthly_cancelled_bookings.sql +++ /dev/null @@ -1,27 +0,0 @@ -{{ - config( - materialized="view", - unique_key=[ - "end_date", - "id_deal", - "active_accommodations_per_deal_segmentation", - ], - ) -}} - -select - -- Unique Key -- - d.first_day_month as start_date, - d.date as end_date, - b.id_deal, - b.active_accommodations_per_deal_segmentation, - -- Dimensions -- - b.main_billing_country_iso_3_per_deal, - -- Metrics -- - sum(b.cancelled_bookings) as cancelled_bookings -from {{ ref("int_kpis__dimension_dates") }} d -left join - {{ ref("int_kpis__metric_daily_cancelled_bookings") }} b - on date_trunc('month', b.date)::date = d.first_day_month -where d.is_end_of_month = true and b.id_deal is not null -group by 1, 2, 3, 4, 5 diff --git a/models/intermediate/kpis/int_kpis__metric_mtd_cancelled_bookings.sql b/models/intermediate/kpis/int_kpis__metric_mtd_cancelled_bookings.sql deleted file mode 100644 index 2ccb35d..0000000 --- a/models/intermediate/kpis/int_kpis__metric_mtd_cancelled_bookings.sql +++ /dev/null @@ -1,28 +0,0 @@ -{{ - config( - materialized="view", - unique_key=[ - "end_date", - "id_deal", - "active_accommodations_per_deal_segmentation", - ], - ) -}} - -select - -- Unique Key -- - d.first_day_month as start_date, - d.date as end_date, - b.id_deal, - b.active_accommodations_per_deal_segmentation, - -- Dimensions -- - b.main_billing_country_iso_3_per_deal, - -- Metrics -- - sum(b.cancelled_bookings) as cancelled_bookings -from {{ ref("int_kpis__dimension_dates") }} d -left join - {{ ref("int_kpis__metric_daily_cancelled_bookings") }} b - on date_trunc('month', b.date)::date 
= d.first_day_month - and extract(day from b.date) <= d.day -where d.is_month_to_date = true and b.id_deal is not null -group by 1, 2, 3, 4, 5 diff --git a/models/intermediate/kpis/schema.yml b/models/intermediate/kpis/schema.yml index e9c0394..b2b1492 100644 --- a/models/intermediate/kpis/schema.yml +++ b/models/intermediate/kpis/schema.yml @@ -3523,310 +3523,6 @@ models: data_type: bigint description: The month-to-date billable bookings for a given date, dimension and value. - - name: int_kpis__metric_daily_cancelled_bookings - description: | - This model computes the Daily Cancelled Bookings at the deepest granularity. - - The unique key corresponds to the deepest granularity of the model, - in this case: - - date, - - id_deal. - - data_tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - date - - id_deal - - columns: - - name: date - data_type: date - description: Date of when Bookings have been cancelled. - data_tests: - - not_null - - - name: id_deal - data_type: string - description: Unique identifier of an account. - data_tests: - - not_null - - - name: active_accommodations_per_deal_segmentation - data_type: string - description: | - Segment value based on the number of listings booked in 12 months - for a given deal and date. - data_tests: - - not_null - - accepted_values: - values: - - "0" - - "01-05" - - "06-20" - - "21-60" - - "61+" - - "UNSET" - - - name: main_billing_country_iso_3_per_deal - data_type: string - description: | - Main billing country of the host aggregated at Deal level. - data_tests: - - not_null - - - name: cancelled_bookings - data_type: bigint - description: | - Count of daily bookings cancelled in a given date and per specified dimension. - - - name: int_kpis__metric_monthly_cancelled_bookings - description: | - This model computes the Monthly Cancelled Bookings at the - deepest granularity. 
- Be aware that any dimension that can change over the monthly period, - such as daily segmentations, are included in the primary key of the - model. - - The unique key corresponds to: - - end_date, - - id_deal, - - active_accommodations_per_deal_segmentation. - - data_tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - end_date - - id_deal - - active_accommodations_per_deal_segmentation - - columns: - - name: start_date - data_type: date - description: | - The start date of the time range considered for the metrics in this record. - data_tests: - - not_null - - - name: end_date - data_type: date - description: | - The end date of the time range considered for the metrics in this record. - data_tests: - - not_null - - - name: id_deal - data_type: string - description: Unique identifier of an account. - data_tests: - - not_null - - - name: active_accommodations_per_deal_segmentation - data_type: string - description: | - Segment value based on the number of listings booked in 12 months - for a given deal and date. - data_tests: - - not_null - - accepted_values: - values: - - "0" - - "01-05" - - "06-20" - - "21-60" - - "61+" - - "UNSET" - - - name: main_billing_country_iso_3_per_deal - data_type: string - description: | - Main billing country of the host aggregated at Deal level. - data_tests: - - not_null - - - name: cancelled_bookings - data_type: bigint - description: | - Count of accumulated bookings cancelled in a given month - and per specified dimension. - - - name: int_kpis__metric_mtd_cancelled_bookings - description: | - This model computes the Month-To-Date Cancelled Bookings at the - deepest granularity. - Be aware that any dimension that can change over the monthly period, - such as daily segmentations, are included in the primary key of the - model. - - The unique key corresponds to: - - end_date, - - id_deal, - - active_accommodations_per_deal_segmentation. 
- - data_tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - end_date - - id_deal - - active_accommodations_per_deal_segmentation - - columns: - - name: start_date - data_type: date - description: | - The start date of the time range considered for the metrics in this record. - data_tests: - - not_null - - - name: end_date - data_type: date - description: | - The end date of the time range considered for the metrics in this record. - data_tests: - - not_null - - - name: id_deal - data_type: string - description: Unique identifier of an account. - data_tests: - - not_null - - - name: active_accommodations_per_deal_segmentation - data_type: string - description: | - Segment value based on the number of listings booked in 12 months - for a given deal and date. - data_tests: - - not_null - - accepted_values: - values: - - "0" - - "01-05" - - "06-20" - - "21-60" - - "61+" - - "UNSET" - - - name: main_billing_country_iso_3_per_deal - data_type: string - description: | - Main billing country of the host aggregated at Deal level. - data_tests: - - not_null - - - name: cancelled_bookings - data_type: bigint - description: | - Count of accumulated bookings cancelled in a given month up to the - given date and per specified dimension. - - - name: int_kpis__agg_monthly_cancelled_bookings - description: | - This model computes the dimension aggregation for - Monthly Cancelled Bookings. - - The primary key of this model is end_date, dimension - and dimension_value. - - data_tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - end_date - - dimension - - dimension_value - - columns: - - name: start_date - data_type: date - description: | - The start date of the time range considered for the metrics in this record. - data_tests: - - not_null - - - name: end_date - data_type: date - description: | - The end date of the time range considered for the metrics in this record. 
- data_tests: - - not_null - - - name: dimension - data_type: string - description: The dimension or granularity of the metrics. - data_tests: - - assert_dimension_completeness: - metric_column_names: - - cancelled_bookings - - accepted_values: - values: - - global - - by_number_of_listings - - by_billing_country - - by_deal - - - name: dimension_value - data_type: string - description: The value or segment available for the selected dimension. - data_tests: - - not_null - - - name: cancelled_bookings - data_type: bigint - description: The monthly cancelled bookings for a given date, dimension and value. - - - name: int_kpis__agg_mtd_cancelled_bookings - description: | - This model computes the dimension aggregation for - Month-To-Date Cancelled Bookings. - - The primary key of this model is end_date, dimension - and dimension_value. - - data_tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - end_date - - dimension - - dimension_value - - columns: - - name: start_date - data_type: date - description: | - The start date of the time range considered for the metrics in this record. - data_tests: - - not_null - - - name: end_date - data_type: date - description: | - The end date of the time range considered for the metrics in this record. - data_tests: - - not_null - - - name: dimension - data_type: string - description: The dimension or granularity of the metrics. - data_tests: - - assert_dimension_completeness: - metric_column_names: - - cancelled_bookings - - accepted_values: - values: - - global - - by_number_of_listings - - by_billing_country - - by_deal - - - name: dimension_value - data_type: string - description: The value or segment available for the selected dimension. - data_tests: - - not_null - - - name: cancelled_bookings - data_type: bigint - description: The month-to-date cancelled bookings for a given date, dimension and value. 
- - name: int_kpis__metric_daily_check_in_attributed_guest_journeys description: | This model computes Guest Journey metrics at the deepest granularity diff --git a/models/reporting/general/mtd_aggregated_metrics.sql b/models/reporting/general/mtd_aggregated_metrics.sql index 0ac4405..cd3da5b 100644 --- a/models/reporting/general/mtd_aggregated_metrics.sql +++ b/models/reporting/general/mtd_aggregated_metrics.sql @@ -1,5 +1,14 @@ {% set production_dimensions = get_kpi_dimensions_for_production() %} +{{ + config( + materialized="table", + indexes=[ + {"columns": ["dimension"]}, + {"columns": ["dimension", "date"]}, + ], + ) +}} with dimensions as ( {% for dimension in production_dimensions %}