Merged PR 4326: Remove cancelled Bookings. Adds index in mtd_aggregated_metrics

# Description

Main changes:
* Remove the previous Cancelled Bookings logic.
* Speed up Main KPIs by adding a couple of indexes on `mtd_aggregated_metrics`. Tested in prod; to me it looks quite fast now.

# Checklist

- [X] The edited models and dependants run properly with production data.
- [X] The edited models are sufficiently documented.
- [X] The edited models contain PK tests, and I've run and passed them.
- [X] I have checked for DRY opportunities with other models and docs.
- [X] I've picked the right materialization for the affected models.

# Other

- [ ] Check if a full-refresh is required after this PR is merged.

Related work items: #24637
This commit is contained in:
Oriol Roqué Paniagua 2025-02-10 13:14:31 +00:00
parent 167261b9ee
commit 45d8224416
9 changed files with 9 additions and 461 deletions

View file

@ -1,24 +0,0 @@
{#
    Dimension aggregation of Monthly Cancelled Bookings.

    Emits one SELECT per entry returned by
    get_kpi_dimensions_per_model("CANCELLED_BOOKINGS") and stacks the results
    with UNION ALL, so each output row is identified by
    (end_date, dimension, dimension_value).
#}
{% set kpi_dimensions = get_kpi_dimensions_per_model("CANCELLED_BOOKINGS") %}
{{
    config(
        materialized="table",
        unique_key=["end_date", "dimension", "dimension_value"]
    )
}}
{% for kpi_dimension in kpi_dimensions %}
    select
        -- Unique Key --
        start_date,
        end_date,
        {{ kpi_dimension.dimension }} as dimension,
        {{ kpi_dimension.dimension_value }} as dimension_value,
        -- Metrics --
        sum(cancelled_bookings) as cancelled_bookings
    from {{ ref("int_kpis__metric_monthly_cancelled_bookings") }}
    -- Positional on purpose: columns 3 and 4 are expressions supplied by the
    -- dimensions macro, not plain column names.
    group by 1, 2, 3, 4
    {% if not loop.last %}union all{% endif %}
{% endfor %}

View file

@ -1,24 +0,0 @@
{#
    Dimension aggregation of Month-To-Date Cancelled Bookings.

    Emits one SELECT per entry returned by
    get_kpi_dimensions_per_model("CANCELLED_BOOKINGS") and stacks the results
    with UNION ALL, so each output row is identified by
    (end_date, dimension, dimension_value).
#}
{% set kpi_dimensions = get_kpi_dimensions_per_model("CANCELLED_BOOKINGS") %}
{{
    config(
        materialized="table",
        unique_key=["end_date", "dimension", "dimension_value"]
    )
}}
{% for kpi_dimension in kpi_dimensions %}
    select
        -- Unique Key --
        start_date,
        end_date,
        {{ kpi_dimension.dimension }} as dimension,
        {{ kpi_dimension.dimension_value }} as dimension_value,
        -- Metrics --
        sum(cancelled_bookings) as cancelled_bookings
    from {{ ref("int_kpis__metric_mtd_cancelled_bookings") }}
    -- Positional on purpose: columns 3 and 4 are expressions supplied by the
    -- dimensions macro, not plain column names.
    group by 1, 2, 3, 4
    {% if not loop.last %}union all{% endif %}
{% endfor %}

View file

@ -1,24 +0,0 @@
{#
    Daily Cancelled Bookings at deal level.

    Counts distinct bookings whose upper-cased booking_state equals the
    project variable "cancelled_booking_state", dated by the booking's
    updated_date_utc. Host, deal and daily segmentation are attached with
    left joins; any missing deal, billing country or segmentation value is
    reported as 'UNSET'.
#}
{{ config(materialized="table", unique_key=["date", "id_deal"]) }}
select
    -- Unique Key --
    bookings.updated_date_utc as date,
    coalesce(hosts.id_deal, 'UNSET') as id_deal,
    -- Dimensions --
    coalesce(
        deals.main_billing_country_iso_3_per_deal, 'UNSET'
    ) as main_billing_country_iso_3_per_deal,
    coalesce(
        segments.active_accommodations_per_deal_segmentation, 'UNSET'
    ) as active_accommodations_per_deal_segmentation,
    -- Metrics --
    count(distinct bookings.id_booking) as cancelled_bookings
from {{ ref("int_core__bookings") }} as bookings
left join
    {{ ref("int_core__user_host") }} as hosts
    on hosts.id_user_host = bookings.id_user_host
left join
    {{ ref("int_core__deal") }} as deals
    on deals.id_deal = hosts.id_deal
left join
    {{ ref("int_kpis__dimension_daily_accommodation") }} as segments
    -- The segmentation is looked up for the same deal on the same day.
    on segments.id_deal = hosts.id_deal
    and segments.date = bookings.updated_date_utc
where upper(bookings.booking_state) = {{ var("cancelled_booking_state") }}
-- Positional on purpose: an input column named "date" also exists
-- (segments.date), so grouping by the output alias name could be ambiguous.
group by 1, 2, 3, 4

View file

@ -1,27 +0,0 @@
{#
    Monthly Cancelled Bookings at the deepest granularity.

    For every date flagged is_end_of_month in the dates dimension, sums the
    daily cancelled bookings whose month (date_trunc to month) matches that
    date's first_day_month.

    NOTE(review): unique_key is passed to config() alongside a "view"
    materialization; confirm whether it has any effect here or is kept for
    documentation only.
#}
{{
    config(
        materialized="view",
        unique_key=[
            "end_date",
            "id_deal",
            "active_accommodations_per_deal_segmentation",
        ],
    )
}}
select
    -- Unique Key --
    dates.first_day_month as start_date,
    dates.date as end_date,
    daily.id_deal,
    daily.active_accommodations_per_deal_segmentation,
    -- Dimensions --
    daily.main_billing_country_iso_3_per_deal,
    -- Metrics --
    sum(daily.cancelled_bookings) as cancelled_bookings
from {{ ref("int_kpis__dimension_dates") }} as dates
-- Months without any daily row are dropped: the id_deal filter below discards
-- unmatched rows, so an inner join yields the same result as an outer one.
inner join
    {{ ref("int_kpis__metric_daily_cancelled_bookings") }} as daily
    on dates.first_day_month = date_trunc('month', daily.date)::date
where dates.is_end_of_month = true and daily.id_deal is not null
group by 1, 2, 3, 4, 5

View file

@ -1,28 +0,0 @@
{#
    Month-To-Date Cancelled Bookings at the deepest granularity.

    For every date flagged is_month_to_date in the dates dimension, sums the
    daily cancelled bookings of the same month whose day-of-month is less
    than or equal to that date's day.

    NOTE(review): unique_key is passed to config() alongside a "view"
    materialization; confirm whether it has any effect here or is kept for
    documentation only.
#}
{{
    config(
        materialized="view",
        unique_key=[
            "end_date",
            "id_deal",
            "active_accommodations_per_deal_segmentation",
        ],
    )
}}
select
    -- Unique Key --
    dates.first_day_month as start_date,
    dates.date as end_date,
    daily.id_deal,
    daily.active_accommodations_per_deal_segmentation,
    -- Dimensions --
    daily.main_billing_country_iso_3_per_deal,
    -- Metrics --
    sum(daily.cancelled_bookings) as cancelled_bookings
from {{ ref("int_kpis__dimension_dates") }} as dates
-- Dates without any daily row are dropped: the id_deal filter below discards
-- unmatched rows, so an inner join yields the same result as an outer one.
inner join
    {{ ref("int_kpis__metric_daily_cancelled_bookings") }} as daily
    -- Same month as the reporting date ...
    on dates.first_day_month = date_trunc('month', daily.date)::date
    -- ... and no later in the month than the reporting date's day.
    and extract(day from daily.date) <= dates.day
where dates.is_month_to_date = true and daily.id_deal is not null
group by 1, 2, 3, 4, 5

View file

@ -3523,310 +3523,6 @@ models:
data_type: bigint
description: The month-to-date billable bookings for a given date, dimension and value.
- name: int_kpis__metric_daily_cancelled_bookings
# Schema entry for the daily cancelled-bookings metric model:
# model description, model-level uniqueness test and per-column docs/tests.
description: |
This model computes the Daily Cancelled Bookings at the deepest granularity.
The unique key corresponds to the deepest granularity of the model,
in this case:
- date,
- id_deal.
data_tests:
# Enforces the unique key stated in the description: (date, id_deal).
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- date
- id_deal
# Key and dimension columns carry a not_null test; the metric column
# (cancelled_bookings) is documented but has no test attached.
columns:
- name: date
data_type: date
description: Date of when Bookings have been cancelled.
data_tests:
- not_null
- name: id_deal
data_type: string
description: Unique identifier of an account.
data_tests:
- not_null
- name: active_accommodations_per_deal_segmentation
data_type: string
description: |
Segment value based on the number of listings booked in 12 months
for a given deal and date.
data_tests:
- not_null
# Closed list of segment labels; "UNSET" is one of the allowed values.
- accepted_values:
values:
- "0"
- "01-05"
- "06-20"
- "21-60"
- "61+"
- "UNSET"
- name: main_billing_country_iso_3_per_deal
data_type: string
description: |
Main billing country of the host aggregated at Deal level.
data_tests:
- not_null
- name: cancelled_bookings
data_type: bigint
description: |
Count of daily bookings cancelled in a given date and per specified dimension.
- name: int_kpis__metric_monthly_cancelled_bookings
# Schema entry for the monthly cancelled-bookings metric model:
# model description, model-level uniqueness test and per-column docs/tests.
description: |
This model computes the Monthly Cancelled Bookings at the
deepest granularity.
Be aware that any dimension that can change over the monthly period,
such as daily segmentations, are included in the primary key of the
model.
The unique key corresponds to:
- end_date,
- id_deal,
- active_accommodations_per_deal_segmentation.
data_tests:
# Enforces the unique key stated in the description:
# (end_date, id_deal, active_accommodations_per_deal_segmentation).
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- end_date
- id_deal
- active_accommodations_per_deal_segmentation
# Date, key and dimension columns carry a not_null test; the metric column
# (cancelled_bookings) is documented but has no test attached.
columns:
- name: start_date
data_type: date
description: |
The start date of the time range considered for the metrics in this record.
data_tests:
- not_null
- name: end_date
data_type: date
description: |
The end date of the time range considered for the metrics in this record.
data_tests:
- not_null
- name: id_deal
data_type: string
description: Unique identifier of an account.
data_tests:
- not_null
- name: active_accommodations_per_deal_segmentation
data_type: string
description: |
Segment value based on the number of listings booked in 12 months
for a given deal and date.
data_tests:
- not_null
# Closed list of segment labels; "UNSET" is one of the allowed values.
- accepted_values:
values:
- "0"
- "01-05"
- "06-20"
- "21-60"
- "61+"
- "UNSET"
- name: main_billing_country_iso_3_per_deal
data_type: string
description: |
Main billing country of the host aggregated at Deal level.
data_tests:
- not_null
- name: cancelled_bookings
data_type: bigint
description: |
Count of accumulated bookings cancelled in a given month
and per specified dimension.
- name: int_kpis__metric_mtd_cancelled_bookings
# Schema entry for the month-to-date cancelled-bookings metric model:
# model description, model-level uniqueness test and per-column docs/tests.
description: |
This model computes the Month-To-Date Cancelled Bookings at the
deepest granularity.
Be aware that any dimension that can change over the monthly period,
such as daily segmentations, are included in the primary key of the
model.
The unique key corresponds to:
- end_date,
- id_deal,
- active_accommodations_per_deal_segmentation.
data_tests:
# Enforces the unique key stated in the description:
# (end_date, id_deal, active_accommodations_per_deal_segmentation).
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- end_date
- id_deal
- active_accommodations_per_deal_segmentation
# Date, key and dimension columns carry a not_null test; the metric column
# (cancelled_bookings) is documented but has no test attached.
columns:
- name: start_date
data_type: date
description: |
The start date of the time range considered for the metrics in this record.
data_tests:
- not_null
- name: end_date
data_type: date
description: |
The end date of the time range considered for the metrics in this record.
data_tests:
- not_null
- name: id_deal
data_type: string
description: Unique identifier of an account.
data_tests:
- not_null
- name: active_accommodations_per_deal_segmentation
data_type: string
description: |
Segment value based on the number of listings booked in 12 months
for a given deal and date.
data_tests:
- not_null
# Closed list of segment labels; "UNSET" is one of the allowed values.
- accepted_values:
values:
- "0"
- "01-05"
- "06-20"
- "21-60"
- "61+"
- "UNSET"
- name: main_billing_country_iso_3_per_deal
data_type: string
description: |
Main billing country of the host aggregated at Deal level.
data_tests:
- not_null
- name: cancelled_bookings
data_type: bigint
description: |
Count of accumulated bookings cancelled in a given month up to the
given date and per specified dimension.
- name: int_kpis__agg_monthly_cancelled_bookings
# Schema entry for the dimension-aggregated monthly cancelled-bookings model:
# model description, model-level uniqueness test and per-column docs/tests.
description: |
This model computes the dimension aggregation for
Monthly Cancelled Bookings.
The primary key of this model is end_date, dimension
and dimension_value.
data_tests:
# Enforces the primary key stated in the description:
# (end_date, dimension, dimension_value).
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- end_date
- dimension
- dimension_value
# start_date, end_date and dimension_value carry a not_null test; dimension is
# checked by the two tests below; cancelled_bookings has no test attached.
columns:
- name: start_date
data_type: date
description: |
The start date of the time range considered for the metrics in this record.
data_tests:
- not_null
- name: end_date
data_type: date
description: |
The end date of the time range considered for the metrics in this record.
data_tests:
- not_null
- name: dimension
data_type: string
description: The dimension or granularity of the metrics.
data_tests:
# Project-defined test applied to the cancelled_bookings metric; its exact
# semantics are defined outside this section — see the test's own definition.
- assert_dimension_completeness:
metric_column_names:
- cancelled_bookings
# Closed list of dimension names this model may emit.
- accepted_values:
values:
- global
- by_number_of_listings
- by_billing_country
- by_deal
- name: dimension_value
data_type: string
description: The value or segment available for the selected dimension.
data_tests:
- not_null
- name: cancelled_bookings
data_type: bigint
description: The monthly cancelled bookings for a given date, dimension and value.
- name: int_kpis__agg_mtd_cancelled_bookings
# Schema entry for the dimension-aggregated month-to-date cancelled-bookings
# model: model description, model-level uniqueness test and per-column docs/tests.
description: |
This model computes the dimension aggregation for
Month-To-Date Cancelled Bookings.
The primary key of this model is end_date, dimension
and dimension_value.
data_tests:
# Enforces the primary key stated in the description:
# (end_date, dimension, dimension_value).
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- end_date
- dimension
- dimension_value
# start_date, end_date and dimension_value carry a not_null test; dimension is
# checked by the two tests below; cancelled_bookings has no test attached.
columns:
- name: start_date
data_type: date
description: |
The start date of the time range considered for the metrics in this record.
data_tests:
- not_null
- name: end_date
data_type: date
description: |
The end date of the time range considered for the metrics in this record.
data_tests:
- not_null
- name: dimension
data_type: string
description: The dimension or granularity of the metrics.
data_tests:
# Project-defined test applied to the cancelled_bookings metric; its exact
# semantics are defined outside this section — see the test's own definition.
- assert_dimension_completeness:
metric_column_names:
- cancelled_bookings
# Closed list of dimension names this model may emit.
- accepted_values:
values:
- global
- by_number_of_listings
- by_billing_country
- by_deal
- name: dimension_value
data_type: string
description: The value or segment available for the selected dimension.
data_tests:
- not_null
- name: cancelled_bookings
data_type: bigint
description: The month-to-date cancelled bookings for a given date, dimension and value.
- name: int_kpis__metric_daily_check_in_attributed_guest_journeys
description: |
This model computes Guest Journey metrics at the deepest granularity