Merged PR 2753: Remove distincts from booking counts

# Description

This PR removes multiple `distinct` statements from counts on the booking metrics.

I'm doing this:
- Because they are making the model query perform terribly, and it's already starting to be way too slow.
- Because they are not really necessary: the uniqueness of the fields being `distinct`-ed is already tested in upstream models.

# Checklist

- [X] The edited models and dependants run properly with production data.
- [X] The edited models are sufficiently documented.
- [X] The edited models contain PK tests, and I've run and passed them.
- [X] I have checked for DRY opportunities with other models and docs.
- [X] I've picked the right materialization for the affected models.

# Other

- [ ] Check if a full-refresh is required after this PR is merged.

Related work items: #20835
This commit is contained in:
Pablo Martín 2024-09-05 14:54:50 +00:00
commit f553e73089

View file

@ -2,7 +2,6 @@
This model provides Month-To-Date (MTD) based on Booking metrics.
*/
{% set dimensions = get_kpi_dimensions() %}
{{ config(materialized="table", unique_key=["date", "dimension", "dimension_value"]) }}
@ -17,7 +16,9 @@ with
select * from {{ ref("int_core__booking_charge_events") }}
),
int_dates_mtd as (select * from {{ ref("int_dates_mtd") }}),
int_dates_mtd_by_dimension as (select * from {{ ref("int_dates_mtd_by_dimension") }}),
int_dates_mtd_by_dimension as (
select * from {{ ref("int_dates_mtd_by_dimension") }}
),
-- Created Bookings MTD --
created_year_month as (
@ -26,18 +27,22 @@ with
d.date,
{{ dimension.dimension }} as dimension,
{{ dimension.dimension_value }} as dimension_value,
count(distinct b.id_booking) as created_bookings
count(b.id_booking) as created_bookings
from int_dates_mtd d
inner join int_core__bookings b
inner join
int_core__bookings b
on date_trunc('month', b.created_date_utc)::date = d.first_day_month
and extract(day from b.created_date_utc) <= d.day
{% if dimension.dimension == "'by_number_of_listings'" %}
inner join int_core__user_host u on b.id_user_host = u.id_user_host
inner join int_core__mtd_accommodation_segmentation mas
inner join
int_core__mtd_accommodation_segmentation mas
on u.id_deal = mas.id_deal
and d.date = mas.date
{% elif dimension.dimension == "'by_billing_country'" %}
inner join int_core__user_host u on b.id_user_host = u.id_user_host
inner join
int_core__user_host u
on b.id_user_host = u.id_user_host
and u.main_billing_country_iso_3_per_deal is not null
{% endif %}
group by 1, 2, 3
@ -53,18 +58,22 @@ with
d.date,
{{ dimension.dimension }} as dimension,
{{ dimension.dimension_value }} as dimension_value,
count(distinct b.id_booking) as check_out_bookings
count(b.id_booking) as check_out_bookings
from int_dates_mtd d
inner join int_core__bookings b
inner join
int_core__bookings b
on date_trunc('month', b.check_out_date_utc)::date = d.first_day_month
and extract(day from b.check_out_date_utc) <= d.day
{% if dimension.dimension == "'by_number_of_listings'" %}
inner join int_core__user_host u on b.id_user_host = u.id_user_host
inner join int_core__mtd_accommodation_segmentation mas
inner join
int_core__mtd_accommodation_segmentation mas
on u.id_deal = mas.id_deal
and d.date = mas.date
{% elif dimension.dimension == "'by_billing_country'" %}
inner join int_core__user_host u on b.id_user_host = u.id_user_host
inner join
int_core__user_host u
on b.id_user_host = u.id_user_host
and u.main_billing_country_iso_3_per_deal is not null
{% endif %}
group by 1, 2, 3
@ -80,19 +89,23 @@ with
d.date,
{{ dimension.dimension }} as dimension,
{{ dimension.dimension_value }} as dimension_value,
count(distinct b.id_booking) as cancelled_bookings
count(b.id_booking) as cancelled_bookings
from int_dates_mtd d
inner join int_core__bookings b
inner join
int_core__bookings b
on date_trunc('month', b.updated_date_utc)::date = d.first_day_month
and extract(day from b.updated_date_utc) <= d.day
and upper(b.booking_state) = {{ var("cancelled_booking_state") }}
{% if dimension.dimension == "'by_number_of_listings'" %}
inner join int_core__user_host u on b.id_user_host = u.id_user_host
inner join int_core__mtd_accommodation_segmentation mas
inner join
int_core__mtd_accommodation_segmentation mas
on u.id_deal = mas.id_deal
and d.date = mas.date
{% elif dimension.dimension == "'by_billing_country'" %}
inner join int_core__user_host u on b.id_user_host = u.id_user_host
inner join
int_core__user_host u
on b.id_user_host = u.id_user_host
and u.main_billing_country_iso_3_per_deal is not null
{% endif %}
group by 1, 2, 3
@ -108,10 +121,12 @@ with
d.date,
{{ dimension.dimension }} as dimension,
{{ dimension.dimension_value }} as dimension_value,
count(distinct bce.id_booking) as billable_bookings
count(bce.id_booking) as billable_bookings
from int_dates_mtd d
inner join int_core__booking_charge_events bce
on date_trunc('month', bce.booking_fee_charge_date_utc)::date = d.first_day_month
inner join
int_core__booking_charge_events bce
on date_trunc('month', bce.booking_fee_charge_date_utc)::date
= d.first_day_month
and extract(day from bce.booking_fee_charge_date_utc) <= d.day
{% if dimension.dimension == "'by_number_of_listings'" %}
inner join int_core__bookings b on b.id_booking = bce.id_booking
@ -122,7 +137,9 @@ with
and d.date = mas.date
{% elif dimension.dimension == "'by_billing_country'" %}
inner join int_core__bookings b on b.id_booking = bce.id_booking
inner join int_core__user_host u on b.id_user_host = u.id_user_host
inner join
int_core__user_host u
on b.id_user_host = u.id_user_host
and u.main_billing_country_iso_3_per_deal is not null
{% endif %}
group by 1, 2, 3
@ -147,10 +164,10 @@ select
caym.cancelled_bookings,
biym.billable_bookings
from int_dates_mtd_by_dimension d
left join
created_year_month crym
left join
created_year_month crym
on crym.date = d.date
and crym.dimension = d.dimension
and crym.dimension = d.dimension
and crym.dimension_value = d.dimension_value
left join
check_out_year_month coym
@ -166,4 +183,4 @@ left join
billable_year_month biym
on biym.date = d.date
and biym.dimension = d.dimension
and biym.dimension_value = d.dimension_value
and biym.dimension_value = d.dimension_value