Merged PR 2753: Remove distincts from booking counts

# Description

This PR removes multiple `distinct` statements from counts on the booking metrics.

I'm doing this because:
- The `distinct`s make the model query perform terribly, and it is already becoming far too slow.
- They are not strictly necessary, since the uniqueness of the fields being `distinct`-ed is already tested in upstream models.

# Checklist

- [X] The edited models and dependants run properly with production data.
- [X] The edited models are sufficiently documented.
- [X] The edited models contain PK tests, and I've run and passed them.
- [X] I have checked for DRY opportunities with other models and docs.
- [X] I've picked the right materialization for the affected models.

# Other

- [ ] Check if a full-refresh is required after this PR is merged.

Related work items: #20835
This commit is contained in:
Pablo Martín 2024-09-05 14:54:50 +00:00
commit f553e73089

View file

@ -2,7 +2,6 @@
This model provides Month-To-Date (MTD) based on Booking metrics. This model provides Month-To-Date (MTD) based on Booking metrics.
*/ */
{% set dimensions = get_kpi_dimensions() %} {% set dimensions = get_kpi_dimensions() %}
{{ config(materialized="table", unique_key=["date", "dimension", "dimension_value"]) }} {{ config(materialized="table", unique_key=["date", "dimension", "dimension_value"]) }}
@ -17,7 +16,9 @@ with
select * from {{ ref("int_core__booking_charge_events") }} select * from {{ ref("int_core__booking_charge_events") }}
), ),
int_dates_mtd as (select * from {{ ref("int_dates_mtd") }}), int_dates_mtd as (select * from {{ ref("int_dates_mtd") }}),
int_dates_mtd_by_dimension as (select * from {{ ref("int_dates_mtd_by_dimension") }}), int_dates_mtd_by_dimension as (
select * from {{ ref("int_dates_mtd_by_dimension") }}
),
-- Created Bookings MTD -- -- Created Bookings MTD --
created_year_month as ( created_year_month as (
@ -26,18 +27,22 @@ with
d.date, d.date,
{{ dimension.dimension }} as dimension, {{ dimension.dimension }} as dimension,
{{ dimension.dimension_value }} as dimension_value, {{ dimension.dimension_value }} as dimension_value,
count(distinct b.id_booking) as created_bookings count(b.id_booking) as created_bookings
from int_dates_mtd d from int_dates_mtd d
inner join int_core__bookings b inner join
int_core__bookings b
on date_trunc('month', b.created_date_utc)::date = d.first_day_month on date_trunc('month', b.created_date_utc)::date = d.first_day_month
and extract(day from b.created_date_utc) <= d.day and extract(day from b.created_date_utc) <= d.day
{% if dimension.dimension == "'by_number_of_listings'" %} {% if dimension.dimension == "'by_number_of_listings'" %}
inner join int_core__user_host u on b.id_user_host = u.id_user_host inner join int_core__user_host u on b.id_user_host = u.id_user_host
inner join int_core__mtd_accommodation_segmentation mas inner join
int_core__mtd_accommodation_segmentation mas
on u.id_deal = mas.id_deal on u.id_deal = mas.id_deal
and d.date = mas.date and d.date = mas.date
{% elif dimension.dimension == "'by_billing_country'" %} {% elif dimension.dimension == "'by_billing_country'" %}
inner join int_core__user_host u on b.id_user_host = u.id_user_host inner join
int_core__user_host u
on b.id_user_host = u.id_user_host
and u.main_billing_country_iso_3_per_deal is not null and u.main_billing_country_iso_3_per_deal is not null
{% endif %} {% endif %}
group by 1, 2, 3 group by 1, 2, 3
@ -53,18 +58,22 @@ with
d.date, d.date,
{{ dimension.dimension }} as dimension, {{ dimension.dimension }} as dimension,
{{ dimension.dimension_value }} as dimension_value, {{ dimension.dimension_value }} as dimension_value,
count(distinct b.id_booking) as check_out_bookings count(b.id_booking) as check_out_bookings
from int_dates_mtd d from int_dates_mtd d
inner join int_core__bookings b inner join
int_core__bookings b
on date_trunc('month', b.check_out_date_utc)::date = d.first_day_month on date_trunc('month', b.check_out_date_utc)::date = d.first_day_month
and extract(day from b.check_out_date_utc) <= d.day and extract(day from b.check_out_date_utc) <= d.day
{% if dimension.dimension == "'by_number_of_listings'" %} {% if dimension.dimension == "'by_number_of_listings'" %}
inner join int_core__user_host u on b.id_user_host = u.id_user_host inner join int_core__user_host u on b.id_user_host = u.id_user_host
inner join int_core__mtd_accommodation_segmentation mas inner join
int_core__mtd_accommodation_segmentation mas
on u.id_deal = mas.id_deal on u.id_deal = mas.id_deal
and d.date = mas.date and d.date = mas.date
{% elif dimension.dimension == "'by_billing_country'" %} {% elif dimension.dimension == "'by_billing_country'" %}
inner join int_core__user_host u on b.id_user_host = u.id_user_host inner join
int_core__user_host u
on b.id_user_host = u.id_user_host
and u.main_billing_country_iso_3_per_deal is not null and u.main_billing_country_iso_3_per_deal is not null
{% endif %} {% endif %}
group by 1, 2, 3 group by 1, 2, 3
@ -80,19 +89,23 @@ with
d.date, d.date,
{{ dimension.dimension }} as dimension, {{ dimension.dimension }} as dimension,
{{ dimension.dimension_value }} as dimension_value, {{ dimension.dimension_value }} as dimension_value,
count(distinct b.id_booking) as cancelled_bookings count(b.id_booking) as cancelled_bookings
from int_dates_mtd d from int_dates_mtd d
inner join int_core__bookings b inner join
int_core__bookings b
on date_trunc('month', b.updated_date_utc)::date = d.first_day_month on date_trunc('month', b.updated_date_utc)::date = d.first_day_month
and extract(day from b.updated_date_utc) <= d.day and extract(day from b.updated_date_utc) <= d.day
and upper(b.booking_state) = {{ var("cancelled_booking_state") }} and upper(b.booking_state) = {{ var("cancelled_booking_state") }}
{% if dimension.dimension == "'by_number_of_listings'" %} {% if dimension.dimension == "'by_number_of_listings'" %}
inner join int_core__user_host u on b.id_user_host = u.id_user_host inner join int_core__user_host u on b.id_user_host = u.id_user_host
inner join int_core__mtd_accommodation_segmentation mas inner join
int_core__mtd_accommodation_segmentation mas
on u.id_deal = mas.id_deal on u.id_deal = mas.id_deal
and d.date = mas.date and d.date = mas.date
{% elif dimension.dimension == "'by_billing_country'" %} {% elif dimension.dimension == "'by_billing_country'" %}
inner join int_core__user_host u on b.id_user_host = u.id_user_host inner join
int_core__user_host u
on b.id_user_host = u.id_user_host
and u.main_billing_country_iso_3_per_deal is not null and u.main_billing_country_iso_3_per_deal is not null
{% endif %} {% endif %}
group by 1, 2, 3 group by 1, 2, 3
@ -108,10 +121,12 @@ with
d.date, d.date,
{{ dimension.dimension }} as dimension, {{ dimension.dimension }} as dimension,
{{ dimension.dimension_value }} as dimension_value, {{ dimension.dimension_value }} as dimension_value,
count(distinct bce.id_booking) as billable_bookings count(bce.id_booking) as billable_bookings
from int_dates_mtd d from int_dates_mtd d
inner join int_core__booking_charge_events bce inner join
on date_trunc('month', bce.booking_fee_charge_date_utc)::date = d.first_day_month int_core__booking_charge_events bce
on date_trunc('month', bce.booking_fee_charge_date_utc)::date
= d.first_day_month
and extract(day from bce.booking_fee_charge_date_utc) <= d.day and extract(day from bce.booking_fee_charge_date_utc) <= d.day
{% if dimension.dimension == "'by_number_of_listings'" %} {% if dimension.dimension == "'by_number_of_listings'" %}
inner join int_core__bookings b on b.id_booking = bce.id_booking inner join int_core__bookings b on b.id_booking = bce.id_booking
@ -122,7 +137,9 @@ with
and d.date = mas.date and d.date = mas.date
{% elif dimension.dimension == "'by_billing_country'" %} {% elif dimension.dimension == "'by_billing_country'" %}
inner join int_core__bookings b on b.id_booking = bce.id_booking inner join int_core__bookings b on b.id_booking = bce.id_booking
inner join int_core__user_host u on b.id_user_host = u.id_user_host inner join
int_core__user_host u
on b.id_user_host = u.id_user_host
and u.main_billing_country_iso_3_per_deal is not null and u.main_billing_country_iso_3_per_deal is not null
{% endif %} {% endif %}
group by 1, 2, 3 group by 1, 2, 3
@ -147,10 +164,10 @@ select
caym.cancelled_bookings, caym.cancelled_bookings,
biym.billable_bookings biym.billable_bookings
from int_dates_mtd_by_dimension d from int_dates_mtd_by_dimension d
left join left join
created_year_month crym created_year_month crym
on crym.date = d.date on crym.date = d.date
and crym.dimension = d.dimension and crym.dimension = d.dimension
and crym.dimension_value = d.dimension_value and crym.dimension_value = d.dimension_value
left join left join
check_out_year_month coym check_out_year_month coym
@ -166,4 +183,4 @@ left join
billable_year_month biym billable_year_month biym
on biym.date = d.date on biym.date = d.date
and biym.dimension = d.dimension and biym.dimension = d.dimension
and biym.dimension_value = d.dimension_value and biym.dimension_value = d.dimension_value