data-dwh-dbt-project/models/intermediate/kpis/int_kpis__agg_monthly_churn_contribution.sql
Oriol Roqué Paniagua e8a2fb1ae5 Merged PR 4926: KPIs Refactor Stage 3 - Creates dedicated Churn model in KPIs
# Description

Creates a new model in intermediate/kpis called `int_kpis__agg_monthly_churn_contribution`. This follows the convention (aggregated = by dimension and dimension value) and monthly (1 dimension value per dimension per date where date is unique in month).

This single model aims to substitute, in the future, the 2 churn models simultaneously:
* `int_monthly_12m_window_contribution_by_deal`
* `int_monthly_churn_metrics`

Main changes:
* There's no longer a dependency with `int_monthly_aggregated_metrics_history_by_deal`. Rather, it gets the metrics and dimensions from wherever needed and nothing else.
* I also took the opportunity to clean any code that was not needed after combining both models, resulting in dropping a whole CTE.
* Updated schema description for clarification purposes.

The output of the new model is identical to the 2nd model, `int_monthly_churn_metrics`, confirmed with the md5 trick. Model runs in 1.45 seconds in my laptop so no performance issue (it's technically faster than running the 2 current models which is around 6.3 seconds but whatever).

Note that this has NO effect yet on production. The switch will be handled in a separated PR.

# Checklist

- [X] The edited models and dependants run properly with production data.
- [X] The edited models are sufficiently documented.
- [X] The edited models contain PK tests, and I've ran and passed them.
- [X] I have checked for DRY opportunities with other models and docs.
- [X] I've picked the right materialization for the affected models.

# Other

- [ ] Check if a full-refresh is required after this PR is merged.

Related work items: #28948
2025-04-04 14:02:32 +00:00

236 lines
9.6 KiB
SQL

{% set dimensions = get_kpi_dimensions_per_model("CHURN_RATES") %}
{% set churn_lifecycle_states = "('05-Churning')" %}
{{ config(materialized="table", unique_key=["date", "dimension", "dimension_value"]) }}
with
int_kpis__lifecycle_daily_deal as (
select * from {{ ref("int_kpis__lifecycle_daily_deal") }}
),
int_kpis__agg_dates_main_kpis as (
select * from {{ ref("int_kpis__agg_dates_main_kpis") }}
),
created_bookings as (
select *
from {{ ref("int_kpis__agg_monthly_created_bookings") }}
where dimension in ('by_deal') and dimension_value <> 'UNSET'
),
listings as (
select *
from {{ ref("int_kpis__agg_daily_listings") }}
where
dimension in ('by_deal')
and dimension_value <> 'UNSET'
and is_end_of_month = true
),
total_and_retained_revenue as (
select *
from {{ ref("int_kpis__agg_monthly_total_and_retained_revenue") }}
where dimension in ('by_deal') and dimension_value <> 'UNSET'
),
int_kpis__dimension_daily_accommodation as (
select * from {{ ref("int_kpis__dimension_daily_accommodation") }}
),
int_kpis__dimension_deals as (select * from {{ ref("int_kpis__dimension_deals") }}),
deal_date_per_month as (
select d.date, dimension_value as id_deal
from int_kpis__agg_dates_main_kpis d
where
d.dimension = 'by_deal'
and d.dimension_value <> 'UNSET'
and d.is_end_of_month = true
),
aggregated_metrics_by_deal_over_12_months as (
select
d.date,
d.id_deal,
deal_lifecycle.deal_lifecycle_state,
1.0 * count(*) over (
partition by d.id_deal
order by d.date
rows between 12 preceding and 1 preceding
) as preceding_months_count_by_deal,
1.0 * sum(total_and_retained_revenue.total_revenue_in_gbp) over (
partition by d.id_deal
order by d.date
rows between 12 preceding and 1 preceding
) as sum_total_revenue_preceding_12_months,
1.0 * sum(created_bookings.created_bookings) over (
partition by d.id_deal
order by d.date
rows between 12 preceding and 1 preceding
) as sum_created_bookings_preceding_12_months,
1.0 * sum(listings.listings_booked_in_month) over (
partition by d.id_deal
order by d.date
rows between 12 preceding and 1 preceding
) as sum_listings_booked_in_month_preceding_12_months
from deal_date_per_month d
left join
int_kpis__lifecycle_daily_deal deal_lifecycle
on d.date = deal_lifecycle.date
and d.id_deal = deal_lifecycle.id_deal
left join
created_bookings
on d.date = created_bookings.end_date
and d.id_deal = created_bookings.dimension_value
left join
listings on d.date = listings.date and d.id_deal = listings.dimension_value
left join
total_and_retained_revenue
on d.date = total_and_retained_revenue.end_date
and d.id_deal = total_and_retained_revenue.dimension_value
),
global_computation_over_12_months as (
select
ambd.id_deal,
ambd.date,
ambd.deal_lifecycle_state,
-- 12 months window sum of metric divided by amount of active months per
-- deal
ambd.sum_total_revenue_preceding_12_months
/ ambd.preceding_months_count_by_deal
as avg_total_revenue_preceding_12_months,
ambd.sum_created_bookings_preceding_12_months
/ ambd.preceding_months_count_by_deal
as avg_created_bookings_preceding_12_months,
ambd.sum_listings_booked_in_month_preceding_12_months
/ ambd.preceding_months_count_by_deal
as avg_listings_booked_in_month_preceding_12_months,
-- Global amounts by sum of the 12 months window sum of metric on a given
-- date
sum(
ambd.sum_total_revenue_preceding_12_months
/ ambd.preceding_months_count_by_deal
) over (partition by ambd.date)
as avg_global_total_revenue_preceding_12_months,
sum(
ambd.sum_created_bookings_preceding_12_months
/ ambd.preceding_months_count_by_deal
) over (partition by ambd.date)
as avg_global_created_bookings_preceding_12_months,
sum(
ambd.sum_listings_booked_in_month_preceding_12_months
/ ambd.preceding_months_count_by_deal
) over (partition by ambd.date)
as avg_global_listings_booked_in_month_preceding_12_months
from aggregated_metrics_by_deal_over_12_months ambd
),
churn_metrics_per_date as (
{% for dimension in dimensions %}
select
m12wc.date,
{{ dimension.dimension }} as dimension,
{{ dimension.dimension_value }} as dimension_value,
-- Revenue Churn 12m rolling window (absolute figures) --
sum(
case
when m12wc.deal_lifecycle_state in {{ churn_lifecycle_states }}
then coalesce(avg_total_revenue_preceding_12_months, 0)
else 0
end
) as total_revenue_churn_preceding_12_months,
-- Global Revenue 12m rolling window --
max(
coalesce(avg_global_total_revenue_preceding_12_months, 0)
) as total_revenue_global_preceding_12_months,
-- Churn Rates --
sum(
case
when m12wc.deal_lifecycle_state in {{ churn_lifecycle_states }}
then
coalesce(
avg_total_revenue_preceding_12_months / nullif(
avg_global_total_revenue_preceding_12_months, 0
),
0
)
else 0
end
) as total_revenue_churn_average_contribution,
sum(
case
when m12wc.deal_lifecycle_state in {{ churn_lifecycle_states }}
then
coalesce(
avg_created_bookings_preceding_12_months / nullif(
avg_global_created_bookings_preceding_12_months, 0
),
0
)
else 0
end
) as created_bookings_churn_average_contribution,
sum(
case
when m12wc.deal_lifecycle_state in {{ churn_lifecycle_states }}
then
coalesce(
avg_listings_booked_in_month_preceding_12_months
/ nullif(
avg_global_listings_booked_in_month_preceding_12_months,
0
),
0
)
else 0
end
) as listings_booked_in_month_churn_average_contribution
from global_computation_over_12_months m12wc
{% if dimension.dimension == "'by_number_of_listings'" %}
inner join
int_kpis__dimension_daily_accommodation dda
on m12wc.id_deal = dda.id_deal
and m12wc.date = dda.date
{% elif dimension.dimension == "'by_billing_country'" %}
inner join
int_kpis__dimension_deals ud
on m12wc.id_deal = ud.id_deal
and ud.main_billing_country_iso_3_per_deal is not null
{% endif %}
where deal_lifecycle_state is not null
group by 1, 2, 3
{% if not loop.last %}
union all
{% endif %}
{% endfor %}
)
-- Final aggregation of subqueries --
select
d.year,
d.month,
d.day,
d.date,
d.dimension,
d.dimension_value,
d.is_end_of_month,
d.is_current_month,
c.total_revenue_churn_preceding_12_months,
c.total_revenue_global_preceding_12_months,
cast(
c.total_revenue_churn_average_contribution as numeric(19, 6)
) as total_revenue_churn_average_contribution,
cast(
c.created_bookings_churn_average_contribution as numeric(19, 6)
) as created_bookings_churn_average_contribution,
cast(
c.listings_booked_in_month_churn_average_contribution as numeric(19, 6)
) as listings_booked_in_month_churn_average_contribution
from int_kpis__agg_dates_main_kpis d
left join
churn_metrics_per_date c
on c.date = d.date
and c.dimension = d.dimension
and c.dimension_value = d.dimension_value
where
-- Remove current month dates since data won't be available anyway. This is specific
-- for this churn metrics model
d.is_current_month = false
-- Apply only dimensions relevant for Churn Rates
and d.dimension in ('global', 'by_number_of_listings', 'by_billing_country')
and d.dimension_value <> 'UNSET'