data-dwh-dbt-project/models/intermediate/kpis/int_kpis__lifecycle_daily_deal.sql
Oriol Roqué Paniagua d26e08bffd Merged PR 3909: Fixes created_date_utc and updated_date_utc in SH User and its dependencies
# Description

The model `stg_core__superhog_user` contained the fields `created_date_utc` and `updated_date_utc`, which were actually timestamps, not dates. This created issues a while ago in KPIs that were fixed by forcing a date conversion at `int_kpis` level, but the source issue remained unfixed.

This PR just creates the proper fields in staging, meaning:
* `created_at_utc` and `created_date_utc`
* `updated_at_utc` and `updated_date_utc`

And propagates the changes through all dependencies. On KPIs, it also removes the hardcoded date conversion since it's no longer needed. It also adds the different schema entries on which these fields were already defined. Note I didn't update the staging entry since there's no entry for the rest of the fields.

I recommend checking this PR in the commits order.

What I've tested:
* Unified User and User Host have plenty of dependencies that I've checked one by one, at each dependency layer by using dbt docs.
* Run `core__unified_user` and the full execution of +`mtd_aggregated_metrics`

What clearly will fail:
* There's the famous `Users_dashboard` PBI report that shouldn't exist that has a dependency with `core__unified_user`. This report will fail. But it does not exist. I'd like to drop it once and forever

# Checklist

- [X] The edited models and dependants run properly with production data. **See above**
- [X] The edited models are sufficiently documented.
- [X] The edited models contain PK tests, and I've ran and passed them.
- [X] I have checked for DRY opportunities with other models and docs.
- [X] I've picked the right materialization for the affected models.

# Other

- [X] Check if a full-refresh is required after this PR is merged. **The staging model is incremental!**

Related work items: #23703
2024-12-31 08:04:03 +00:00

219 lines
9.1 KiB
SQL

/*
This model provides Month-To-Date (MTD) deal lifecycle based on booking metrics.
Assumes a host will have a deal being filled.
*/
{{ config(materialized="table", unique_key=["date", "id_deal"]) }}
with
int_core__bookings as (select * from {{ ref("int_core__bookings") }}),
int_core__user_host as (select * from {{ ref("int_core__user_host") }}),
int_kpis__dimension_dates as (select * from {{ ref("int_kpis__dimension_dates") }}),
int_hubspot__deal as (select * from {{ ref("int_hubspot__deal") }}),
hubspot_deal_offboardings as (
-- At the moment it's not possible to account for deal reactivation within
-- hubspot, meaning, once a deal has a cancellation date we'll be considering
-- it as churned. However, a deal can be reactivated and if so, the
-- cancellation date might be set up to null, thus loosing the previous
-- history of offboarding and affecting backwards history. Lastly,
-- cancellation date not being null is preferred vs. the hubspot deal stage
-- since an actual churned deal might be in a process of reactivation,
-- discussing with our colleagues from sales / am teams. In this discussion
-- period, the stage could be different than cancelled, but still not
-- reactivated.
select id_deal, cancellation_date_utc
from int_hubspot__deal
where cancellation_date_utc is not null
),
booked_days_per_deal as (
select
icuh.id_deal,
icb.created_date_utc,
lag(icb.created_date_utc, 1) over (
partition by icuh.id_deal order by icb.created_date_utc asc
) as previous_booked_date
from int_core__bookings icb
inner join int_core__user_host icuh on icb.id_user_host = icuh.id_user_host
where icuh.id_deal is not null
group by icuh.id_deal, icb.created_date_utc
),
deal_historic_booking_dates as (
select
d.date,
h.id_deal,
min(h.created_date_utc) as creation_date_utc,
min(b.created_date_utc) as first_time_booked_date_utc,
max(b.created_date_utc) as last_time_booked_date_utc,
max(b.previous_booked_date) as second_to_last_time_booked_date_utc
from int_kpis__dimension_dates d
inner join int_core__user_host h on d.date >= h.created_date_utc
left join
booked_days_per_deal b
on h.id_deal = b.id_deal
and d.date >= b.created_date_utc
where h.id_deal is not null
group by d.date, h.id_deal
),
deal_historic_features as (
select
hhbf.date,
hhbf.id_deal,
hhbf.creation_date_utc,
hhbf.first_time_booked_date_utc,
hhbf.last_time_booked_date_utc,
hhbf.second_to_last_time_booked_date_utc,
hdo.cancellation_date_utc,
case
when hhbf.date >= hdo.cancellation_date_utc then true else false
end as deal_has_been_offboarded,
case
when
date_trunc('month', hdo.cancellation_date_utc)
= date_trunc('month', hhbf.date)
then true
else false
end as deal_was_offboarded_this_month,
case
when hhbf.first_time_booked_date_utc is not null then true else false
end as deal_has_at_least_one_booking,
case
when
date_trunc('month', hhbf.creation_date_utc)
= date_trunc('month', hhbf.date)
then true
else false
end as deal_was_created_this_month,
case
when
date_trunc('month', hhbf.first_time_booked_date_utc)
= date_trunc('month', hhbf.last_time_booked_date_utc)
then true
else false
end as deal_has_first_booking,
case
when
date_trunc('month', hhbf.last_time_booked_date_utc)
= date_trunc('month', hhbf.date)
then true
else false
end as has_been_booked_within_current_month,
case
when
date_trunc('month', hhbf.last_time_booked_date_utc)
+ interval '6 months'
> date_trunc('month', hhbf.date)
then true
else false
end as has_been_booked_within_last_6_months,
case
when
date_trunc('month', hhbf.last_time_booked_date_utc)
+ interval '12 months'
> date_trunc('month', hhbf.date)
then true
else false
end as has_been_booked_within_last_12_months,
case
when
date_trunc('month', hhbf.last_time_booked_date_utc)
+ interval '12 months'
= date_trunc('month', hhbf.date)
then true
else false
end as last_booking_was_12_months_ago,
case
when
date_trunc('month', hhbf.second_to_last_time_booked_date_utc)
+ interval '12 months'
< date_trunc('month', hhbf.last_time_booked_date_utc)
then true
else false
end as had_previous_booking_more_than_12_months_before_the_last
from deal_historic_booking_dates hhbf
left join hubspot_deal_offboardings hdo on hhbf.id_deal = hdo.id_deal
)
select
date,
id_deal,
creation_date_utc,
first_time_booked_date_utc,
last_time_booked_date_utc,
second_to_last_time_booked_date_utc,
cancellation_date_utc,
case
-- 01-New: The deal has been created this month and has not had any booking.
-- Additionally, the deal has not been offboarded in hubspot.
when
not deal_has_at_least_one_booking
and deal_was_created_this_month
and not deal_has_been_offboarded
then '01-New'
-- 02-Never Booked: The deal has been created before this month and has not
-- had any booking. Additionally, the deal has not been offboarded in hubspot.
when
not deal_has_at_least_one_booking
and not deal_was_created_this_month
and not deal_has_been_offboarded
then '02-Never Booked'
-- 03-First Time Booked: The deal has been booked for the first time and it
-- has been created this month. Additionally, the deal has not been offboarded
-- in hubspot.
when
deal_has_first_booking
and has_been_booked_within_current_month
and not deal_has_been_offboarded
then '03-First Time Booked'
-- 04-Active: The deal has had at least 1 booking in its history and it's
-- been less than 12 months since the last booking and has not been offboarded
-- in hubspot and is not reactivated and is not FTB
when
deal_has_at_least_one_booking
and has_been_booked_within_last_12_months
and not deal_has_been_offboarded
-- not reactivated
and not (
had_previous_booking_more_than_12_months_before_the_last
and has_been_booked_within_current_month
)
-- not FTB
and not (deal_has_first_booking and has_been_booked_within_current_month)
then '04-Active'
-- 05-Churning: The deal has been offboarded this month. Alternatively, The
-- deal has been booked at least once and it's been 12 months since the last
-- booking
when
(
deal_has_at_least_one_booking
and last_booking_was_12_months_ago
and not deal_has_been_offboarded
)
or deal_was_offboarded_this_month
then '05-Churning'
-- 06-Inactive: The deal has been offboarded in the past but not this month.
-- Alternatively, the deal is not offboarded and the deal has been booked at
-- least once and it's been more than 12 months since the last booking.
when
(
deal_has_at_least_one_booking
and not has_been_booked_within_last_12_months
and not last_booking_was_12_months_ago
and not deal_has_been_offboarded
)
or (deal_has_been_offboarded and not deal_was_offboarded_this_month)
then '06-Inactive'
-- 07-Reactivated: The deal is not offboarded but was churned/inactive, and
-- now has had a new booking this month
when
had_previous_booking_more_than_12_months_before_the_last
and has_been_booked_within_current_month
and not deal_has_been_offboarded
then '07-Reactivated'
else null
end as deal_lifecycle_state,
has_been_booked_within_current_month,
has_been_booked_within_last_6_months,
has_been_booked_within_last_12_months,
deal_has_been_offboarded as has_been_offboarded
from deal_historic_features