Merged PR 3472: Remove unused models and schema entries for deals and accommodations

# Description

Removes models that have been replaced by the new KPIs flow.
Also deletes temporary tests and schema entries.

# Checklist

- [X] The edited models and dependants run properly with production data.
- [NA] The edited models are sufficiently documented.
- [X] The edited models contain PK tests, and I've run and passed them.
- [X] I have checked for DRY opportunities with other models and docs.
- [NA] I've picked the right materialization for the affected models.

# Other

- [ ] Check if a full-refresh is required after this PR is merged.

Related work items: #23762
This commit is contained in:
Oriol Roqué Paniagua 2024-11-08 12:05:40 +00:00
parent 5e1b418570
commit 2f80642f6c
10 changed files with 1 additions and 1358 deletions

View file

@ -1,217 +0,0 @@
/*
Month-To-Date (MTD) lifecycle of every deal, derived from booking activity and
from the Hubspot cancellation date. Intended grain: one row per (date, id_deal).
Assumes hosts have their deal (id_deal) filled in.
*/
{{
    config(
        materialized="table",
        unique_key=["date", "id_deal"]
    )
}}
with
int_core__bookings as (
    select * from {{ ref("int_core__bookings") }}
),
int_core__user_host as (
    select * from {{ ref("int_core__user_host") }}
),
int_dates_mtd as (
    select * from {{ ref("int_dates_mtd") }}
),
int_hubspot__deal as (
    select * from {{ ref("int_hubspot__deal") }}
),
-- Deals that carry a cancellation date in Hubspot; these are treated as churned.
--
-- Known limitations:
--   * Hubspot gives no way to track a deal reactivation. If a deal is
--     reactivated, its cancellation date may be reset to null, which loses
--     the previous offboarding and rewrites the deal's history backwards.
--   * The cancellation date is preferred over the Hubspot deal stage because
--     a churned deal may be under reactivation talks with the sales / AM
--     teams. During those talks the stage can differ from "cancelled" even
--     though the deal has not been reactivated yet.
hubspot_deal_offboardings as (
    select
        deal.id_deal,
        deal.cancellation_date_utc
    from int_hubspot__deal as deal
    where deal.cancellation_date_utc is not null
),
-- Distinct days on which each host had at least one booking created.
booked_days_per_host as (
    select distinct id_user_host, created_date_utc
    from int_core__bookings
),
-- For every deal and booked day, the closest earlier day on which ANY host of
-- the same deal had a booking created (null for the deal's first booked day).
-- The lag must be taken per deal, not per host: a deal can group several
-- hosts, and the latest "previous booked day" of a single host is not
-- necessarily the deal's second-to-last booked day. Using the per-host value
-- made continuously booked multi-host deals look like reactivations.
booked_days_per_deal as (
    select
        h.id_deal,
        b.created_date_utc,
        lag(b.created_date_utc, 1) over (
            partition by h.id_deal order by b.created_date_utc asc
        ) as previous_booked_date
    from booked_days_per_host b
    inner join int_core__user_host h on b.id_user_host = h.id_user_host
    where h.id_deal is not null
    -- Collapse hosts of the same deal that were booked on the same day so the
    -- window sees one row per (deal, day).
    group by h.id_deal, b.created_date_utc
),
-- One row per (MTD date, deal) with the key booking dates known up to and
-- including that date. Only hosts already created at that date contribute.
deal_historic_booking_dates as (
    select
        d.date,
        h.id_deal,
        min(h.created_date_utc) as creation_date_utc,
        min(b.created_date_utc) as first_time_booked_date_utc,
        max(b.created_date_utc) as last_time_booked_date_utc,
        -- previous_booked_date grows with the booked day, so its maximum over
        -- the visible days is the day preceding the last booked day.
        max(bd.previous_booked_date) as second_to_last_time_booked_date_utc
    from int_dates_mtd d
    inner join int_core__user_host h on d.date >= date(h.created_date_utc)
    left join
        booked_days_per_host b
        on h.id_user_host = b.id_user_host
        and d.date >= b.created_date_utc
    -- At most one match per (deal, day): this join cannot multiply rows.
    left join
        booked_days_per_deal bd
        on h.id_deal = bd.id_deal
        and b.created_date_utc = bd.created_date_utc
    where h.id_deal is not null
    group by d.date, h.id_deal
),
-- Boolean features per (date, deal) that feed the lifecycle classification.
-- Every comparison is wrapped in coalesce(..., false) so that a missing date
-- (no booking yet, no cancellation) yields false instead of null.
deal_historic_features as (
    select
        dates.date,
        dates.id_deal,
        dates.creation_date_utc,
        dates.first_time_booked_date_utc,
        dates.last_time_booked_date_utc,
        dates.second_to_last_time_booked_date_utc,
        offb.cancellation_date_utc,
        -- The cancellation date has been reached.
        coalesce(
            dates.date >= offb.cancellation_date_utc, false
        ) as deal_has_been_offboarded,
        -- NOTE(review): true for every day of the cancellation month, also for
        -- days before the cancellation date itself; confirm this is intended.
        coalesce(
            date_trunc('month', offb.cancellation_date_utc)
            = date_trunc('month', dates.date),
            false
        ) as deal_was_offboarded_this_month,
        (
            dates.first_time_booked_date_utc is not null
        ) as deal_has_at_least_one_booking,
        coalesce(
            date_trunc('month', dates.creation_date_utc)
            = date_trunc('month', dates.date),
            false
        ) as deal_was_created_this_month,
        -- First and last booked days fall in the same month, i.e. all booking
        -- activity so far is confined to a single month.
        coalesce(
            date_trunc('month', dates.first_time_booked_date_utc)
            = date_trunc('month', dates.last_time_booked_date_utc),
            false
        ) as deal_has_first_booking,
        coalesce(
            date_trunc('month', dates.last_time_booked_date_utc)
            = date_trunc('month', dates.date),
            false
        ) as has_been_booked_within_current_month,
        coalesce(
            date_trunc('month', dates.last_time_booked_date_utc)
            + interval '6 months'
            > date_trunc('month', dates.date),
            false
        ) as has_been_booked_within_last_6_months,
        coalesce(
            date_trunc('month', dates.last_time_booked_date_utc)
            + interval '12 months'
            > date_trunc('month', dates.date),
            false
        ) as has_been_booked_within_last_12_months,
        -- The month of the last booking is exactly 12 months before the
        -- month of the reporting date.
        coalesce(
            date_trunc('month', dates.last_time_booked_date_utc)
            + interval '12 months'
            = date_trunc('month', dates.date),
            false
        ) as last_booking_was_12_months_ago,
        -- Gap of more than 12 months between the two most recent booked days.
        coalesce(
            date_trunc('month', dates.second_to_last_time_booked_date_utc)
            + interval '12 months'
            < date_trunc('month', dates.last_time_booked_date_utc),
            false
        ) as had_previous_booking_more_than_12_months_before_the_last
    from deal_historic_booking_dates dates
    left join hubspot_deal_offboardings offb on dates.id_deal = offb.id_deal
)
select
    date,
    id_deal,
    creation_date_utc,
    first_time_booked_date_utc,
    last_time_booked_date_utc,
    second_to_last_time_booked_date_utc,
    cancellation_date_utc,
    -- Lifecycle classification. Branches are evaluated top-down and the first
    -- match wins, so their order is part of the logic. There is no else
    -- branch: a row matching no state gets null.
    case
        -- 01-New: created this month, never booked, not offboarded.
        when
            deal_was_created_this_month
            and not (deal_has_at_least_one_booking or deal_has_been_offboarded)
        then '01-New'
        -- 02-Never Booked: created before this month, never booked, not
        -- offboarded.
        when
            not deal_was_created_this_month
            and not (deal_has_at_least_one_booking or deal_has_been_offboarded)
        then '02-Never Booked'
        -- 03-First Time Booked: booked this month, with first and last booked
        -- days in the same month, and not offboarded.
        when
            has_been_booked_within_current_month
            and deal_has_first_booking
            and not deal_has_been_offboarded
        then '03-First Time Booked'
        -- 04-Active: booked at least once, last booking less than 12 months
        -- ago, not offboarded, and booked this month neither as a
        -- reactivation nor as a first-time booking.
        when
            deal_has_at_least_one_booking
            and has_been_booked_within_last_12_months
            and not deal_has_been_offboarded
            and not (
                has_been_booked_within_current_month
                and (
                    had_previous_booking_more_than_12_months_before_the_last
                    or deal_has_first_booking
                )
            )
        then '04-Active'
        -- 05-Churning: offboarded this month, or booked at least once with
        -- the last booking exactly 12 months ago and not offboarded.
        when
            deal_was_offboarded_this_month
            or (
                deal_has_at_least_one_booking
                and last_booking_was_12_months_ago
                and not deal_has_been_offboarded
            )
        then '05-Churning'
        -- 06-Inactive: offboarded in a previous month, or not offboarded but
        -- with the last booking more than 12 months ago.
        when
            (deal_has_been_offboarded and not deal_was_offboarded_this_month)
            or (
                deal_has_at_least_one_booking
                and not (
                    has_been_booked_within_last_12_months
                    or last_booking_was_12_months_ago
                    or deal_has_been_offboarded
                )
            )
        then '06-Inactive'
        -- 07-Reactivated: not offboarded, booked this month after a gap of
        -- more than 12 months since the previous booking.
        when
            has_been_booked_within_current_month
            and had_previous_booking_more_than_12_months_before_the_last
            and not deal_has_been_offboarded
        then '07-Reactivated'
    end as deal_lifecycle_state,
    has_been_booked_within_current_month,
    has_been_booked_within_last_6_months,
    has_been_booked_within_last_12_months,
    deal_has_been_offboarded as has_been_offboarded
from deal_historic_features

View file

@ -1,125 +0,0 @@
/*
Month-To-Date (MTD) deal metrics: number of deals per lifecycle state and per
booking recency, for every date and KPI dimension value.
*/
{% set dimensions = get_kpi_dimensions() %}
{{
    config(
        materialized="table",
        unique_key=["date", "dimension", "dimension_value"]
    )
}}
with
-- Upstream models, imported once so the logic below reads from plain names.
int_mtd_deal_lifecycle as (
    select * from {{ ref("int_mtd_deal_lifecycle") }}
),
int_dates_mtd_by_dimension as (
    select * from {{ ref("int_dates_mtd_by_dimension") }}
),
int_core__mtd_accommodation_segmentation as (
    select * from {{ ref("int_core__mtd_accommodation_segmentation") }}
),
int_core__deal as (
    select * from {{ ref("int_core__deal") }}
),
-- Deal counts per (date, dimension, dimension_value). One select is rendered
-- per KPI dimension and the results are stacked with union all.
--
-- Every metric counts distinct al.id_deal on columns qualified with the
-- lifecycle alias. This keeps all metrics consistent if a dimension join ever
-- returns more than one row per deal and date, and avoids ambiguous column
-- references if a joined model exposes a column with the same name.
{% set lifecycle_state_metrics = [
    ("01-New", "new_deals"),
    ("02-Never Booked", "never_booked_deals"),
    ("03-First Time Booked", "first_time_booked_deals"),
    ("04-Active", "active_deals"),
    ("05-Churning", "churning_deals"),
    ("06-Inactive", "inactive_deals"),
    ("07-Reactivated", "reactivated_deals")
] %}
{% set booking_recency_metrics = [
    ("has_been_booked_within_current_month", "deals_booked_in_month"),
    ("has_been_booked_within_last_6_months", "deals_booked_in_6_months"),
    ("has_been_booked_within_last_12_months", "deals_booked_in_12_months")
] %}
deals_metric_aggregation_per_date as (
    {% for dimension in dimensions %}
        select
            al.date,
            {{ dimension.dimension }} as dimension,
            {{ dimension.dimension_value }} as dimension_value,
            -- Deals in each lifecycle state.
            {% for state, metric in lifecycle_state_metrics %}
                count(
                    distinct case
                        when al.deal_lifecycle_state = '{{ state }}'
                        then al.id_deal
                    end
                ) as {{ metric }},
            {% endfor %}
            -- Deals with a booking created within each recency window.
            {% for flag, metric in booking_recency_metrics %}
                count(
                    distinct case when al.{{ flag }} then al.id_deal end
                ) as {{ metric }}
                {% if not loop.last %},{% endif %}
            {% endfor %}
        from int_mtd_deal_lifecycle al
        {% if dimension.dimension == "'by_number_of_listings'" %}
            inner join
                int_core__mtd_accommodation_segmentation mas
                on al.id_deal = mas.id_deal
                and al.date = mas.date
        {% elif dimension.dimension == "'by_billing_country'" %}
            inner join
                int_core__deal ud
                on al.id_deal = ud.id_deal
                and ud.main_billing_country_iso_3_per_deal is not null
        {% endif %}
        -- Positional on purpose: the dimension expression is injected by
        -- Jinja and is compared above against quoted literals, which Postgres
        -- does not accept as a group by expression.
        group by 1, 2, 3
        {% if not loop.last %}
            union all
        {% endif %}
    {% endfor %}
)
-- Final output: the date/dimension spine is the driving table, so every
-- (date, dimension, dimension_value) is kept even when it has no deals.
-- Zero counts are turned into null with nullif.
select
    spine.year,
    spine.month,
    spine.day,
    spine.date,
    spine.dimension,
    spine.dimension_value,
    spine.is_end_of_month,
    spine.is_current_month,
    nullif(metrics.new_deals, 0) as new_deals,
    nullif(metrics.never_booked_deals, 0) as never_booked_deals,
    nullif(metrics.first_time_booked_deals, 0) as first_time_booked_deals,
    nullif(metrics.active_deals, 0) as active_deals,
    nullif(metrics.churning_deals, 0) as churning_deals,
    nullif(metrics.inactive_deals, 0) as inactive_deals,
    nullif(metrics.reactivated_deals, 0) as reactivated_deals,
    nullif(metrics.deals_booked_in_month, 0) as deals_booked_in_month,
    nullif(metrics.deals_booked_in_6_months, 0) as deals_booked_in_6_months,
    nullif(metrics.deals_booked_in_12_months, 0) as deals_booked_in_12_months
from int_dates_mtd_by_dimension spine
left join
    deals_metric_aggregation_per_date metrics
    on spine.date = metrics.date
    and spine.dimension = metrics.dimension
    and spine.dimension_value = metrics.dimension_value

View file

@ -1418,158 +1418,3 @@ models:
- name: listings_booked_in_month_churn_average_contribution
data_type: numeric
description: Listings Booked in Month churn rate (average approach).
- name: int_mtd_deal_lifecycle
description: |
This model contains the historic information regarding the lifecycle of hosts, at deal level.
The booking-related dates make it possible to derive the current activity status of any
deal. This information is encapsulated in the following columns:
deal_lifecycle_state: contains one of the following states
- 01-New: Deals that have been created in the current month, without bookings, that are not offboarded.
- 02-Never Booked: Deals that have been created before the current month, without bookings, that are not offboarded.
- 03-First Time Booked: Deals that have been booked for the first time in the current month, that are not offboarded.
- 04-Active: Deals that have booking activity in the past 12 months (that are not FTB nor reactivated), that are not offboarded.
- 05-Churning: Either Deals that are offboarded in that month or Deals that are becoming inactive because of lack of bookings in the past 12 months
- 06-Inactive: Either Deals that have been previously offboarded or Deals that have not had a booking for more than 12 months.
- 07-Reactivated: Deals that have had a booking in the current month that were inactive or churning before, that are not offboarded.
- Finally, if none of the logic applies, which should not happen, null will be set and a dbt alert will raise.
Since the states of Active, First Time Booked and Reactivated indicate certain booking activity and are
mutually exclusive, the model also provides information of the recency of the bookings by the following
booleans:
- has_been_booked_within_current_month: If a deal has had a booking created in the current month
- has_been_booked_within_last_6_months: If a deal has had a booking created in the past 6 months
- has_been_booked_within_last_12_months: If a deal has had a booking created in the past 12 months
Note that if a deal has had a booking created in a given month, all 3 columns will be true. Similarly,
if the last booking created for a deal was 5 months ago, only the column has_been_booked_within_current_month
will be false, while the other 2 will be true.
Some final considerations:
- It's possible but not common that a Deal gets offboarded in the same month in which it has had some bookings created.
- It shouldn't happen that a Deal that is Inactive has some bookings created. However, there are a few cases in which
this happens, likely because of misconfiguration between Hubspot and Core. These cases should be reported to improve
data quality.
deprecation_date: 2024-11-30
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- date
- id_deal
columns:
- name: date
data_type: date
description: The date for the month-to-date. Information is inclusive to the date displayed.
tests:
- not_null
- name: id_deal
data_type: character varying
description: Id of the deal associated to the host.
tests:
- not_null
- name: creation_date_utc
data_type: date
description: Date of when the first host associated to that deal was created.
- name: first_time_booked_date_utc
data_type: date
description: |
Date of the first booking created for a given deal. Can be null if the deal
has never had a booking associated with it.
- name: last_time_booked_date_utc
data_type: date
description: |
Date of the last booking created for a given deal. Can be null if the deal
has never had a booking associated with it. Can be the same as first_time_booked_date_utc
if the deal only had 1 booking in its history.
- name: second_to_last_time_booked_date_utc
data_type: date
description: |
Date of the second-to-last booking created for a given deal, meaning the creation
date of the booking that precedes the last one. It's relevant for the reactivation computation
on the lifecycle. Can be null if the deal has never had a booking associated with it or if
the deal only had 1 booking in its history.
- name: cancellation_date_utc
data_type: date
description: |
Date of when the deal was cancelled, according to Hubspot. This is the date we're considering
for hard offboarding. It can be null, meaning the account has not been offboarded.
- name: deal_lifecycle_state
data_type: character varying
description: |
Contains the lifecycle state of a deal. The accepted values are:
01-New, 02-Never Booked, 03-First Time Booked, 04-Active, 05-Churning, 06-Inactive,
07-Reactivated. A row matching none of the states gets a null value, which fails the not_null test and raises an alert.
tests:
- not_null
- accepted_values:
values:
- 01-New
- 02-Never Booked
- 03-First Time Booked
- 04-Active
- 05-Churning
- 06-Inactive
- 07-Reactivated
- name: has_been_booked_within_current_month
data_type: boolean
description: If the deal has had a booking created in the current month.
- name: has_been_booked_within_last_6_months
data_type: boolean
description: If the deal has had a booking created in the past 6 months.
- name: has_been_booked_within_last_12_months
data_type: boolean
description: If the deal has had a booking created in the past 12 months.
- name: has_been_offboarded
data_type: boolean
description: If the deal has been cancelled or not.
- name: int_mtd_deal_metrics
description: |
This model contains the historic information regarding the deals in an aggregated manner.
It's used for the business KPIs. Data is aggregated at the last day of the month and in the
days necessary for the Month-to-Date computation of the current month.
deprecation_date: 2024-11-30
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- date
- dimension
- dimension_value
columns:
- name: date
data_type: date
description: The date for the month-to-date deal-related metrics.
tests:
- not_null
- name: dimension
data_type: string
description: The dimension or granularity of the metrics.
tests:
- accepted_values:
values:
- global
- by_number_of_listings
- by_billing_country
- name: dimension_value
data_type: string
description: The value or segment available for the selected dimension.
tests:
- not_null