Merged PR 2107: Adds host lifecycle metrics into biz kpis

This PR closes the first draft of the first batch of business kpis. Host logic has changed to be applied at deal id level.
It's mostly an adapted copy-paste from the accommodation counterpart, specifically:
- `int_core__mtd_deal_lifecycle`: computes the historic deal lifecycle. One line for each deal and MTD date. **Important**: _Not all hosts have a deal set. This will need a data quality report for business teams to fix_
- `int_core__mtd_deal_metrics`: computes the aggregation at MTD date level of the metrics per lifecycle state and activity state

Additionally, this PR changes:
- `int_core__mtd_aggregated_metrics`: it includes the new 3 deal metrics and changes the source of the already existing 3 deal metrics from `mtd_booking_metrics` to the new `mtd_deal_metrics`
- `int_core__mtd_booking_metrics`: removes all code needed to compute the remaining deal metrics, speeding it up considerably.

After this PR, the mtd models run (locally) at the following speed:
- `int_core__mtd_accommodation_lifecycle`: 47 sec
- `int_core__mtd_deal_lifecycle`: 3 sec
- `int_core__mtd_accommodation_metrics`: 5 sec
- `int_core__mtd_deal_metrics`: < 1 sec
- `int_core__mtd_booking_metrics`: 8 sec (quite a reduction)
- `int_core__mtd_guest_journey_metrics`: 5 sec
- `int_core__mtd_aggregated_metrics` and `core__mtd_aggregated_metrics`: < 1 sec

Related work items: #17312
This commit is contained in:
Oriol Roqué Paniagua 2024-06-25 12:20:59 +00:00
parent cebd6f6485
commit 6c053a0753
6 changed files with 418 additions and 82 deletions

View file

@ -1,5 +1,5 @@
/*
This model provides Month-To-Date (MTD) based on Booking metrics.
This model provides Month-To-Date (MTD) based on Accommodation metrics.
*/
{{ config(materialized="table", unique_key="date") }}

View file

@ -54,30 +54,57 @@
"number_format": "integer",
},
{
"source": "int_core__mtd_booking_metrics",
"source": "int_core__mtd_deal_metrics",
"order_by": 7,
"metric": "New Deals",
"value": "new_deals",
"previous_year_value": "previous_year_new_deals",
"relative_increment": "relative_increment_new_deals",
"number_format": "integer",
},
{
"source": "int_core__mtd_deal_metrics",
"order_by": 8,
"metric": "First Time Booked Deals",
"value": "first_time_booked_deals",
"previous_year_value": "previous_year_first_time_booked_deals",
"relative_increment": "relative_increment_first_time_booked_deals",
"number_format": "integer",
},
{
"source": "int_core__mtd_deal_metrics",
"order_by": 9,
"metric": "Hosts Booked in Month",
"value": "hosts_booked_in_month",
"previous_year_value": "previous_year_hosts_booked_in_month",
"relative_increment": "relative_increment_hosts_booked_in_month",
"metric": "Deals Booked in Month",
"value": "deals_booked_in_month",
"previous_year_value": "previous_year_deals_booked_in_month",
"relative_increment": "relative_increment_deals_booked_in_month",
"number_format": "integer",
},
{
"source": "int_core__mtd_booking_metrics",
"source": "int_core__mtd_deal_metrics",
"order_by": 10,
"metric": "Hosts Booked in 6 Months",
"value": "hosts_booked_in_6_months",
"previous_year_value": "previous_year_hosts_booked_in_6_months",
"relative_increment": "relative_increment_hosts_booked_in_6_months",
"metric": "Deals Booked in 6 Months",
"value": "deals_booked_in_6_months",
"previous_year_value": "previous_year_deals_booked_in_6_months",
"relative_increment": "relative_increment_deals_booked_in_6_months",
"number_format": "integer",
},
{
"source": "int_core__mtd_booking_metrics",
"source": "int_core__mtd_deal_metrics",
"order_by": 11,
"metric": "Hosts Booked in 12 Months",
"value": "hosts_booked_in_12_months",
"previous_year_value": "previous_year_hosts_booked_in_12_months",
"relative_increment": "relative_increment_hosts_booked_in_12_months",
"metric": "Deals Booked in 12 Months",
"value": "deals_booked_in_12_months",
"previous_year_value": "previous_year_deals_booked_in_12_months",
"relative_increment": "relative_increment_deals_booked_in_12_months",
"number_format": "integer",
},
{
"source": "int_core__mtd_deal_metrics",
"order_by": 12,
"metric": "Churning Deals",
"value": "churning_deals",
"previous_year_value": "previous_year_churning_deals",
"relative_increment": "relative_increment_churning_deals",
"number_format": "integer",
},
{
@ -171,6 +198,9 @@ with
),
int_core__mtd_accommodation_metrics as (
select * from {{ ref("int_core__mtd_accommodation_metrics") }}
),
int_core__mtd_deal_metrics as (
select * from {{ ref("int_core__mtd_deal_metrics") }}
)
{% for metric in metrics %}
select

View file

@ -42,49 +42,6 @@ with
and upper(b.booking_state) = 'CANCELLED'
group by 1
),
-- Computation of number of months difference for activity-based logic --
creation_month_diff as (
select
d.date,
b.id_user_host,
max(b.created_date_utc) as last_created_date,
(d.year - extract(year from max(b.created_date_utc))) * 12
+ (d.month - extract(month from max(b.created_date_utc))) as month_diff
from int_dates_mtd d
inner join
int_core__bookings b
on b.created_date_utc between d.date - interval '13 months' and d.date -- reduce the computation at 12 + 1 month to reduce the rows to retrieve
group by 1, 2, d.year, d.month
),
-- Computation of booking-based activity metrics --
booking_based_activity as (
select
cmd.date,
-- Hosts/PM activity --
count(
distinct case
when cmd.month_diff = 0 then cmd.id_user_host else null
end
) as hosts_booked_in_month,
count(
distinct case
when cmd.month_diff >= 0 and cmd.month_diff < 6
then cmd.id_user_host
else null
end
) as hosts_booked_in_6_months,
count(
distinct case
when cmd.month_diff >= 0 and cmd.month_diff < 12
then cmd.id_user_host
else null
end
) as hosts_booked_in_12_months,
count(distinct cmd.id_user_host) as historic_booked_hosts
from creation_month_diff cmd
group by 1
),
main_kpi as (
-- Final aggregation of subqueries --
select
@ -96,16 +53,11 @@ with
d.is_current_month,
crym.created_bookings,
coym.check_out_bookings,
caym.cancelled_bookings,
bba.hosts_booked_in_month,
bba.hosts_booked_in_6_months,
bba.hosts_booked_in_12_months,
bba.historic_booked_hosts
caym.cancelled_bookings
from int_dates_mtd d
left join created_year_month crym on crym.date = d.date
left join check_out_year_month coym on coym.date = d.date
left join cancelled_year_month caym on caym.date = d.date
left join booking_based_activity bba on bba.date = d.date
)
-- Pivoting to get previous year for each line & computing relative increment
-- (rel_incr) --
@ -128,23 +80,7 @@ select
a.cancelled_bookings,
b.cancelled_bookings as previous_year_cancelled_bookings,
cast(a.cancelled_bookings as decimal) / b.cancelled_bookings
- 1 as relative_increment_cancelled_bookings,
a.hosts_booked_in_month,
b.hosts_booked_in_month as previous_year_hosts_booked_in_month,
cast(a.hosts_booked_in_month as decimal) / b.hosts_booked_in_month
- 1 as relative_increment_hosts_booked_in_month,
a.hosts_booked_in_6_months,
b.hosts_booked_in_6_months as previous_year_hosts_booked_in_6_months,
cast(a.hosts_booked_in_6_months as decimal) / b.hosts_booked_in_6_months
- 1 as relative_increment_hosts_booked_in_6_months,
a.hosts_booked_in_12_months,
b.hosts_booked_in_12_months as previous_year_hosts_booked_in_12_months,
cast(a.hosts_booked_in_12_months as decimal) / b.hosts_booked_in_12_months
- 1 as relative_increment_hosts_booked_in_12_months,
a.historic_booked_hosts,
b.historic_booked_hosts as previous_year_historic_booked_hosts,
cast(a.historic_booked_hosts as decimal) / b.historic_booked_hosts
- 1 as relative_increment_historic_booked_hosts
- 1 as relative_increment_cancelled_bookings
from main_kpi a
left join main_kpi b on a.month = b.month and a.year = b.year + 1
where (a.is_end_of_month = 1 or (a.is_current_month = 1 and a.day = b.day))

View file

@ -0,0 +1,166 @@
/*
This model provides Month-To-Date (MTD) deal lifecycle based on booking metrics.
Assumes a host will have a deal being filled.
*/
{{ config(materialized="table", unique_key=["date","id_deal"]) }}
with
int_core__bookings as (select * from {{ ref("int_core__bookings") }}),
int_core__unified_user as (select * from {{ ref("int_core__unified_user") }}),
int_dates_mtd as (select * from {{ ref("int_dates_mtd") }}),
booked_days_per_host as (
select
id_user_host,
created_date_utc,
lag(created_date_utc, 1) over (
partition by id_user_host order by created_date_utc asc
) as previous_booked_date
from int_core__bookings
group by id_user_host, created_date_utc
),
deal_historic_booking_dates as (
select
d.date,
h.id_deal,
min(h.created_date_utc) as creation_date_utc,
min(b.created_date_utc) as first_time_booked_date_utc,
max(b.created_date_utc) as last_time_booked_date_utc,
max(b.previous_booked_date) as second_to_last_time_booked_date_utc
from int_dates_mtd d
inner join int_core__unified_user h on d.date >= h.created_date_utc
left join
booked_days_per_host b
on h.id_user = b.id_user_host
and d.date >= b.created_date_utc
where h.id_deal is not null
group by d.date, h.id_deal
),
deal_historic_booking_features as (
select
hhbf.date,
hhbf.id_deal,
hhbf.creation_date_utc,
hhbf.first_time_booked_date_utc,
hhbf.last_time_booked_date_utc,
hhbf.second_to_last_time_booked_date_utc,
case
when hhbf.first_time_booked_date_utc is not null then true else false
end as deal_has_at_least_one_booking,
case
when
date_trunc('month', hhbf.creation_date_utc)
= date_trunc('month', hhbf.date)
then true
else false
end as deal_was_created_this_month,
case
when
date_trunc('month', hhbf.first_time_booked_date_utc)
= date_trunc('month', hhbf.last_time_booked_date_utc)
then true
else false
end as deal_has_first_booking,
case
when
date_trunc('month', hhbf.last_time_booked_date_utc)
= date_trunc('month', hhbf.date)
then true
else false
end as has_been_booked_within_current_month,
case
when
date_trunc('month', hhbf.last_time_booked_date_utc)
+ interval '6 months'
> date_trunc('month', hhbf.date)
then true
else false
end as has_been_booked_within_last_6_months,
case
when
date_trunc('month', hhbf.last_time_booked_date_utc)
+ interval '12 months'
> date_trunc('month', hhbf.date)
then true
else false
end as has_been_booked_within_last_12_months,
case
when
date_trunc('month', hhbf.last_time_booked_date_utc)
+ interval '12 months'
= date_trunc('month', hhbf.date)
then true
else false
end as last_booking_was_12_months_ago,
case
when
date_trunc('month', hhbf.second_to_last_time_booked_date_utc)
+ interval '12 months'
< date_trunc('month', hhbf.last_time_booked_date_utc)
then true
else false
end as had_previous_booking_more_than_12_months_before_the_last
from deal_historic_booking_dates hhbf
)
select
date,
id_deal,
creation_date_utc,
first_time_booked_date_utc,
last_time_booked_date_utc,
second_to_last_time_booked_date_utc,
case
-- 01-New: The deal has been created this month and has not had any booking
when
not deal_has_at_least_one_booking
and deal_was_created_this_month
then '01-New'
-- 02-Never Booked: The deal has been created before this month and has not
-- had any booking
when
not deal_has_at_least_one_booking
and not deal_was_created_this_month
then '02-Never Booked'
-- 03-First Time Booked: The deal has been booked for the first time and it
-- has been created this month
when deal_has_first_booking and has_been_booked_within_current_month
then '03-First Time Booked'
-- 04-Active: The deal has had at least 1 booking in its history and it's
-- been less than 12 months since the last booking
-- and is not reactivated and is not FTB
when
deal_has_at_least_one_booking
and has_been_booked_within_last_12_months
-- not reactivated
and not (
had_previous_booking_more_than_12_months_before_the_last
and has_been_booked_within_current_month
)
-- not FTB
and not (
deal_has_first_booking and has_been_booked_within_current_month
)
then '04-Active'
-- 05-Churning: The deal has been booked at least once and it's been 12
-- months since the last booking
when deal_has_at_least_one_booking and last_booking_was_12_months_ago
then '05-Churning'
-- 06-Inactive: The deal has been booked at least once and it's been more
-- than 12 months since the last booking
when
deal_has_at_least_one_booking
and not has_been_booked_within_last_12_months
and not last_booking_was_12_months_ago
then '06-Inactive'
-- 07-Reactivated: The deal was churned/inactive but has had a new booking
-- this month
when
had_previous_booking_more_than_12_months_before_the_last
and has_been_booked_within_current_month
then '07-Reactivated'
else null
end as deal_lifecycle_state,
has_been_booked_within_current_month,
has_been_booked_within_last_6_months,
has_been_booked_within_last_12_months
from deal_historic_booking_features

View file

@ -0,0 +1,101 @@
/*
This model provides Month-To-Date (MTD) based on Deal metrics.
*/
{{ config(materialized="table", unique_key="date") }}
with
int_core__mtd_deal_lifecycle as (select * from {{ ref("int_core__mtd_deal_lifecycle") }}),
int_dates_mtd as (select * from {{ ref("int_dates_mtd") }}),
deals_metric_aggregation_per_date as (
select
al.date,
COUNT(distinct case when al.deal_lifecycle_state = '01-New' then al.id_deal else null end) as new_deals,
COUNT(distinct case when al.deal_lifecycle_state = '02-Never Booked' then al.id_deal else null end) as never_booked_deals,
COUNT(distinct case when al.deal_lifecycle_state = '03-First Time Booked' then al.id_deal else null end) as first_time_booked_deals,
COUNT(distinct case when al.deal_lifecycle_state = '04-Active' then al.id_deal else null end) as active_deals,
COUNT(distinct case when al.deal_lifecycle_state = '05-Churning' then al.id_deal else null end) as churning_deals,
COUNT(distinct case when al.deal_lifecycle_state = '06-Inactive' then al.id_deal else null end) as inactive_deals,
COUNT(distinct case when al.deal_lifecycle_state = '07-Reactivated' then al.id_deal else null end) as reactivated_deals,
SUM(case when has_been_booked_within_current_month then 1 else 0 end) as deals_booked_in_month,
SUM(case when has_been_booked_within_last_6_months then 1 else 0 end) as deals_booked_in_6_months,
SUM(case when has_been_booked_within_last_12_months then 1 else 0 end) as deals_booked_in_12_months
from int_core__mtd_deal_lifecycle al
group by al.date
),
main_kpi as (
-- Final aggregation of subqueries --
select
d.year,
d.month,
d.day,
d.date,
d.is_end_of_month,
d.is_current_month,
nullif(l.new_deals,0) as new_deals,
nullif(l.never_booked_deals,0) as never_booked_deals,
nullif(l.first_time_booked_deals,0) as first_time_booked_deals,
nullif(l.active_deals,0) as active_deals,
nullif(l.churning_deals,0) as churning_deals,
nullif(l.inactive_deals,0) as inactive_deals,
nullif(l.reactivated_deals,0) as reactivated_deals,
nullif(l.deals_booked_in_month,0) as deals_booked_in_month,
nullif(l.deals_booked_in_6_months,0) as deals_booked_in_6_months,
nullif(l.deals_booked_in_12_months,0) as deals_booked_in_12_months
from int_dates_mtd d
left join deals_metric_aggregation_per_date l on l.date = d.date
)
-- Pivoting to get previous year for each line & computing relative increment
-- (rel_incr) --
select
a.year,
a.month,
a.day,
a.is_end_of_month,
a.is_current_month,
a.date,
b.date as previous_year_date,
a.new_deals,
b.new_deals as previous_year_new_deals,
cast(a.new_deals as decimal) / b.new_deals
- 1 as relative_increment_new_deals,
a.never_booked_deals,
b.never_booked_deals as previous_year_never_booked_deals,
cast(a.never_booked_deals as decimal) / b.never_booked_deals
- 1 as relative_increment_never_booked_deals,
a.active_deals,
b.active_deals as previous_year_active_deals,
cast(a.active_deals as decimal) / b.active_deals
- 1 as relative_increment_active_deals,
a.churning_deals,
b.churning_deals as previous_year_churning_deals,
cast(a.churning_deals as decimal) / b.churning_deals
- 1 as relative_increment_churning_deals,
a.inactive_deals,
b.inactive_deals as previous_year_inactive_deals,
cast(a.inactive_deals as decimal) / b.inactive_deals
- 1 as relative_increment_inactive_deals,
a.reactivated_deals,
b.reactivated_deals as previous_year_reactivated_deals,
cast(a.reactivated_deals as decimal) / b.reactivated_deals
- 1 as relative_increment_reactivated_deals,
a.first_time_booked_deals,
b.first_time_booked_deals as previous_year_first_time_booked_deals,
cast(a.first_time_booked_deals as decimal) / b.first_time_booked_deals
- 1 as relative_increment_first_time_booked_deals,
a.deals_booked_in_month,
b.deals_booked_in_month as previous_year_deals_booked_in_month,
cast(a.deals_booked_in_month as decimal) / b.deals_booked_in_month
- 1 as relative_increment_deals_booked_in_month,
a.deals_booked_in_6_months,
b.deals_booked_in_6_months as previous_year_deals_booked_in_6_months,
cast(a.deals_booked_in_6_months as decimal) / b.deals_booked_in_6_months
- 1 as relative_increment_deals_booked_in_6_months,
a.deals_booked_in_12_months,
b.deals_booked_in_12_months as previous_year_deals_booked_in_12_months,
cast(a.deals_booked_in_12_months as decimal) / b.deals_booked_in_12_months
- 1 as relative_increment_deals_booked_in_12_months
from main_kpi a
left join main_kpi b on a.month = b.month and a.year = b.year + 1
where (a.is_end_of_month = 1 or (a.is_current_month = 1 and a.day = b.day))

View file

@ -171,6 +171,20 @@ models:
- not_null
- unique
- name: int_core__mtd_deal_metrics
description: |
This model contains the historic information regarding the deals in an aggregated manner.
It's used for the business KPIs. Data is aggregated at the last day of the month and in the
days necessary for the Month-to-Date computation of the current month.
columns:
- name: date
data_type: date
description: The date for the month-to-date deal-related metrics.
tests:
- not_null
- unique
- name: int_core__mtd_aggregated_metrics
description: |
The `int_core__mtd_aggregated_metrics` model aggregates multiple metrics on a year, month, and day basis.
@ -711,3 +725,92 @@ models:
data_type: boolean
description: If the listing has had a booking created in the past 12 months.
- name: int_core__mtd_deal_lifecycle
description: |
This model contains the historic information regarding the lifecycle of hosts, at deal level.
The information regarding the booking-related time allows for the current status of any
deal regarding its activity. This information is encapsulated in the following columns:
deal_lifecycle_state: contains one of the following states
- 01-New: Deals that have been created in the current month, without bookings
- 02-Never Booked: Deals that have been created before the current month, without bookings.
- 03-First Time Booked: Deals that have been booked for the first time in the current month.
- 04-Active: Deals that have booking activity in the past 12 months (that are not FTB nor reactivated)
- 05-Churning: Deals that are becoming inactive because of lack of bookings in the past 12 months
- 06-Inactive: Deals that have not had a booking for more than 12 months.
- 07-Reactivated: Deals that have had a booking in the current month that were inactive or churning before.
- Finally, if none of the logic applies, which should not happen, null will be set and a dbt alert will raise.
Since the states of Active, First Time Booked and Reactivated indicate certain booking activity and are
mutually exclusive, the model also provides information of the recency of the bookings by the following
booleans:
- has_been_booked_within_current_month: If a deal has had a booking created in the current month
- has_been_booked_within_last_6_months: If a deal has had a booking created in the past 6 months
- has_been_booked_within_last_12_months: If a deal has had a booking created in the past 12 months
Note that if a deal has had a booking created in a given month, all 3 columns will be true. Similarly,
if the last booking created to a deal was 5 months ago, only the column has_been_booked_in_1_month
will be false; while the other 2 will be true.
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- date
- id_deal
columns:
- name: date
data_type: date
description: The date for the month-to-date. Information is inclusive to the date displayed.
tests:
- not_null
- name: id_deal
data_type: bigint
description: Id of the deal associated to the host.
tests:
- not_null
- name: creation_date_utc
data_type: date
description: Date of when the first host associated to that deal was created.
- name: first_time_booked_date_utc
data_type: date
description: |
Date of the first booking created for a given deal. Can be null if the deal
has never had a booking associated with it.
- name: last_time_booked_date_utc
data_type: date
description: |
Date of the last booking created for a given deal. Can be null if the deal
has never had a booking associated with it. Can be the same as first_time_booked_date_utc
if the deal only had 1 booking in its history.
- name: second_to_last_time_booked_date_utc
data_type: date
description: |
Date of the second-to-last booking created for a given deal, meaning the creation
date of the booking that precedes the last one. It's relevant for the reactivation computation
on the lifecycle. Can be null if the deal has never had a booking associated with it or if
the deal only had 1 booking in its history.
- name: deal_lifecycle_state
data_type: character varying
description: |
Contains the lifecycle state of a deal. The accepted values are:
01-New, 02-Never Booked, 03-First Time Booked, 04-Active, 05-Churning, 06-Inactive,
07-Reactivated. Failing to implement the logic will result in alert.
tests:
- not_null
- name: has_been_booked_within_current_month
data_type: boolean
description: If the deal has had a booking created in the current month.
- name: has_been_booked_within_last_6_months
data_type: boolean
description: If the deal has had a booking created in the past 6 months.
- name: has_been_booked_within_last_12_months
data_type: boolean
description: If the deal has had a booking created in the past 12 months.