# Description Creates skeleton for new KPIs data flow for created_bookings metric. Details are accessible [here](https://www.notion.so/knowyourguest-superhog/KPIs-Refactor-Let-s-go-daily-2024-10-23-1280446ff9c980dc87a3dc7453e95f06?pvs=4#12a0446ff9c98085bf4dfc77f6fc22f7) In essence: * Models are created in intermediate in a kpis folder. * Models have a daily segmentation. This includes `created_bookings` models, but also the daily lifecycle per listing and the segmentation. It also adds a `dimension_dates` model specific for KPIs. These have all the dimensions already in place and handle all the crazy logic. * Other time aggregation models simply read from existing daily models which are much easier (`int_kpis__metric_mtd_created_bookings` and `int_kpis__metric_monthly_created_bookings`). * Dimensionality aggregation can be easily added within a given timeframe (daily, mtd, monthly). For instance, I do it for mtd in the `int_kpis__aggregated_mtd_created_bookings` and for monthly in `int_kpis__aggregated_monthly_created_bookings` * Macro configuration for dimensions: Allows to set any specific dimension for `aggregated` models. By default, the subset of global, by billing country, by number of listings and by deal apply - since these are needed for Main KPIs. I added an example with Dash Source, that currently does not exist and it's currently configured as only appearing for created bookings. * Testing `aggregated` models completeness. A new macro called `assert_dimension_completeness` is available that ensures additive metrics are consistent vs. the global result, configurable at schema level. * Testing refactor impact. I'm aware that changing the lifecycle model to daily impacts the volumes for listing segments. For the rest, I added a `tmp` test that checks that the dimension and dimension value per date exactly match comparing new vs. old computation. Latest edits: * Changed naming convention * Split of MTD and Monthly. Now these are 2 different entities, as stated in `int_kpis__dimension_dates`. * Added start_date and end_date for models that contemplate a range (mtd, monthly). * Added a small readme entry in the kpis folders. Mostly it states nomenclature and some first conventions. Dbt docs:  # Checklist - [X] The edited models and dependants run properly with production data. - [X] The edited models are sufficiently documented. - [X] The edited models contain PK tests, and I've ran and passed them. - [ ] I have checked for DRY opportunities with other models and docs. **Likely we'll be able to add macros for mtd and dim_agg models. We will see later on.** - [ ] I've picked the right materialization for the affected models. **Models run ok except for the daily lifecycle of listings, which lasts several minutes in the first run. Model curr...
187 lines
7.3 KiB
SQL
187 lines
7.3 KiB
SQL
{{
|
|
config(
|
|
materialized="incremental",
|
|
unique_key=["date", "id_accommodation"],
|
|
incremental_strategy="merge",
|
|
)
|
|
}}
|
|
with
|
|
int_core__bookings as (select * from {{ ref("int_core__bookings") }}),
|
|
stg_core__accommodation as (select * from {{ ref("stg_core__accommodation") }}),
|
|
int_core__unique_accommodation_to_user as (
|
|
select * from {{ ref("int_core__unique_accommodation_to_user") }}
|
|
),
|
|
int_kpis__dimension_dates as (select * from {{ ref("int_kpis__dimension_dates") }}),
|
|
|
|
booked_days_per_accommodation as (
|
|
select
|
|
id_accommodation,
|
|
created_date_utc,
|
|
lag(created_date_utc, 1) over (
|
|
partition by id_accommodation order by created_date_utc asc
|
|
) as previous_booked_date
|
|
from int_core__bookings
|
|
group by id_accommodation, created_date_utc
|
|
|
|
),
|
|
accommodation_historic_booking_dates as (
|
|
select
|
|
d.date,
|
|
a.id_accommodation,
|
|
date(a.created_at_utc) as creation_date_utc,
|
|
min(b.created_date_utc) as first_time_booked_date_utc,
|
|
max(b.created_date_utc) as last_time_booked_date_utc,
|
|
max(b.previous_booked_date) as second_to_last_time_booked_date_utc
|
|
from int_kpis__dimension_dates d
|
|
inner join stg_core__accommodation a on d.date >= date(a.created_at_utc)
|
|
-- Inclusion of unique accommodation to user to force that the
|
|
-- accommodation needs to have a host assigned into it.
|
|
inner join
|
|
int_core__unique_accommodation_to_user uatu
|
|
on uatu.id_accommodation = a.id_accommodation
|
|
left join
|
|
booked_days_per_accommodation b
|
|
on uatu.id_accommodation = b.id_accommodation
|
|
and d.date >= b.created_date_utc
|
|
|
|
{% if is_incremental() %}
|
|
where d.date >= (select current_date - interval '7 days')
|
|
{% endif %}
|
|
|
|
group by d.date, a.id_accommodation, date(a.created_at_utc)
|
|
),
|
|
accommodation_historic_booking_features as (
|
|
select
|
|
ahbd.date,
|
|
ahbd.id_accommodation,
|
|
ahbd.creation_date_utc,
|
|
ahbd.first_time_booked_date_utc,
|
|
ahbd.last_time_booked_date_utc,
|
|
ahbd.second_to_last_time_booked_date_utc,
|
|
case
|
|
when ahbd.first_time_booked_date_utc is not null then true else false
|
|
end as accommodation_has_at_least_one_booking,
|
|
case
|
|
when
|
|
date_trunc('month', ahbd.creation_date_utc)
|
|
= date_trunc('month', ahbd.date)
|
|
then true
|
|
else false
|
|
end as accommodation_was_created_this_month,
|
|
case
|
|
when
|
|
date_trunc('month', ahbd.first_time_booked_date_utc)
|
|
= date_trunc('month', ahbd.last_time_booked_date_utc)
|
|
then true
|
|
else false
|
|
end as accommodation_has_first_booking,
|
|
case
|
|
when
|
|
date_trunc('month', ahbd.last_time_booked_date_utc)
|
|
= date_trunc('month', ahbd.date)
|
|
then true
|
|
else false
|
|
end as has_been_booked_within_current_month,
|
|
case
|
|
when
|
|
date_trunc('month', ahbd.last_time_booked_date_utc)
|
|
+ interval '6 months'
|
|
> date_trunc('month', ahbd.date)
|
|
then true
|
|
else false
|
|
end as has_been_booked_within_last_6_months,
|
|
case
|
|
when
|
|
date_trunc('month', ahbd.last_time_booked_date_utc)
|
|
+ interval '12 months'
|
|
> date_trunc('month', ahbd.date)
|
|
then true
|
|
else false
|
|
end as has_been_booked_within_last_12_months,
|
|
case
|
|
when
|
|
date_trunc('month', ahbd.last_time_booked_date_utc)
|
|
+ interval '12 months'
|
|
= date_trunc('month', ahbd.date)
|
|
then true
|
|
else false
|
|
end as last_booking_was_12_months_ago,
|
|
case
|
|
when
|
|
date_trunc('month', ahbd.second_to_last_time_booked_date_utc)
|
|
+ interval '12 months'
|
|
< date_trunc('month', ahbd.last_time_booked_date_utc)
|
|
then true
|
|
else false
|
|
end as had_previous_booking_more_than_12_months_before_the_last
|
|
from accommodation_historic_booking_dates ahbd
|
|
)
|
|
select
|
|
date,
|
|
id_accommodation,
|
|
creation_date_utc,
|
|
first_time_booked_date_utc,
|
|
last_time_booked_date_utc,
|
|
second_to_last_time_booked_date_utc,
|
|
case
|
|
-- 01-New: The listing has been created this month and has not had any
|
|
-- booking
|
|
when
|
|
not accommodation_has_at_least_one_booking
|
|
and accommodation_was_created_this_month
|
|
then '01-New'
|
|
-- 02-Never Booked: The listing has been created before this month and
|
|
-- has not
|
|
-- had any booking
|
|
when
|
|
not accommodation_has_at_least_one_booking
|
|
and not accommodation_was_created_this_month
|
|
then '02-Never Booked'
|
|
-- 03-First Time Booked: The listing has been booked for the first
|
|
-- time and it
|
|
-- has been created this month
|
|
when accommodation_has_first_booking and has_been_booked_within_current_month
|
|
then '03-First Time Booked'
|
|
-- 04-Active: The listing has had at least 1 booking in its history
|
|
-- and it's
|
|
-- been less than 12 months since the last booking
|
|
-- and is not reactivated and is not FTB
|
|
when
|
|
accommodation_has_at_least_one_booking
|
|
and has_been_booked_within_last_12_months
|
|
-- not reactivated
|
|
and not (
|
|
had_previous_booking_more_than_12_months_before_the_last
|
|
and has_been_booked_within_current_month
|
|
)
|
|
-- not FTB
|
|
and not (
|
|
accommodation_has_first_booking and has_been_booked_within_current_month
|
|
)
|
|
then '04-Active'
|
|
-- 05-Churning: The listing has been booked at least once and it's
|
|
-- been 12
|
|
-- months since the last booking
|
|
when accommodation_has_at_least_one_booking and last_booking_was_12_months_ago
|
|
then '05-Churning'
|
|
-- 06-Inactive: The listing has been booked at least once and it's
|
|
-- been more
|
|
-- than 12 months since the last booking
|
|
when
|
|
accommodation_has_at_least_one_booking
|
|
and not has_been_booked_within_last_12_months
|
|
and not last_booking_was_12_months_ago
|
|
then '06-Inactive'
|
|
-- 07-Reactivated: The listing was churned/inactive but has had a new
|
|
-- booking
|
|
-- this month
|
|
when
|
|
had_previous_booking_more_than_12_months_before_the_last
|
|
and has_been_booked_within_current_month
|
|
then '07-Reactivated'
|
|
else null
|
|
end as accommodation_lifecycle_state,
|
|
has_been_booked_within_current_month,
|
|
has_been_booked_within_last_6_months,
|
|
has_been_booked_within_last_12_months
|
|
from accommodation_historic_booking_features
|