Merged PR 2575: Accommodation metrics per customer segmentation

# Description

Modifies `int_core__mtd_accommodation_metrics` to include the customer segmentation based on listings. `schema.yaml` is also updated with the new fields and tests. `int_core__mtd_vs_previous_year_metrics` is hardcoded to `dimension = 'global'` to avoid propagating this change upwards and messing up the data display.

Overall, this follows a strategy similar to the one we used for Booking, Guest Journey and Deal metrics. For reference, here's [the previous PR on Deal](https://guardhog.visualstudio.com/Data/_git/data-dwh-dbt-project/pullrequest/2534). I noticed that I had mixed up the schema tests of Deals and Accommodations; this PR should fix both.

# Checklist

- [X] The edited models and dependants run properly with production data.
- [X] The edited models are sufficiently documented.
- [X] The edited models contain PK tests, and I've run and passed them.
- [X] I have checked for DRY opportunities with other models and docs.
- [X] I've picked the right materialization for the affected models.

# Other

- [ ] Check if a full-refresh is required after this PR is merged.

Related work items: #19325
This commit is contained in:
Oriol Roqué Paniagua 2024-08-19 09:03:42 +00:00
parent 52f42b9e9b
commit 89dd8845cc
3 changed files with 126 additions and 68 deletions

View file

@ -2,77 +2,106 @@
This model provides Month-To-Date (MTD) based on Accommodation metrics.
*/
{{ config(materialized="table", unique_key="date") }}
{% set dimensions = get_kpi_dimensions() %}
{{ config(materialized="table", unique_key=["date", "dimension", "dimension_value"]) }}
with
int_core__mtd_accommodation_lifecycle as (
select * from {{ ref("int_core__mtd_accommodation_lifecycle") }}
),
int_dates_mtd as (select * from {{ ref("int_dates_mtd") }}),
int_core__user_host as (select * from {{ ref("int_core__user_host") }}),
int_core__mtd_accommodation_segmentation as (
select * from {{ ref("int_core__mtd_accommodation_segmentation") }}
),
int_dates_mtd_by_dimension as (
select * from {{ ref("int_dates_mtd_by_dimension") }}
),
int_core__unique_accommodation_to_user as (
select * from {{ ref("int_core__unique_accommodation_to_user") }}
),
listings_metric_aggregation_per_date as (
select
al.date,
count(
distinct case
when al.accommodation_lifecycle_state = '01-New'
then al.id_accommodation
else null
end
) as new_listings,
count(
distinct case
when al.accommodation_lifecycle_state = '02-Never Booked'
then al.id_accommodation
else null
end
) as never_booked_listings,
count(
distinct case
when al.accommodation_lifecycle_state = '03-First Time Booked'
then al.id_accommodation
else null
end
) as first_time_booked_listings,
count(
distinct case
when al.accommodation_lifecycle_state = '04-Active'
then al.id_accommodation
else null
end
) as active_listings,
count(
distinct case
when al.accommodation_lifecycle_state = '05-Churning'
then al.id_accommodation
else null
end
) as churning_listings,
count(
distinct case
when al.accommodation_lifecycle_state = '06-Inactive'
then al.id_accommodation
else null
end
) as inactive_listings,
count(
distinct case
when al.accommodation_lifecycle_state = '07-Reactivated'
then al.id_accommodation
else null
end
) as reactivated_listings,
sum(
case when has_been_booked_within_current_month then 1 else 0 end
) as listings_booked_in_month,
sum(
case when has_been_booked_within_last_6_months then 1 else 0 end
) as listings_booked_in_6_months,
sum(
case when has_been_booked_within_last_12_months then 1 else 0 end
) as listings_booked_in_12_months
-- One select is rendered per entry of the dimensions list; the rendered
-- selects are stacked with union all at the end of the loop body.
{% for dimension in dimensions %}
select
al.date,
-- Both dimension attributes are printed directly into the SQL text.
{{ dimension.dimension }} as dimension,
{{ dimension.dimension_value }} as dimension_value,
count(
distinct case
when al.accommodation_lifecycle_state = '01-New'
then al.id_accommodation
else null
end
) as new_listings,
count(
distinct case
when al.accommodation_lifecycle_state = '02-Never Booked'
then al.id_accommodation
else null
end
) as never_booked_listings,
count(
distinct case
when al.accommodation_lifecycle_state = '03-First Time Booked'
then al.id_accommodation
else null
end
) as first_time_booked_listings,
count(
distinct case
when al.accommodation_lifecycle_state = '04-Active'
then al.id_accommodation
else null
end
) as active_listings,
count(
distinct case
when al.accommodation_lifecycle_state = '05-Churning'
then al.id_accommodation
else null
end
) as churning_listings,
count(
distinct case
when al.accommodation_lifecycle_state = '06-Inactive'
then al.id_accommodation
else null
end
) as inactive_listings,
count(
distinct case
when al.accommodation_lifecycle_state = '07-Reactivated'
then al.id_accommodation
else null
end
) as reactivated_listings,
sum(
case when has_been_booked_within_current_month then 1 else 0 end
) as listings_booked_in_month,
sum(
case when has_been_booked_within_last_6_months then 1 else 0 end
) as listings_booked_in_6_months,
sum(
case when has_been_booked_within_last_12_months then 1 else 0 end
) as listings_booked_in_12_months
from int_core__mtd_accommodation_lifecycle al
group by al.date
from int_core__mtd_accommodation_lifecycle al
-- The comparison string carries its own single quotes; presumably the
-- dimension name is stored already quoted so it renders as a SQL literal
-- in the select list (confirm against get_kpi_dimensions).
{% if dimension.dimension == "'by_number_of_listings'" %}
-- Segmentation path: accommodation -> owning user -> host deal -> the
-- segmentation row of that deal for the same date. All joins are inner,
-- so accommodations without a match at any step are excluded from this
-- dimension.
inner join
int_core__unique_accommodation_to_user atu
on atu.id_accommodation = al.id_accommodation
inner join int_core__user_host u on atu.id_user_owner = u.id_user_host
inner join
int_core__mtd_accommodation_segmentation mas
on u.id_deal = mas.id_deal
and al.date = mas.date
{% endif %}
-- Positional grouping: 1 = date, 2 = dimension, 3 = dimension_value.
group by 1, 2, 3
-- Stack the per-dimension selects; no union all after the last one.
{% if not loop.last %}
union all
{% endif %}
{% endfor %}
)
-- Final aggregation of subqueries --
select
@ -80,6 +109,8 @@ select
d.month,
d.day,
d.date,
d.dimension,
d.dimension_value,
d.is_end_of_month,
d.is_current_month,
nullif(l.new_listings, 0) as new_listings,
@ -92,5 +123,9 @@ select
nullif(l.listings_booked_in_month, 0) as listings_booked_in_month,
nullif(l.listings_booked_in_6_months, 0) as listings_booked_in_6_months,
nullif(l.listings_booked_in_12_months, 0) as listings_booked_in_12_months
from int_dates_mtd d
left join listings_metric_aggregation_per_date l on l.date = d.date
from int_dates_mtd_by_dimension d
left join
listings_metric_aggregation_per_date l
on l.date = d.date
and l.dimension = d.dimension
and l.dimension_value = d.dimension_value

View file

@ -307,10 +307,24 @@ models:
columns:
- name: date
data_type: date
description: The date for the month-to-date accommodation-related metrics.
description: The date for the month-to-date deal-related metrics.
tests:
- not_null
- name: dimension
data_type: string
description: The dimension or granularity of the metrics.
tests:
- accepted_values:
values:
- global
- by_number_of_listings
- name: dimension_value
data_type: string
description: The value or segment available for the selected dimension.
tests:
- not_null
- unique
- name: int_core__mtd_deal_metrics
description: |
@ -318,6 +332,13 @@ models:
It's used for the business KPIs. Data is aggregated at the last day of the month and in the
days necessary for the Month-to-Date computation of the current month.
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- date
- dimension
- dimension_value
columns:
- name: date
data_type: date

View file

@ -16,6 +16,8 @@ with
),
int_core__mtd_accommodation_metrics as (
select * from {{ ref("int_core__mtd_accommodation_metrics") }}
-- TEMPORARY: forcing dimension = 'global' so that rows for other dimensions
-- are not propagated through this model, avoiding breaking changes in
-- production.
where dimension = 'global'
),
int_core__mtd_deal_metrics as (
select * from {{ ref("int_core__mtd_deal_metrics") }}