Merged PR 3461: Creates models for listing metrics

# Description

Adds listing metrics model. It's a bit similar as the Deal metrics, but here we can aggregate by deal :)

I'm aware that int_kpis__metric_daily_listings takes quite a bit (3 min). To be optimised later on.

# Checklist

- [X] The edited models and dependants run properly with production data.
- [X] The edited models are sufficiently documented.
- [X] The edited models contain PK tests, and I've ran and passed them.
- [ ] I have checked for DRY opportunities with other models and docs.
- [ ] I've picked the right materialization for the affected models.

# Other

- [ ] Check if a full-refresh is required after this PR is merged.

Related work items: #23567
This commit is contained in:
Oriol Roqué Paniagua 2024-11-07 16:56:43 +00:00
parent dada9c289a
commit 335ba9a29b
5 changed files with 484 additions and 1 deletions

View file

@ -0,0 +1,34 @@
{% set dimensions = get_kpi_dimensions_per_model("LISTINGS") %}
{{ config(materialized="table", unique_key=["date", "dimension", "dimension_value"]) }}
{% for dimension in dimensions %}
select
-- Unique Key --
d.date,
{{ dimension.dimension }} as dimension,
{{ dimension.dimension_value }} as dimension_value,
-- Date Attributes --
d.is_current_month,
d.is_end_of_month,
d.is_month_to_date,
-- Metrics --
sum(new_listings) as new_listings,
sum(never_booked_listings) as never_booked_listings,
sum(first_time_booked_listings) as first_time_booked_listings,
sum(active_listings) as active_listings,
sum(churning_listings) as churning_listings,
sum(inactive_listings) as inactive_listings,
sum(reactivated_listings) as reactivated_listings,
sum(listings_booked_in_month) as listings_booked_in_month,
sum(listings_booked_in_6_months) as listings_booked_in_6_months,
sum(listings_booked_in_12_months) as listings_booked_in_12_months
from {{ ref("int_kpis__dimension_dates") }} d
left join {{ ref("int_kpis__metric_daily_listings") }} as mdd on d.date = mdd.date
where mdd.id_deal is not null
group by 1, 2, 3, 4, 5, 6
{% if not loop.last %}
union all
{% endif %}
{% endfor %}

View file

@ -11,13 +11,13 @@
select
-- Unique Key --
ldl.date,
-- Dimensions --
coalesce(
icd.main_billing_country_iso_3_per_deal, 'UNSET'
) as main_billing_country_iso_3_per_deal,
coalesce(
icmas.active_accommodations_per_deal_segmentation, 'UNSET'
) as active_accommodations_per_deal_segmentation,
-- Metrics --
count(
distinct case
when ldl.deal_lifecycle_state = '01-New' then ldl.id_deal else null

View file

@ -0,0 +1,88 @@
{{
config(
materialized="table",
unique_key=["date", "id_deal"],
)
}}
select
-- Unique Key --
lda.date,
coalesce(icuh.id_deal, 'UNSET') as id_deal,
-- Dimensions --
coalesce(
icd.main_billing_country_iso_3_per_deal, 'UNSET'
) as main_billing_country_iso_3_per_deal,
coalesce(
icmas.active_accommodations_per_deal_segmentation, 'UNSET'
) as active_accommodations_per_deal_segmentation,
-- Metrics --
count(
distinct case
when lda.accommodation_lifecycle_state = '01-New'
then lda.id_accommodation
else null
end
) as new_listings,
count(
distinct case
when lda.accommodation_lifecycle_state = '02-Never Booked'
then lda.id_accommodation
else null
end
) as never_booked_listings,
count(
distinct case
when lda.accommodation_lifecycle_state = '03-First Time Booked'
then lda.id_accommodation
else null
end
) as first_time_booked_listings,
count(
distinct case
when lda.accommodation_lifecycle_state = '04-Active'
then lda.id_accommodation
else null
end
) as active_listings,
count(
distinct case
when lda.accommodation_lifecycle_state = '05-Churning'
then lda.id_accommodation
else null
end
) as churning_listings,
count(
distinct case
when lda.accommodation_lifecycle_state = '06-Inactive'
then lda.id_accommodation
else null
end
) as inactive_listings,
count(
distinct case
when lda.accommodation_lifecycle_state = '07-Reactivated'
then lda.id_accommodation
else null
end
) as reactivated_listings,
sum(
case when has_been_booked_within_current_month then 1 else 0 end
) as listings_booked_in_month,
sum(
case when has_been_booked_within_last_6_months then 1 else 0 end
) as listings_booked_in_6_months,
sum(
case when has_been_booked_within_last_12_months then 1 else 0 end
) as listings_booked_in_12_months
from {{ ref("int_kpis__lifecycle_daily_accommodation") }} as lda
left join
{{ ref("int_core__unique_accommodation_to_user") }} as icatu
on icatu.id_accommodation = lda.id_accommodation
left join
{{ ref("int_core__user_host") }} as icuh on icatu.id_user_owner = icuh.id_user_host
left join {{ ref("int_core__deal") }} as icd on icuh.id_deal = icd.id_deal
left join
{{ ref("int_kpis__dimension_daily_accommodation") }} as icmas
on icd.id_deal = icmas.id_deal
and lda.date = icmas.date
group by 1, 2, 3, 4

View file

@ -5544,3 +5544,240 @@ models:
data_type: bigint
description: |
Count of deals booked within the past 12 months for a given date, dimension and value.
- name: int_kpis__metric_daily_listings
description: |
This model computes the Daily Listing metrics at the deepest granularity.
Listing metrics at daily level already contain the time dimension
aggregates needed, thus we won't have mtd or monthly equivalent models,
but rather just select from this daily model the needed days to recover
the necessary information.
The unique key corresponds to the deepest granularity of the model,
in this case:
- date,
- id_deal
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- date
- id_deal
columns:
- name: date
data_type: date
description: Date containing the Listing metrics.
tests:
- not_null
- name: id_deal
data_type: string
description: Unique identifier of an account.
tests:
- not_null
- name: active_accommodations_per_deal_segmentation
data_type: string
description: |
Segment value based on the number of listings booked in 12 months
for a given deal and date.
tests:
- not_null
- accepted_values:
values:
- "0"
- "01-05"
- "06-20"
- "21-60"
- "61+"
- "UNSET"
- name: main_billing_country_iso_3_per_deal
data_type: string
description: |
Main billing country of the host aggregated at Listing level.
tests:
- not_null
- name: new_listings
data_type: bigint
description: |
Count of new listings in a given date and per specified dimension.
- name: never_booked_listings
data_type: bigint
description: |
Count of never booked listings in a given date and per specified dimension.
- name: first_time_booked_listings
data_type: bigint
description: |
Count of first-time booked listings in a given date and per specified dimension.
- name: active_listings
data_type: bigint
description: |
Count of active listings in a given date and per specified dimension.
- name: inactive_listings
data_type: bigint
description: |
Count of inactive listings in a given date and per specified dimension.
- name: churning_listings
data_type: bigint
description: |
Count of churning listings in a given date and per specified dimension.
- name: reactivated_listings
data_type: bigint
description: |
Count of reactivated listings in a given date and per specified dimension.
- name: listings_booked_in_month
data_type: bigint
description: |
Count of listings booked within the month in a given date and per specified dimension.
- name: listings_booked_in_6_months
data_type: bigint
description: |
Count of listings booked within the past 6 months in a given date and per specified dimension.
- name: listings_booked_in_12_months
data_type: bigint
description: |
Count of listings booked within the past 12 months in a given date and per specified dimension.
- name: int_kpis__agg_daily_listings
description: |
This model computes the dimension aggregation for
Daily Listing metrics.
The primary key of this model is date, dimension
and dimension_value.
Listing metrics at daily level already contain the time dimension
aggregates needed, thus we won't have mtd or monthly equivalent models,
but rather just select from this daily model the needed days to recover
the necessary information.
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- date
- dimension
- dimension_value
columns:
- name: date
data_type: date
description: Date containing the Listing metrics.
tests:
- not_null
- name: dimension
data_type: string
description: The dimension or granularity of the metrics.
tests:
- assert_dimension_completeness:
metric_column_names:
- new_listings
- never_booked_listings
- first_time_booked_listings
- active_listings
- churning_listings
- inactive_listings
- reactivated_listings
- listings_booked_in_month
- listings_booked_in_6_months
- listings_booked_in_12_months
- accepted_values:
values:
- global
- by_number_of_listings
- by_billing_country
- by_deal
- name: dimension_value
data_type: string
description: The value or segment available for the selected dimension.
tests:
- not_null
- name: is_end_of_month
data_type: boolean
description: True if it's end of month, false otherwise.
tests:
- not_null
- name: is_current_month
data_type: boolean
description: |
True if the date is within the current month, false otherwise.
tests:
- not_null
- name: is_month_to_date
data_type: boolean
description: |
True if the date is within the scope of month-to-date, false otherwise.
The scope of month-to-date takes into account both 1) a date being in
the current month or 2) a date corresponding to the same month of the
previous year, which day number cannot be higher than yesterday's day
number.
tests:
- not_null
- name: new_listings
data_type: bigint
description: |
Count of new listings for a given date, dimension and value.
- name: never_booked_listings
data_type: bigint
description: |
Count of never booked listings for a given date, dimension and value.
- name: first_time_booked_listings
data_type: bigint
description: |
Count of first-time booked listings for a given date, dimension and value.
- name: active_listings
data_type: bigint
description: |
Count of active listings for a given date, dimension and value.
- name: inactive_listings
data_type: bigint
description: |
Count of inactive listings for a given date, dimension and value.
- name: churning_listings
data_type: bigint
description: |
Count of churning listings for a given date, dimension and value.
- name: reactivated_listings
data_type: bigint
description: |
Count of reactivated listings for a given date, dimension and value.
- name: listings_booked_in_month
data_type: bigint
description: |
Count of listings booked within the month for a given date, dimension and value.
- name: listings_booked_in_6_months
data_type: bigint
description: |
Count of listings booked within the past 6 months for a given date, dimension and value.
- name: listings_booked_in_12_months
data_type: bigint
description: |
Count of listings booked within the past 12 months for a given date, dimension and value.

View file

@ -0,0 +1,124 @@
{% set min_date = "2022-01-01" %}
{% set dimensions = ("global", "by_billing_country") %}
-- "by_number_of_listings" excluded on purpose - there's differences because of daily
-- segmentation
with
new_listings as (
select
date,
dimension,
dimension_value,
new_listings,
never_booked_listings,
first_time_booked_listings,
active_listings,
churning_listings,
inactive_listings,
reactivated_listings,
listings_booked_in_month,
listings_booked_in_6_months,
listings_booked_in_12_months
from {{ ref("int_kpis__agg_daily_listings") }}
where
date >= '{{ min_date }}'
and dimension in {{ dimensions }}
and dimension_value <> 'UNSET'
and (is_end_of_month = true or is_month_to_date = true)
),
old_listings as (
select
date,
dimension,
dimension_value,
new_listings,
never_booked_listings,
first_time_booked_listings,
active_listings,
churning_listings,
inactive_listings,
reactivated_listings,
listings_booked_in_month,
listings_booked_in_6_months,
listings_booked_in_12_months
from {{ ref("int_core__mtd_accommodation_metrics") }}
where date >= '{{ min_date }}' and dimension in {{ dimensions }}
),
comparison as (
select
coalesce(o.date, n.date) as date,
coalesce(o.dimension, n.dimension) as dimension,
coalesce(o.dimension_value, n.dimension_value) as dimension_value,
o.new_listings as old_new_listings,
n.new_listings as new_new_listings,
coalesce(o.new_listings, 0)
- coalesce(n.new_listings, 0) as diff_new_listings,
o.never_booked_listings as old_never_booked_listings,
n.never_booked_listings as new_never_booked_listings,
coalesce(o.never_booked_listings, 0)
- coalesce(n.never_booked_listings, 0) as diff_never_booked_listings,
o.first_time_booked_listings as old_first_time_booked_listings,
n.first_time_booked_listings as new_first_time_booked_listings,
coalesce(o.first_time_booked_listings, 0) - coalesce(
n.first_time_booked_listings, 0
) as diff_first_time_booked_listings,
o.active_listings as old_active_listings,
n.active_listings as new_active_listings,
coalesce(o.active_listings, 0)
- coalesce(n.active_listings, 0) as diff_active_listings,
o.inactive_listings as old_inactive_listings,
n.inactive_listings as new_inactive_listings,
coalesce(o.inactive_listings, 0)
- coalesce(n.inactive_listings, 0) as diff_inactive_listings,
o.churning_listings as old_churning_listings,
n.churning_listings as new_churning_listings,
coalesce(o.churning_listings, 0)
- coalesce(n.churning_listings, 0) as diff_churning_listings,
o.reactivated_listings as old_reactivated_listings,
n.reactivated_listings as new_reactivated_listings,
coalesce(o.reactivated_listings, 0)
- coalesce(n.reactivated_listings, 0) as diff_reactivated_listings,
o.listings_booked_in_month as old_listings_booked_in_month,
n.listings_booked_in_month as new_listings_booked_in_month,
coalesce(o.listings_booked_in_month, 0)
- coalesce(n.listings_booked_in_month, 0) as diff_listings_booked_in_month,
o.listings_booked_in_6_months as old_listings_booked_in_6_months,
n.listings_booked_in_6_months as new_listings_booked_in_6_months,
coalesce(o.listings_booked_in_6_months, 0) - coalesce(
n.listings_booked_in_6_months, 0
) as diff_listings_booked_in_6_months,
o.listings_booked_in_12_months as old_listings_booked_in_12_months,
n.listings_booked_in_12_months as new_listings_booked_in_12_months,
coalesce(o.listings_booked_in_12_months, 0) - coalesce(
n.listings_booked_in_12_months, 0
) as diff_listings_booked_in_12_months
from old_listings o
full outer join
new_listings n
on o.date = n.date
and o.dimension = n.dimension
and o.dimension_value = n.dimension_value
)
select *
from comparison
where
diff_new_listings <> 0
or diff_never_booked_listings <> 0
or diff_first_time_booked_listings <> 0
or diff_active_listings <> 0
or diff_inactive_listings <> 0
or diff_churning_listings <> 0
or diff_reactivated_listings <> 0
or diff_listings_booked_in_month <> 0
or diff_listings_booked_in_6_months <> 0
or diff_listings_booked_in_12_months <> 0
order by date desc