From 335ba9a29b4903d47e18a1b1889dc2f499d89fa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oriol=20Roqu=C3=A9=20Paniagua?= Date: Thu, 7 Nov 2024 16:56:43 +0000 Subject: [PATCH] Merged PR 3461: Creates models for listing metrics # Description Adds listing metrics model. It's a bit similar as the Deal metrics, but here we can aggregate by deal :) I'm aware that int_kpis__metric_daily_listings takes quite a bit (3 min). To be optimised later on. # Checklist - [X] The edited models and dependants run properly with production data. - [X] The edited models are sufficiently documented. - [X] The edited models contain PK tests, and I've ran and passed them. - [ ] I have checked for DRY opportunities with other models and docs. - [ ] I've picked the right materialization for the affected models. # Other - [ ] Check if a full-refresh is required after this PR is merged. Related work items: #23567 --- .../kpis/int_kpis__agg_daily_listings.sql | 34 +++ .../kpis/int_kpis__metric_daily_deals.sql | 2 +- .../kpis/int_kpis__metric_daily_listings.sql | 88 +++++++ models/intermediate/kpis/schema.yml | 237 ++++++++++++++++++ .../tmp_kpis_refactor_equivalent_listings.sql | 124 +++++++++ 5 files changed, 484 insertions(+), 1 deletion(-) create mode 100644 models/intermediate/kpis/int_kpis__agg_daily_listings.sql create mode 100644 models/intermediate/kpis/int_kpis__metric_daily_listings.sql create mode 100644 tests/tmp_kpis_refactor_equivalent_listings.sql diff --git a/models/intermediate/kpis/int_kpis__agg_daily_listings.sql b/models/intermediate/kpis/int_kpis__agg_daily_listings.sql new file mode 100644 index 0000000..1a22a8d --- /dev/null +++ b/models/intermediate/kpis/int_kpis__agg_daily_listings.sql @@ -0,0 +1,34 @@ +{% set dimensions = get_kpi_dimensions_per_model("LISTINGS") %} + +{{ config(materialized="table", unique_key=["date", "dimension", "dimension_value"]) }} + + +{% for dimension in dimensions %} + select + -- Unique Key -- + d.date, + {{ dimension.dimension }} as dimension, + {{ dimension.dimension_value }} as dimension_value, + -- Date Attributes -- + d.is_current_month, + d.is_end_of_month, + d.is_month_to_date, + -- Metrics -- + sum(new_listings) as new_listings, + sum(never_booked_listings) as never_booked_listings, + sum(first_time_booked_listings) as first_time_booked_listings, + sum(active_listings) as active_listings, + sum(churning_listings) as churning_listings, + sum(inactive_listings) as inactive_listings, + sum(reactivated_listings) as reactivated_listings, + sum(listings_booked_in_month) as listings_booked_in_month, + sum(listings_booked_in_6_months) as listings_booked_in_6_months, + sum(listings_booked_in_12_months) as listings_booked_in_12_months + from {{ ref("int_kpis__dimension_dates") }} d + left join {{ ref("int_kpis__metric_daily_listings") }} as mdd on d.date = mdd.date + where mdd.id_deal is not null + group by 1, 2, 3, 4, 5, 6 + {% if not loop.last %} + union all + {% endif %} +{% endfor %} diff --git a/models/intermediate/kpis/int_kpis__metric_daily_deals.sql b/models/intermediate/kpis/int_kpis__metric_daily_deals.sql index e27f6a6..6a93dbb 100644 --- a/models/intermediate/kpis/int_kpis__metric_daily_deals.sql +++ b/models/intermediate/kpis/int_kpis__metric_daily_deals.sql @@ -11,13 +11,13 @@ select -- Unique Key -- ldl.date, - -- Dimensions -- coalesce( icd.main_billing_country_iso_3_per_deal, 'UNSET' ) as main_billing_country_iso_3_per_deal, coalesce( icmas.active_accommodations_per_deal_segmentation, 'UNSET' ) as active_accommodations_per_deal_segmentation, + -- Metrics -- count( distinct case when ldl.deal_lifecycle_state = '01-New' then ldl.id_deal else null diff --git a/models/intermediate/kpis/int_kpis__metric_daily_listings.sql b/models/intermediate/kpis/int_kpis__metric_daily_listings.sql new file mode 100644 index 0000000..8c97291 --- /dev/null +++ b/models/intermediate/kpis/int_kpis__metric_daily_listings.sql @@ -0,0 +1,88 @@ +{{ + config( + materialized="table", + unique_key=["date", "id_deal"], + ) +}} +select + -- Unique Key -- + lda.date, + coalesce(icuh.id_deal, 'UNSET') as id_deal, + -- Dimensions -- + coalesce( + icd.main_billing_country_iso_3_per_deal, 'UNSET' + ) as main_billing_country_iso_3_per_deal, + coalesce( + icmas.active_accommodations_per_deal_segmentation, 'UNSET' + ) as active_accommodations_per_deal_segmentation, + -- Metrics -- + count( + distinct case + when lda.accommodation_lifecycle_state = '01-New' + then lda.id_accommodation + else null + end + ) as new_listings, + count( + distinct case + when lda.accommodation_lifecycle_state = '02-Never Booked' + then lda.id_accommodation + else null + end + ) as never_booked_listings, + count( + distinct case + when lda.accommodation_lifecycle_state = '03-First Time Booked' + then lda.id_accommodation + else null + end + ) as first_time_booked_listings, + count( + distinct case + when lda.accommodation_lifecycle_state = '04-Active' + then lda.id_accommodation + else null + end + ) as active_listings, + count( + distinct case + when lda.accommodation_lifecycle_state = '05-Churning' + then lda.id_accommodation + else null + end + ) as churning_listings, + count( + distinct case + when lda.accommodation_lifecycle_state = '06-Inactive' + then lda.id_accommodation + else null + end + ) as inactive_listings, + count( + distinct case + when lda.accommodation_lifecycle_state = '07-Reactivated' + then lda.id_accommodation + else null + end + ) as reactivated_listings, + sum( + case when has_been_booked_within_current_month then 1 else 0 end + ) as listings_booked_in_month, + sum( + case when has_been_booked_within_last_6_months then 1 else 0 end + ) as listings_booked_in_6_months, + sum( + case when has_been_booked_within_last_12_months then 1 else 0 end + ) as listings_booked_in_12_months +from {{ ref("int_kpis__lifecycle_daily_accommodation") }} as lda +left join + {{ ref("int_core__unique_accommodation_to_user") }} as icatu + on icatu.id_accommodation = lda.id_accommodation +left join + {{ ref("int_core__user_host") }} as icuh on icatu.id_user_owner = icuh.id_user_host +left join {{ ref("int_core__deal") }} as icd on icuh.id_deal = icd.id_deal +left join + {{ ref("int_kpis__dimension_daily_accommodation") }} as icmas + on icd.id_deal = icmas.id_deal + and lda.date = icmas.date +group by 1, 2, 3, 4 diff --git a/models/intermediate/kpis/schema.yml b/models/intermediate/kpis/schema.yml index 351bbaf..fe59c41 100644 --- a/models/intermediate/kpis/schema.yml +++ b/models/intermediate/kpis/schema.yml @@ -5544,3 +5544,240 @@ models: data_type: bigint description: | Count of deals booked within the past 12 months for a given date, dimension and value. + + - name: int_kpis__metric_daily_listings + description: | + This model computes the Daily Listing metrics at the deepest granularity. + + Listing metrics at daily level already contain the time dimension + aggregates needed, thus we won't have mtd or monthly equivalent models, + but rather just select from this daily model the needed days to recover + the necessary information. + + The unique key corresponds to the deepest granularity of the model, + in this case: + - date, + - id_deal + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - date + - id_deal + + columns: + - name: date + data_type: date + description: Date containing the Listing metrics. + tests: + - not_null + + - name: id_deal + data_type: string + description: Unique identifier of an account. + tests: + - not_null + + - name: active_accommodations_per_deal_segmentation + data_type: string + description: | + Segment value based on the number of listings booked in 12 months + for a given deal and date. + tests: + - not_null + - accepted_values: + values: + - "0" + - "01-05" + - "06-20" + - "21-60" + - "61+" + - "UNSET" + + - name: main_billing_country_iso_3_per_deal + data_type: string + description: | + Main billing country of the host aggregated at Listing level. + tests: + - not_null + + - name: new_listings + data_type: bigint + description: | + Count of new listings in a given date and per specified dimension. + + - name: never_booked_listings + data_type: bigint + description: | + Count of never booked listings in a given date and per specified dimension. + + - name: first_time_booked_listings + data_type: bigint + description: | + Count of first-time booked listings in a given date and per specified dimension. + + - name: active_listings + data_type: bigint + description: | + Count of active listings in a given date and per specified dimension. + + - name: inactive_listings + data_type: bigint + description: | + Count of inactive listings in a given date and per specified dimension. + + - name: churning_listings + data_type: bigint + description: | + Count of churning listings in a given date and per specified dimension. + + - name: reactivated_listings + data_type: bigint + description: | + Count of reactivated listings in a given date and per specified dimension. + + - name: listings_booked_in_month + data_type: bigint + description: | + Count of listings booked within the month in a given date and per specified dimension. + + - name: listings_booked_in_6_months + data_type: bigint + description: | + Count of listings booked within the past 6 months in a given date and per specified dimension. + + - name: listings_booked_in_12_months + data_type: bigint + description: | + Count of listings booked within the past 12 months in a given date and per specified dimension. + + - name: int_kpis__agg_daily_listings + description: | + This model computes the dimension aggregation for + Daily Listing metrics. + + The primary key of this model is date, dimension + and dimension_value. + + Listing metrics at daily level already contain the time dimension + aggregates needed, thus we won't have mtd or monthly equivalent models, + but rather just select from this daily model the needed days to recover + the necessary information. + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - date + - dimension + - dimension_value + + columns: + - name: date + data_type: date + description: Date containing the Listing metrics. + tests: + - not_null + + - name: dimension + data_type: string + description: The dimension or granularity of the metrics. + tests: + - assert_dimension_completeness: + metric_column_names: + - new_listings + - never_booked_listings + - first_time_booked_listings + - active_listings + - churning_listings + - inactive_listings + - reactivated_listings + - listings_booked_in_month + - listings_booked_in_6_months + - listings_booked_in_12_months + + - accepted_values: + values: + - global + - by_number_of_listings + - by_billing_country + - by_deal + + - name: dimension_value + data_type: string + description: The value or segment available for the selected dimension. + tests: + - not_null + + - name: is_end_of_month + data_type: boolean + description: True if it's end of month, false otherwise. + tests: + - not_null + + - name: is_current_month + data_type: boolean + description: | + True if the date is within the current month, false otherwise. + tests: + - not_null + + - name: is_month_to_date + data_type: boolean + description: | + True if the date is within the scope of month-to-date, false otherwise. + The scope of month-to-date takes into account both 1) a date being in + the current month or 2) a date corresponding to the same month of the + previous year, which day number cannot be higher than yesterday's day + number. + tests: + - not_null + + - name: new_listings + data_type: bigint + description: | + Count of new listings for a given date, dimension and value. + + - name: never_booked_listings + data_type: bigint + description: | + Count of never booked listings for a given date, dimension and value. + + - name: first_time_booked_listings + data_type: bigint + description: | + Count of first-time booked listings for a given date, dimension and value. + + - name: active_listings + data_type: bigint + description: | + Count of active listings for a given date, dimension and value. + + - name: inactive_listings + data_type: bigint + description: | + Count of inactive listings for a given date, dimension and value. + + - name: churning_listings + data_type: bigint + description: | + Count of churning listings for a given date, dimension and value. + + - name: reactivated_listings + data_type: bigint + description: | + Count of reactivated listings for a given date, dimension and value. + + - name: listings_booked_in_month + data_type: bigint + description: | + Count of listings booked within the month for a given date, dimension and value. + + - name: listings_booked_in_6_months + data_type: bigint + description: | + Count of listings booked within the past 6 months for a given date, dimension and value. + + - name: listings_booked_in_12_months + data_type: bigint + description: | + Count of listings booked within the past 12 months for a given date, dimension and value. diff --git a/tests/tmp_kpis_refactor_equivalent_listings.sql b/tests/tmp_kpis_refactor_equivalent_listings.sql new file mode 100644 index 0000000..1fe1d1a --- /dev/null +++ b/tests/tmp_kpis_refactor_equivalent_listings.sql @@ -0,0 +1,124 @@ +{% set min_date = "2022-01-01" %} +{% set dimensions = ("global", "by_billing_country") %} +-- "by_number_of_listings" excluded on purpose - there's differences because of daily +-- segmentation +with + new_listings as ( + select + date, + dimension, + dimension_value, + new_listings, + never_booked_listings, + first_time_booked_listings, + active_listings, + churning_listings, + inactive_listings, + reactivated_listings, + listings_booked_in_month, + listings_booked_in_6_months, + listings_booked_in_12_months + from {{ ref("int_kpis__agg_daily_listings") }} + where + date >= '{{ min_date }}' + and dimension in {{ dimensions }} + and dimension_value <> 'UNSET' + and (is_end_of_month = true or is_month_to_date = true) + ), + old_listings as ( + select + date, + dimension, + dimension_value, + new_listings, + never_booked_listings, + first_time_booked_listings, + active_listings, + churning_listings, + inactive_listings, + reactivated_listings, + listings_booked_in_month, + listings_booked_in_6_months, + listings_booked_in_12_months + from {{ ref("int_core__mtd_accommodation_metrics") }} + where date >= '{{ min_date }}' and dimension in {{ dimensions }} + ), + comparison as ( + select + coalesce(o.date, n.date) as date, + coalesce(o.dimension, n.dimension) as dimension, + coalesce(o.dimension_value, n.dimension_value) as dimension_value, + o.new_listings as old_new_listings, + n.new_listings as new_new_listings, + coalesce(o.new_listings, 0) + - coalesce(n.new_listings, 0) as diff_new_listings, + + o.never_booked_listings as old_never_booked_listings, + n.never_booked_listings as new_never_booked_listings, + coalesce(o.never_booked_listings, 0) + - coalesce(n.never_booked_listings, 0) as diff_never_booked_listings, + + o.first_time_booked_listings as old_first_time_booked_listings, + n.first_time_booked_listings as new_first_time_booked_listings, + coalesce(o.first_time_booked_listings, 0) - coalesce( + n.first_time_booked_listings, 0 + ) as diff_first_time_booked_listings, + + o.active_listings as old_active_listings, + n.active_listings as new_active_listings, + coalesce(o.active_listings, 0) + - coalesce(n.active_listings, 0) as diff_active_listings, + + o.inactive_listings as old_inactive_listings, + n.inactive_listings as new_inactive_listings, + coalesce(o.inactive_listings, 0) + - coalesce(n.inactive_listings, 0) as diff_inactive_listings, + + o.churning_listings as old_churning_listings, + n.churning_listings as new_churning_listings, + coalesce(o.churning_listings, 0) + - coalesce(n.churning_listings, 0) as diff_churning_listings, + + o.reactivated_listings as old_reactivated_listings, + n.reactivated_listings as new_reactivated_listings, + coalesce(o.reactivated_listings, 0) + - coalesce(n.reactivated_listings, 0) as diff_reactivated_listings, + + o.listings_booked_in_month as old_listings_booked_in_month, + n.listings_booked_in_month as new_listings_booked_in_month, + coalesce(o.listings_booked_in_month, 0) + - coalesce(n.listings_booked_in_month, 0) as diff_listings_booked_in_month, + + o.listings_booked_in_6_months as old_listings_booked_in_6_months, + n.listings_booked_in_6_months as new_listings_booked_in_6_months, + coalesce(o.listings_booked_in_6_months, 0) - coalesce( + n.listings_booked_in_6_months, 0 + ) as diff_listings_booked_in_6_months, + + o.listings_booked_in_12_months as old_listings_booked_in_12_months, + n.listings_booked_in_12_months as new_listings_booked_in_12_months, + coalesce(o.listings_booked_in_12_months, 0) - coalesce( + n.listings_booked_in_12_months, 0 + ) as diff_listings_booked_in_12_months + + from old_listings o + full outer join + new_listings n + on o.date = n.date + and o.dimension = n.dimension + and o.dimension_value = n.dimension_value + ) +select * +from comparison +where + diff_new_listings <> 0 + or diff_never_booked_listings <> 0 + or diff_first_time_booked_listings <> 0 + or diff_active_listings <> 0 + or diff_inactive_listings <> 0 + or diff_churning_listings <> 0 + or diff_reactivated_listings <> 0 + or diff_listings_booked_in_month <> 0 + or diff_listings_booked_in_6_months <> 0 + or diff_listings_booked_in_12_months <> 0 +order by date desc