From 8ddb6959475f2cdb4f3fd931c5f4a76f9addab83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oriol=20Roqu=C3=A9=20Paniagua?= Date: Wed, 30 Oct 2024 11:45:45 +0000 Subject: [PATCH] Merged PR 3377: Adds GJ Created, Started and Completed in new KPIs flow # Description Adds GJ Created, Started and Completed. Each metric has: * A Daily model * A Monthly/MTD without dimension aggregate * A Monthly/MTD with dimension aggregate * A comparison vs. the current flow of KPIs in the form of a test It's quite similar to Created Bookings. The main differences are that 1) it reads from the Verification Requests table and 2) I don't compute a New Dash dimension. This actually helps validate the logic of the dimension configuration macro, which works well. I confirm that the 3 tests of comparison vs. current data work well in my local environment - i.e. identical content for Global, Billing Country for these 3 metrics since 2022. This does not include: * Guest Journeys with Payment. I'll do this in a separate PR * Guest Payments/Revenue. I'll do this in a separate PR # Checklist - [X] The edited models and dependants run properly with production data. - [X] The edited models are sufficiently documented. - [X] The edited models contain PK tests, and I've run and passed them. - [NA] I have checked for DRY opportunities with other models and docs. **Briefly discussed with Pablo, we'll leave it like this for the time being** - [X] I've picked the right materialization for the affected models. **Runs quite fast** # Other - [ ] Check if a full-refresh is required after this PR is merged. 
Related work items: #23453 --- ...gated_monthly_completed_guest_journeys.sql | 24 + ...regated_monthly_created_guest_journeys.sql | 24 + ...regated_monthly_started_guest_journeys.sql | 24 + ...ggregated_mtd_completed_guest_journeys.sql | 24 + ..._aggregated_mtd_created_guest_journeys.sql | 24 + ..._aggregated_mtd_started_guest_journeys.sql | 24 + ..._metric_daily_completed_guest_journeys.sql | 23 + ...s__metric_daily_created_guest_journeys.sql | 23 + ...s__metric_daily_started_guest_journeys.sql | 23 + ...etric_monthly_completed_guest_journeys.sql | 27 + ..._metric_monthly_created_guest_journeys.sql | 27 + ..._metric_monthly_started_guest_journeys.sql | 27 + ...s__metric_mtd_completed_guest_journeys.sql | 28 + ...pis__metric_mtd_created_guest_journeys.sql | 28 + ...pis__metric_mtd_started_guest_journeys.sql | 28 + models/intermediate/kpis/schema.yml | 912 +++++++++++++++++- ...or_equivalent_completed_guest_journeys.sql | 53 + ...ctor_equivalent_created_guest_journeys.sql | 53 + ...ctor_equivalent_started_guest_journeys.sql | 53 + 19 files changed, 1446 insertions(+), 3 deletions(-) create mode 100644 models/intermediate/kpis/int_kpis__aggregated_monthly_completed_guest_journeys.sql create mode 100644 models/intermediate/kpis/int_kpis__aggregated_monthly_created_guest_journeys.sql create mode 100644 models/intermediate/kpis/int_kpis__aggregated_monthly_started_guest_journeys.sql create mode 100644 models/intermediate/kpis/int_kpis__aggregated_mtd_completed_guest_journeys.sql create mode 100644 models/intermediate/kpis/int_kpis__aggregated_mtd_created_guest_journeys.sql create mode 100644 models/intermediate/kpis/int_kpis__aggregated_mtd_started_guest_journeys.sql create mode 100644 models/intermediate/kpis/int_kpis__metric_daily_completed_guest_journeys.sql create mode 100644 models/intermediate/kpis/int_kpis__metric_daily_created_guest_journeys.sql create mode 100644 models/intermediate/kpis/int_kpis__metric_daily_started_guest_journeys.sql create mode 100644 
models/intermediate/kpis/int_kpis__metric_monthly_completed_guest_journeys.sql create mode 100644 models/intermediate/kpis/int_kpis__metric_monthly_created_guest_journeys.sql create mode 100644 models/intermediate/kpis/int_kpis__metric_monthly_started_guest_journeys.sql create mode 100644 models/intermediate/kpis/int_kpis__metric_mtd_completed_guest_journeys.sql create mode 100644 models/intermediate/kpis/int_kpis__metric_mtd_created_guest_journeys.sql create mode 100644 models/intermediate/kpis/int_kpis__metric_mtd_started_guest_journeys.sql create mode 100644 tests/tmp_kpis_refactor_equivalent_completed_guest_journeys.sql create mode 100644 tests/tmp_kpis_refactor_equivalent_created_guest_journeys.sql create mode 100644 tests/tmp_kpis_refactor_equivalent_started_guest_journeys.sql diff --git a/models/intermediate/kpis/int_kpis__aggregated_monthly_completed_guest_journeys.sql b/models/intermediate/kpis/int_kpis__aggregated_monthly_completed_guest_journeys.sql new file mode 100644 index 0000000..5b9207d --- /dev/null +++ b/models/intermediate/kpis/int_kpis__aggregated_monthly_completed_guest_journeys.sql @@ -0,0 +1,24 @@ +{% set dimensions = get_kpi_dimensions_per_model("COMPLETED_GUEST_JOURNEYS") %} + +{{ + config( + materialized="table", unique_key=["end_date", "dimension", "dimension_value"] + ) +}} + + +{% for dimension in dimensions %} + select + -- Unique Key -- + start_date, + end_date, + {{ dimension.dimension }} as dimension, + {{ dimension.dimension_value }} as dimension_value, + -- Metrics -- + sum(completed_guest_journeys) as completed_guest_journeys + from {{ ref("int_kpis__metric_monthly_completed_guest_journeys") }} + group by 1, 2, 3, 4 + {% if not loop.last %} + union all + {% endif %} +{% endfor %} diff --git a/models/intermediate/kpis/int_kpis__aggregated_monthly_created_guest_journeys.sql b/models/intermediate/kpis/int_kpis__aggregated_monthly_created_guest_journeys.sql new file mode 100644 index 0000000..b4affe8 --- /dev/null +++ 
b/models/intermediate/kpis/int_kpis__aggregated_monthly_created_guest_journeys.sql @@ -0,0 +1,24 @@ +{% set dimensions = get_kpi_dimensions_per_model("CREATED_GUEST_JOURNEYS") %} + +{{ + config( + materialized="table", unique_key=["end_date", "dimension", "dimension_value"] + ) +}} + + +{% for dimension in dimensions %} + select + -- Unique Key -- + start_date, + end_date, + {{ dimension.dimension }} as dimension, + {{ dimension.dimension_value }} as dimension_value, + -- Metrics -- + sum(created_guest_journeys) as created_guest_journeys + from {{ ref("int_kpis__metric_monthly_created_guest_journeys") }} + group by 1, 2, 3, 4 + {% if not loop.last %} + union all + {% endif %} +{% endfor %} diff --git a/models/intermediate/kpis/int_kpis__aggregated_monthly_started_guest_journeys.sql b/models/intermediate/kpis/int_kpis__aggregated_monthly_started_guest_journeys.sql new file mode 100644 index 0000000..3d698a0 --- /dev/null +++ b/models/intermediate/kpis/int_kpis__aggregated_monthly_started_guest_journeys.sql @@ -0,0 +1,24 @@ +{% set dimensions = get_kpi_dimensions_per_model("STARTED_GUEST_JOURNEYS") %} + +{{ + config( + materialized="table", unique_key=["end_date", "dimension", "dimension_value"] + ) +}} + + +{% for dimension in dimensions %} + select + -- Unique Key -- + start_date, + end_date, + {{ dimension.dimension }} as dimension, + {{ dimension.dimension_value }} as dimension_value, + -- Metrics -- + sum(started_guest_journeys) as started_guest_journeys + from {{ ref("int_kpis__metric_monthly_started_guest_journeys") }} + group by 1, 2, 3, 4 + {% if not loop.last %} + union all + {% endif %} +{% endfor %} diff --git a/models/intermediate/kpis/int_kpis__aggregated_mtd_completed_guest_journeys.sql b/models/intermediate/kpis/int_kpis__aggregated_mtd_completed_guest_journeys.sql new file mode 100644 index 0000000..c4ff377 --- /dev/null +++ b/models/intermediate/kpis/int_kpis__aggregated_mtd_completed_guest_journeys.sql @@ -0,0 +1,24 @@ +{% set dimensions = 
get_kpi_dimensions_per_model("COMPLETED_GUEST_JOURNEYS") %} + +{{ + config( + materialized="table", unique_key=["end_date", "dimension", "dimension_value"] + ) +}} + + +{% for dimension in dimensions %} + select + -- Unique Key -- + start_date, + end_date, + {{ dimension.dimension }} as dimension, + {{ dimension.dimension_value }} as dimension_value, + -- Metrics -- + sum(completed_guest_journeys) as completed_guest_journeys + from {{ ref("int_kpis__metric_mtd_completed_guest_journeys") }} + group by 1, 2, 3, 4 + {% if not loop.last %} + union all + {% endif %} +{% endfor %} diff --git a/models/intermediate/kpis/int_kpis__aggregated_mtd_created_guest_journeys.sql b/models/intermediate/kpis/int_kpis__aggregated_mtd_created_guest_journeys.sql new file mode 100644 index 0000000..ce8f893 --- /dev/null +++ b/models/intermediate/kpis/int_kpis__aggregated_mtd_created_guest_journeys.sql @@ -0,0 +1,24 @@ +{% set dimensions = get_kpi_dimensions_per_model("CREATED_GUEST_JOURNEYS") %} + +{{ + config( + materialized="table", unique_key=["end_date", "dimension", "dimension_value"] + ) +}} + + +{% for dimension in dimensions %} + select + -- Unique Key -- + start_date, + end_date, + {{ dimension.dimension }} as dimension, + {{ dimension.dimension_value }} as dimension_value, + -- Metrics -- + sum(created_guest_journeys) as created_guest_journeys + from {{ ref("int_kpis__metric_mtd_created_guest_journeys") }} + group by 1, 2, 3, 4 + {% if not loop.last %} + union all + {% endif %} +{% endfor %} diff --git a/models/intermediate/kpis/int_kpis__aggregated_mtd_started_guest_journeys.sql b/models/intermediate/kpis/int_kpis__aggregated_mtd_started_guest_journeys.sql new file mode 100644 index 0000000..9c291d7 --- /dev/null +++ b/models/intermediate/kpis/int_kpis__aggregated_mtd_started_guest_journeys.sql @@ -0,0 +1,24 @@ +{% set dimensions = get_kpi_dimensions_per_model("STARTED_GUEST_JOURNEYS") %} + +{{ + config( + materialized="table", unique_key=["end_date", "dimension", 
"dimension_value"] + ) +}} + + +{% for dimension in dimensions %} + select + -- Unique Key -- + start_date, + end_date, + {{ dimension.dimension }} as dimension, + {{ dimension.dimension_value }} as dimension_value, + -- Metrics -- + sum(started_guest_journeys) as started_guest_journeys + from {{ ref("int_kpis__metric_mtd_started_guest_journeys") }} + group by 1, 2, 3, 4 + {% if not loop.last %} + union all + {% endif %} +{% endfor %} diff --git a/models/intermediate/kpis/int_kpis__metric_daily_completed_guest_journeys.sql b/models/intermediate/kpis/int_kpis__metric_daily_completed_guest_journeys.sql new file mode 100644 index 0000000..df5dd0c --- /dev/null +++ b/models/intermediate/kpis/int_kpis__metric_daily_completed_guest_journeys.sql @@ -0,0 +1,23 @@ +{{ config(materialized="table", unique_key=["date", "id_deal"]) }} +select + -- Unique Key -- + icvr.verification_estimated_completed_date_utc as date, + coalesce(icuh.id_deal, 'UNSET') as id_deal, + -- Dimensions -- + coalesce( + icd.main_billing_country_iso_3_per_deal, 'UNSET' + ) as main_billing_country_iso_3_per_deal, + coalesce( + icmas.active_accommodations_per_deal_segmentation, 'UNSET' + ) as active_accommodations_per_deal_segmentation, + -- Metrics -- + count(distinct icvr.id_verification_request) as completed_guest_journeys +from {{ ref("int_core__verification_requests") }} as icvr +left join + {{ ref("int_core__user_host") }} as icuh on icvr.id_user_host = icuh.id_user_host +left join {{ ref("int_core__deal") }} as icd on icuh.id_deal = icd.id_deal +left join + {{ ref("int_kpis__dimension_daily_accommodation") }} as icmas + on icuh.id_deal = icmas.id_deal + and icvr.verification_estimated_completed_date_utc = icmas.date +group by 1, 2, 3, 4 diff --git a/models/intermediate/kpis/int_kpis__metric_daily_created_guest_journeys.sql b/models/intermediate/kpis/int_kpis__metric_daily_created_guest_journeys.sql new file mode 100644 index 0000000..bf30686 --- /dev/null +++ 
b/models/intermediate/kpis/int_kpis__metric_daily_created_guest_journeys.sql @@ -0,0 +1,23 @@ +{{ config(materialized="table", unique_key=["date", "id_deal"]) }} +select + -- Unique Key -- + icvr.created_date_utc as date, + coalesce(icuh.id_deal, 'UNSET') as id_deal, + -- Dimensions -- + coalesce( + icd.main_billing_country_iso_3_per_deal, 'UNSET' + ) as main_billing_country_iso_3_per_deal, + coalesce( + icmas.active_accommodations_per_deal_segmentation, 'UNSET' + ) as active_accommodations_per_deal_segmentation, + -- Metrics -- + count(distinct icvr.id_verification_request) as created_guest_journeys +from {{ ref("int_core__verification_requests") }} as icvr +left join + {{ ref("int_core__user_host") }} as icuh on icvr.id_user_host = icuh.id_user_host +left join {{ ref("int_core__deal") }} as icd on icuh.id_deal = icd.id_deal +left join + {{ ref("int_kpis__dimension_daily_accommodation") }} as icmas + on icuh.id_deal = icmas.id_deal + and icvr.created_date_utc = icmas.date +group by 1, 2, 3, 4 diff --git a/models/intermediate/kpis/int_kpis__metric_daily_started_guest_journeys.sql b/models/intermediate/kpis/int_kpis__metric_daily_started_guest_journeys.sql new file mode 100644 index 0000000..31e0342 --- /dev/null +++ b/models/intermediate/kpis/int_kpis__metric_daily_started_guest_journeys.sql @@ -0,0 +1,23 @@ +{{ config(materialized="table", unique_key=["date", "id_deal"]) }} +select + -- Unique Key -- + icvr.verification_estimated_started_date_utc as date, + coalesce(icuh.id_deal, 'UNSET') as id_deal, + -- Dimensions -- + coalesce( + icd.main_billing_country_iso_3_per_deal, 'UNSET' + ) as main_billing_country_iso_3_per_deal, + coalesce( + icmas.active_accommodations_per_deal_segmentation, 'UNSET' + ) as active_accommodations_per_deal_segmentation, + -- Metrics -- + count(distinct icvr.id_verification_request) as started_guest_journeys +from {{ ref("int_core__verification_requests") }} as icvr +left join + {{ ref("int_core__user_host") }} as icuh on 
icvr.id_user_host = icuh.id_user_host +left join {{ ref("int_core__deal") }} as icd on icuh.id_deal = icd.id_deal +left join + {{ ref("int_kpis__dimension_daily_accommodation") }} as icmas + on icuh.id_deal = icmas.id_deal + and icvr.verification_estimated_started_date_utc = icmas.date +group by 1, 2, 3, 4 diff --git a/models/intermediate/kpis/int_kpis__metric_monthly_completed_guest_journeys.sql b/models/intermediate/kpis/int_kpis__metric_monthly_completed_guest_journeys.sql new file mode 100644 index 0000000..a889ff0 --- /dev/null +++ b/models/intermediate/kpis/int_kpis__metric_monthly_completed_guest_journeys.sql @@ -0,0 +1,27 @@ +{{ + config( + materialized="view", + unique_key=[ + "end_date", + "id_deal", + "active_accommodations_per_deal_segmentation", + ], + ) +}} + +select + -- Unique Key -- + d.first_day_month as start_date, + d.date as end_date, + gj.id_deal, + gj.active_accommodations_per_deal_segmentation, + -- Dimensions -- + gj.main_billing_country_iso_3_per_deal, + -- Metrics -- + sum(gj.completed_guest_journeys) as completed_guest_journeys +from {{ ref("int_kpis__dimension_dates") }} d +left join + {{ ref("int_kpis__metric_daily_completed_guest_journeys") }} gj + on date_trunc('month', gj.date)::date = d.first_day_month +where d.is_end_of_month = true and gj.id_deal is not null +group by 1, 2, 3, 4, 5 diff --git a/models/intermediate/kpis/int_kpis__metric_monthly_created_guest_journeys.sql b/models/intermediate/kpis/int_kpis__metric_monthly_created_guest_journeys.sql new file mode 100644 index 0000000..048a657 --- /dev/null +++ b/models/intermediate/kpis/int_kpis__metric_monthly_created_guest_journeys.sql @@ -0,0 +1,27 @@ +{{ + config( + materialized="view", + unique_key=[ + "end_date", + "id_deal", + "active_accommodations_per_deal_segmentation", + ], + ) +}} + +select + -- Unique Key -- + d.first_day_month as start_date, + d.date as end_date, + gj.id_deal, + gj.active_accommodations_per_deal_segmentation, + -- Dimensions -- + 
gj.main_billing_country_iso_3_per_deal, + -- Metrics -- + sum(gj.created_guest_journeys) as created_guest_journeys +from {{ ref("int_kpis__dimension_dates") }} d +left join + {{ ref("int_kpis__metric_daily_created_guest_journeys") }} gj + on date_trunc('month', gj.date)::date = d.first_day_month +where d.is_end_of_month = true and gj.id_deal is not null +group by 1, 2, 3, 4, 5 diff --git a/models/intermediate/kpis/int_kpis__metric_monthly_started_guest_journeys.sql b/models/intermediate/kpis/int_kpis__metric_monthly_started_guest_journeys.sql new file mode 100644 index 0000000..cc09a9f --- /dev/null +++ b/models/intermediate/kpis/int_kpis__metric_monthly_started_guest_journeys.sql @@ -0,0 +1,27 @@ +{{ + config( + materialized="view", + unique_key=[ + "end_date", + "id_deal", + "active_accommodations_per_deal_segmentation", + ], + ) +}} + +select + -- Unique Key -- + d.first_day_month as start_date, + d.date as end_date, + gj.id_deal, + gj.active_accommodations_per_deal_segmentation, + -- Dimensions -- + gj.main_billing_country_iso_3_per_deal, + -- Metrics -- + sum(gj.started_guest_journeys) as started_guest_journeys +from {{ ref("int_kpis__dimension_dates") }} d +left join + {{ ref("int_kpis__metric_daily_started_guest_journeys") }} gj + on date_trunc('month', gj.date)::date = d.first_day_month +where d.is_end_of_month = true and gj.id_deal is not null +group by 1, 2, 3, 4, 5 diff --git a/models/intermediate/kpis/int_kpis__metric_mtd_completed_guest_journeys.sql b/models/intermediate/kpis/int_kpis__metric_mtd_completed_guest_journeys.sql new file mode 100644 index 0000000..b32aeea --- /dev/null +++ b/models/intermediate/kpis/int_kpis__metric_mtd_completed_guest_journeys.sql @@ -0,0 +1,28 @@ +{{ + config( + materialized="view", + unique_key=[ + "end_date", + "id_deal", + "active_accommodations_per_deal_segmentation", + ], + ) +}} + +select + -- Unique Key -- + d.first_day_month as start_date, + d.date as end_date, + gj.id_deal, + 
gj.active_accommodations_per_deal_segmentation, + -- Dimensions -- + gj.main_billing_country_iso_3_per_deal, + -- Metrics -- + sum(gj.completed_guest_journeys) as completed_guest_journeys +from {{ ref("int_kpis__dimension_dates") }} d +left join + {{ ref("int_kpis__metric_daily_completed_guest_journeys") }} gj + on date_trunc('month', gj.date)::date = d.first_day_month + and extract(day from gj.date) <= d.day +where d.is_month_to_date = true and gj.id_deal is not null +group by 1, 2, 3, 4, 5 diff --git a/models/intermediate/kpis/int_kpis__metric_mtd_created_guest_journeys.sql b/models/intermediate/kpis/int_kpis__metric_mtd_created_guest_journeys.sql new file mode 100644 index 0000000..2d600f9 --- /dev/null +++ b/models/intermediate/kpis/int_kpis__metric_mtd_created_guest_journeys.sql @@ -0,0 +1,28 @@ +{{ + config( + materialized="view", + unique_key=[ + "end_date", + "id_deal", + "active_accommodations_per_deal_segmentation", + ], + ) +}} + +select + -- Unique Key -- + d.first_day_month as start_date, + d.date as end_date, + gj.id_deal, + gj.active_accommodations_per_deal_segmentation, + -- Dimensions -- + gj.main_billing_country_iso_3_per_deal, + -- Metrics -- + sum(gj.created_guest_journeys) as created_guest_journeys +from {{ ref("int_kpis__dimension_dates") }} d +left join + {{ ref("int_kpis__metric_daily_created_guest_journeys") }} gj + on date_trunc('month', gj.date)::date = d.first_day_month + and extract(day from gj.date) <= d.day +where d.is_month_to_date = true and gj.id_deal is not null +group by 1, 2, 3, 4, 5 diff --git a/models/intermediate/kpis/int_kpis__metric_mtd_started_guest_journeys.sql b/models/intermediate/kpis/int_kpis__metric_mtd_started_guest_journeys.sql new file mode 100644 index 0000000..2df3ded --- /dev/null +++ b/models/intermediate/kpis/int_kpis__metric_mtd_started_guest_journeys.sql @@ -0,0 +1,28 @@ +{{ + config( + materialized="view", + unique_key=[ + "end_date", + "id_deal", + "active_accommodations_per_deal_segmentation", + ], + ) 
+}} + +select + -- Unique Key -- + d.first_day_month as start_date, + d.date as end_date, + gj.id_deal, + gj.active_accommodations_per_deal_segmentation, + -- Dimensions -- + gj.main_billing_country_iso_3_per_deal, + -- Metrics -- + sum(gj.started_guest_journeys) as started_guest_journeys +from {{ ref("int_kpis__dimension_dates") }} d +left join + {{ ref("int_kpis__metric_daily_started_guest_journeys") }} gj + on date_trunc('month', gj.date)::date = d.first_day_month + and extract(day from gj.date) <= d.day +where d.is_month_to_date = true and gj.id_deal is not null +group by 1, 2, 3, 4, 5 diff --git a/models/intermediate/kpis/schema.yml b/models/intermediate/kpis/schema.yml index eea8957..19bce55 100644 --- a/models/intermediate/kpis/schema.yml +++ b/models/intermediate/kpis/schema.yml @@ -360,8 +360,8 @@ models: - name: created_bookings data_type: bigint description: | - Count of accummulated bookings created in a given month up to the - given date and per specified dimension. + Count of accummulated bookings created in a given month + and per specified dimension. - name: int_kpis__metric_mtd_created_bookings description: | @@ -497,7 +497,7 @@ models: - name: created_bookings data_type: bigint - description: The month-to-date created bookings for a given date, dimension and value. + description: The monthly created bookings for a given date, dimension and value. - name: int_kpis__aggregated_mtd_created_bookings description: | @@ -552,3 +552,909 @@ models: - name: created_bookings data_type: bigint description: The month-to-date created bookings for a given date, dimension and value. + + - name: int_kpis__metric_daily_created_guest_journeys + description: | + This model computes the Daily Created Guest Journeys at the deepest granularity. + + The unique key corresponds to the deepest granularity of the model, + in this case: + - date, + - id_deal. 
+ + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - date + - id_deal + + columns: + - name: date + data_type: date + description: Date of when Guest Journeys have been created. + tests: + - not_null + + - name: id_deal + data_type: string + description: Unique identifier of an account. + tests: + - not_null + + - name: active_accommodations_per_deal_segmentation + data_type: string + description: | + Segment value based on the number of listings booked in 12 months + for a given deal and date. + tests: + - not_null + - accepted_values: + values: + - "0" + - "01-05" + - "06-20" + - "21-60" + - "61+" + - "UNSET" + + - name: main_billing_country_iso_3_per_deal + data_type: string + description: | + Main billing country of the host aggregated at Deal level. + tests: + - not_null + + - name: created_guest_journeys + data_type: bigint + description: | + Count of daily guest journeys created in a given date and per specified dimension. + + - name: int_kpis__metric_monthly_created_guest_journeys + description: | + This model computes the Monthly Created Guest Journeys at the + deepest granularity. + Be aware that any dimension that can change over the monthly period, + such as daily segmentations, are included in the primary key of the + model. + + The unique key corresponds to: + - end_date, + - id_deal, + - active_accommodations_per_deal_segmentation. + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - end_date + - id_deal + - active_accommodations_per_deal_segmentation + + columns: + - name: start_date + data_type: date + description: | + The start date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: end_date + data_type: date + description: | + The end date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: id_deal + data_type: string + description: Unique identifier of an account. 
+ tests: + - not_null + + - name: active_accommodations_per_deal_segmentation + data_type: string + description: | + Segment value based on the number of listings booked in 12 months + for a given deal and date. + tests: + - not_null + - accepted_values: + values: + - "0" + - "01-05" + - "06-20" + - "21-60" + - "61+" + - "UNSET" + + - name: main_billing_country_iso_3_per_deal + data_type: string + description: | + Main billing country of the host aggregated at Deal level. + tests: + - not_null + + - name: created_guest_journeys + data_type: bigint + description: | + Count of accummulated guest journeys created in a given month + and per specified dimension. + + - name: int_kpis__metric_mtd_created_guest_journeys + description: | + This model computes the Month-To-Date Created Guest Journeys at the + deepest granularity. + Be aware that any dimension that can change over the monthly period, + such as daily segmentations, are included in the primary key of the + model. + + The unique key corresponds to: + - end_date, + - id_deal, + - active_accommodations_per_deal_segmentation. + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - end_date + - id_deal + - active_accommodations_per_deal_segmentation + + columns: + - name: start_date + data_type: date + description: | + The start date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: end_date + data_type: date + description: | + The end date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: id_deal + data_type: string + description: Unique identifier of an account. + tests: + - not_null + + - name: active_accommodations_per_deal_segmentation + data_type: string + description: | + Segment value based on the number of listings booked in 12 months + for a given deal and date. 
+ tests: + - not_null + - accepted_values: + values: + - "0" + - "01-05" + - "06-20" + - "21-60" + - "61+" + - "UNSET" + + - name: main_billing_country_iso_3_per_deal + data_type: string + description: | + Main billing country of the host aggregated at Deal level. + tests: + - not_null + + - name: created_guest_journeys + data_type: bigint + description: | + Count of accummulated guest journeys created in a given month up to the + given date and per specified dimension. + + - name: int_kpis__aggregated_monthly_created_guest_journeys + description: | + This model computes the dimension aggregation for + Monthly Created Guest Journeys. + + The primary key of this model is end_date, dimension + and dimension_value. + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - end_date + - dimension + - dimension_value + + columns: + - name: start_date + data_type: date + description: | + The start date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: end_date + data_type: date + description: | + The end date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: dimension + data_type: string + description: The dimension or granularity of the metrics. + tests: + - assert_dimension_completeness: + metric_column_name: created_guest_journeys + - accepted_values: + values: + - global + - by_number_of_listings + - by_billing_country + - by_deal + + - name: dimension_value + data_type: string + description: The value or segment available for the selected dimension. + tests: + - not_null + + - name: created_guest_journeys + data_type: bigint + description: The monthly created guest journeys for a given date, dimension and value. + + - name: int_kpis__aggregated_mtd_created_guest_journeys + description: | + This model computes the dimension aggregation for + Month-To-Date Created Guest Journeys. 
+ + The primary key of this model is end_date, dimension + and dimension_value. + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - end_date + - dimension + - dimension_value + + columns: + - name: start_date + data_type: date + description: | + The start date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: end_date + data_type: date + description: | + The end date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: dimension + data_type: string + description: The dimension or granularity of the metrics. + tests: + - assert_dimension_completeness: + metric_column_name: created_guest_journeys + - accepted_values: + values: + - global + - by_number_of_listings + - by_billing_country + - by_deal + + - name: dimension_value + data_type: string + description: The value or segment available for the selected dimension. + tests: + - not_null + + - name: created_guest_journeys + data_type: bigint + description: The month-to-date created guest journeys for a given date, dimension and value. + + - name: int_kpis__metric_daily_started_guest_journeys + description: | + This model computes the Daily Started Guest Journeys at the deepest granularity. + + The unique key corresponds to the deepest granularity of the model, + in this case: + - date, + - id_deal. + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - date + - id_deal + + columns: + - name: date + data_type: date + description: Date of when Guest Journeys have been started. + tests: + - not_null + + - name: id_deal + data_type: string + description: Unique identifier of an account. + tests: + - not_null + + - name: active_accommodations_per_deal_segmentation + data_type: string + description: | + Segment value based on the number of listings booked in 12 months + for a given deal and date. 
+ tests: + - not_null + - accepted_values: + values: + - "0" + - "01-05" + - "06-20" + - "21-60" + - "61+" + - "UNSET" + + - name: main_billing_country_iso_3_per_deal + data_type: string + description: | + Main billing country of the host aggregated at Deal level. + tests: + - not_null + + - name: started_guest_journeys + data_type: bigint + description: | + Count of daily guest journeys started in a given date and per specified dimension. + + - name: int_kpis__metric_monthly_started_guest_journeys + description: | + This model computes the Monthly Started Guest Journeys at the + deepest granularity. + Be aware that any dimension that can change over the monthly period, + such as daily segmentations, are included in the primary key of the + model. + + The unique key corresponds to: + - end_date, + - id_deal, + - active_accommodations_per_deal_segmentation. + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - end_date + - id_deal + - active_accommodations_per_deal_segmentation + + columns: + - name: start_date + data_type: date + description: | + The start date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: end_date + data_type: date + description: | + The end date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: id_deal + data_type: string + description: Unique identifier of an account. + tests: + - not_null + + - name: active_accommodations_per_deal_segmentation + data_type: string + description: | + Segment value based on the number of listings booked in 12 months + for a given deal and date. + tests: + - not_null + - accepted_values: + values: + - "0" + - "01-05" + - "06-20" + - "21-60" + - "61+" + - "UNSET" + + - name: main_billing_country_iso_3_per_deal + data_type: string + description: | + Main billing country of the host aggregated at Deal level. 
+ tests: + - not_null + + - name: started_guest_journeys + data_type: bigint + description: | + Count of accummulated guest journeys started in a given month + and per specified dimension. + + - name: int_kpis__metric_mtd_started_guest_journeys + description: | + This model computes the Month-To-Date Started Guest Journeys at the + deepest granularity. + Be aware that any dimension that can change over the monthly period, + such as daily segmentations, are included in the primary key of the + model. + + The unique key corresponds to: + - end_date, + - id_deal, + - active_accommodations_per_deal_segmentation. + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - end_date + - id_deal + - active_accommodations_per_deal_segmentation + + columns: + - name: start_date + data_type: date + description: | + The start date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: end_date + data_type: date + description: | + The end date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: id_deal + data_type: string + description: Unique identifier of an account. + tests: + - not_null + + - name: active_accommodations_per_deal_segmentation + data_type: string + description: | + Segment value based on the number of listings booked in 12 months + for a given deal and date. + tests: + - not_null + - accepted_values: + values: + - "0" + - "01-05" + - "06-20" + - "21-60" + - "61+" + - "UNSET" + + - name: main_billing_country_iso_3_per_deal + data_type: string + description: | + Main billing country of the host aggregated at Deal level. + tests: + - not_null + + - name: started_guest_journeys + data_type: bigint + description: | + Count of accummulated guest journeys started in a given month up to the + given date and per specified dimension. 
+ + - name: int_kpis__aggregated_monthly_started_guest_journeys + description: | + This model computes the dimension aggregation for + Monthly Started Guest Journeys. + + The primary key of this model is end_date, dimension + and dimension_value. + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - end_date + - dimension + - dimension_value + + columns: + - name: start_date + data_type: date + description: | + The start date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: end_date + data_type: date + description: | + The end date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: dimension + data_type: string + description: The dimension or granularity of the metrics. + tests: + - assert_dimension_completeness: + metric_column_name: started_guest_journeys + - accepted_values: + values: + - global + - by_number_of_listings + - by_billing_country + - by_deal + + - name: dimension_value + data_type: string + description: The value or segment available for the selected dimension. + tests: + - not_null + + - name: started_guest_journeys + data_type: bigint + description: The monthly started guest journeys for a given date, dimension and value. + + - name: int_kpis__aggregated_mtd_started_guest_journeys + description: | + This model computes the dimension aggregation for + Month-To-Date Started Guest Journeys. + + The primary key of this model is end_date, dimension + and dimension_value. + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - end_date + - dimension + - dimension_value + + columns: + - name: start_date + data_type: date + description: | + The start date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: end_date + data_type: date + description: | + The end date of the time range considered for the metrics in this record. 
+ tests: + - not_null + + - name: dimension + data_type: string + description: The dimension or granularity of the metrics. + tests: + - assert_dimension_completeness: + metric_column_name: started_guest_journeys + - accepted_values: + values: + - global + - by_number_of_listings + - by_billing_country + - by_deal + + - name: dimension_value + data_type: string + description: The value or segment available for the selected dimension. + tests: + - not_null + + - name: started_guest_journeys + data_type: bigint + description: The month-to-date started guest journeys for a given date, dimension and value. + + - name: int_kpis__metric_daily_completed_guest_journeys + description: | + This model computes the Daily Completed Guest Journeys at the deepest granularity. + + The unique key corresponds to the deepest granularity of the model, + in this case: + - date, + - id_deal. + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - date + - id_deal + + columns: + - name: date + data_type: date + description: Date of when Guest Journeys have been completed. + tests: + - not_null + + - name: id_deal + data_type: string + description: Unique identifier of an account. + tests: + - not_null + + - name: active_accommodations_per_deal_segmentation + data_type: string + description: | + Segment value based on the number of listings booked in 12 months + for a given deal and date. + tests: + - not_null + - accepted_values: + values: + - "0" + - "01-05" + - "06-20" + - "21-60" + - "61+" + - "UNSET" + + - name: main_billing_country_iso_3_per_deal + data_type: string + description: | + Main billing country of the host aggregated at Deal level. + tests: + - not_null + + - name: completed_guest_journeys + data_type: bigint + description: | + Count of daily guest journeys completed in a given date and per specified dimension. 
+ + - name: int_kpis__metric_monthly_completed_guest_journeys + description: | + This model computes the Monthly Completed Guest Journeys at the + deepest granularity. + Be aware that any dimension that can change over the monthly period, + such as daily segmentations, are included in the primary key of the + model. + + The unique key corresponds to: + - end_date, + - id_deal, + - active_accommodations_per_deal_segmentation. + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - end_date + - id_deal + - active_accommodations_per_deal_segmentation + + columns: + - name: start_date + data_type: date + description: | + The start date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: end_date + data_type: date + description: | + The end date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: id_deal + data_type: string + description: Unique identifier of an account. + tests: + - not_null + + - name: active_accommodations_per_deal_segmentation + data_type: string + description: | + Segment value based on the number of listings booked in 12 months + for a given deal and date. + tests: + - not_null + - accepted_values: + values: + - "0" + - "01-05" + - "06-20" + - "21-60" + - "61+" + - "UNSET" + + - name: main_billing_country_iso_3_per_deal + data_type: string + description: | + Main billing country of the host aggregated at Deal level. + tests: + - not_null + + - name: completed_guest_journeys + data_type: bigint + description: | + Count of accumulated guest journeys completed in a given month + and per specified dimension. + + - name: int_kpis__metric_mtd_completed_guest_journeys + description: | + This model computes the Month-To-Date Completed Guest Journeys at the + deepest granularity. + Be aware that any dimension that can change over the monthly period, + such as daily segmentations, are included in the primary key of the + model.
+ + The unique key corresponds to: + - end_date, + - id_deal, + - active_accommodations_per_deal_segmentation. + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - end_date + - id_deal + - active_accommodations_per_deal_segmentation + + columns: + - name: start_date + data_type: date + description: | + The start date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: end_date + data_type: date + description: | + The end date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: id_deal + data_type: string + description: Unique identifier of an account. + tests: + - not_null + + - name: active_accommodations_per_deal_segmentation + data_type: string + description: | + Segment value based on the number of listings booked in 12 months + for a given deal and date. + tests: + - not_null + - accepted_values: + values: + - "0" + - "01-05" + - "06-20" + - "21-60" + - "61+" + - "UNSET" + + - name: main_billing_country_iso_3_per_deal + data_type: string + description: | + Main billing country of the host aggregated at Deal level. + tests: + - not_null + + - name: completed_guest_journeys + data_type: bigint + description: | + Count of accumulated guest journeys completed in a given month up to the + given date and per specified dimension. + + - name: int_kpis__aggregated_monthly_completed_guest_journeys + description: | + This model computes the dimension aggregation for + Monthly Completed Guest Journeys. + + The primary key of this model is end_date, dimension + and dimension_value. + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - end_date + - dimension + - dimension_value + + columns: + - name: start_date + data_type: date + description: | + The start date of the time range considered for the metrics in this record.
+ tests: + - not_null + + - name: end_date + data_type: date + description: | + The end date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: dimension + data_type: string + description: The dimension or granularity of the metrics. + tests: + - assert_dimension_completeness: + metric_column_name: completed_guest_journeys + - accepted_values: + values: + - global + - by_number_of_listings + - by_billing_country + - by_deal + + - name: dimension_value + data_type: string + description: The value or segment available for the selected dimension. + tests: + - not_null + + - name: completed_guest_journeys + data_type: bigint + description: The monthly completed guest journeys for a given date, dimension and value. + + - name: int_kpis__aggregated_mtd_completed_guest_journeys + description: | + This model computes the dimension aggregation for + Month-To-Date Completed Guest Journeys. + + The primary key of this model is end_date, dimension + and dimension_value. + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - end_date + - dimension + - dimension_value + + columns: + - name: start_date + data_type: date + description: | + The start date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: end_date + data_type: date + description: | + The end date of the time range considered for the metrics in this record. + tests: + - not_null + + - name: dimension + data_type: string + description: The dimension or granularity of the metrics. + tests: + - assert_dimension_completeness: + metric_column_name: completed_guest_journeys + - accepted_values: + values: + - global + - by_number_of_listings + - by_billing_country + - by_deal + + - name: dimension_value + data_type: string + description: The value or segment available for the selected dimension. 
+ tests: + - not_null + + - name: completed_guest_journeys + data_type: bigint + description: The month-to-date completed guest journeys for a given date, dimension and value.
diff --git a/tests/tmp_kpis_refactor_equivalent_completed_guest_journeys.sql b/tests/tmp_kpis_refactor_equivalent_completed_guest_journeys.sql
new file mode 100644
index 0000000..aaea6ca
--- /dev/null
+++ b/tests/tmp_kpis_refactor_equivalent_completed_guest_journeys.sql
@@ -0,0 +1,53 @@
+{% set min_date = "2022-01-01" %}
+{% set dimensions = ("global", "by_billing_country") %}
+-- Refactor check: returns the rows where legacy and new KPI values differ.
+-- "by_number_of_listings" is left out on purpose: daily segmentation makes it differ.
+with
+    legacy_kpis as (
+        select date, dimension, dimension_value, completed_guest_journeys
+        from {{ ref("int_core__mtd_guest_journey_metrics") }}
+        where dimension in {{ dimensions }} and date >= '{{ min_date }}'
+    ),
+    refactored_kpis as (
+        -- month-to-date rows of the new flow
+        select end_date as date, dimension, dimension_value, completed_guest_journeys
+        from {{ ref("int_kpis__aggregated_mtd_completed_guest_journeys") }}
+        where
+            dimension_value <> 'UNSET'
+            and dimension in {{ dimensions }}
+            and end_date >= '{{ min_date }}'
+        union all
+        -- monthly rows of the new flow
+        select end_date as date, dimension, dimension_value, completed_guest_journeys
+        from {{ ref("int_kpis__aggregated_monthly_completed_guest_journeys") }}
+        where
+            dimension_value <> 'UNSET'
+            and dimension in {{ dimensions }}
+            and end_date >= '{{ min_date }}'
+    ),
+    compared as (
+        select
+            coalesce(legacy.date, refactored.date) as date,
+            coalesce(legacy.dimension, refactored.dimension) as dimension,
+            coalesce(legacy.dimension_value, refactored.dimension_value) as dimension_value,
+            legacy.completed_guest_journeys as old_completed_guest_journeys,
+            refactored.completed_guest_journeys as new_completed_guest_journeys,
+            coalesce(legacy.completed_guest_journeys, 0)
+            - coalesce(refactored.completed_guest_journeys, 0) as diff
+        from legacy_kpis legacy
+        full outer join
+            refactored_kpis refactored
+            on legacy.date = refactored.date
+            and legacy.dimension = refactored.dimension
+            and legacy.dimension_value = refactored.dimension_value
+    )
+select
+    date,
+    dimension,
+    dimension_value,
+    old_completed_guest_journeys,
+    new_completed_guest_journeys,
+    diff
+from compared
+where diff <> 0
+order by date desc, abs(diff) desc
diff --git a/tests/tmp_kpis_refactor_equivalent_created_guest_journeys.sql b/tests/tmp_kpis_refactor_equivalent_created_guest_journeys.sql
new file mode 100644
index 0000000..ad58f8c
--- /dev/null
+++ b/tests/tmp_kpis_refactor_equivalent_created_guest_journeys.sql
@@ -0,0 +1,53 @@
+{% set min_date = "2022-01-01" %}
+{% set dimensions = ("global", "by_billing_country") %}
+-- Refactor check: returns the rows where legacy and new KPI values differ.
+-- "by_number_of_listings" is left out on purpose: daily segmentation makes it differ.
+with
+    legacy_kpis as (
+        select date, dimension, dimension_value, created_guest_journeys
+        from {{ ref("int_core__mtd_guest_journey_metrics") }}
+        where dimension in {{ dimensions }} and date >= '{{ min_date }}'
+    ),
+    refactored_kpis as (
+        -- month-to-date rows of the new flow
+        select end_date as date, dimension, dimension_value, created_guest_journeys
+        from {{ ref("int_kpis__aggregated_mtd_created_guest_journeys") }}
+        where
+            dimension_value <> 'UNSET'
+            and dimension in {{ dimensions }}
+            and end_date >= '{{ min_date }}'
+        union all
+        -- monthly rows of the new flow
+        select end_date as date, dimension, dimension_value, created_guest_journeys
+        from {{ ref("int_kpis__aggregated_monthly_created_guest_journeys") }}
+        where
+            dimension_value <> 'UNSET'
+            and dimension in {{ dimensions }}
+            and end_date >= '{{ min_date }}'
+    ),
+    compared as (
+        select
+            coalesce(legacy.date, refactored.date) as date,
+            coalesce(legacy.dimension, refactored.dimension) as dimension,
+            coalesce(legacy.dimension_value, refactored.dimension_value) as dimension_value,
+            legacy.created_guest_journeys as old_created_guest_journeys,
+            refactored.created_guest_journeys as new_created_guest_journeys,
+            coalesce(legacy.created_guest_journeys, 0)
+            - coalesce(refactored.created_guest_journeys, 0) as diff
+        from legacy_kpis legacy
+        full outer join
+            refactored_kpis refactored
+            on legacy.date = refactored.date
+            and legacy.dimension = refactored.dimension
+            and legacy.dimension_value = refactored.dimension_value
+    )
+select
+    date,
+    dimension,
+    dimension_value,
+    old_created_guest_journeys,
+    new_created_guest_journeys,
+    diff
+from compared
+where diff <> 0
+order by date desc, abs(diff) desc
diff --git a/tests/tmp_kpis_refactor_equivalent_started_guest_journeys.sql b/tests/tmp_kpis_refactor_equivalent_started_guest_journeys.sql
new file mode 100644
index 0000000..b7bbd49
--- /dev/null
+++ b/tests/tmp_kpis_refactor_equivalent_started_guest_journeys.sql
@@ -0,0 +1,53 @@
+{% set min_date = "2022-01-01" %}
+{% set dimensions = ("global", "by_billing_country") %}
+-- Refactor check: returns the rows where legacy and new KPI values differ.
+-- "by_number_of_listings" is left out on purpose: daily segmentation makes it differ.
+with
+    legacy_kpis as (
+        select date, dimension, dimension_value, started_guest_journeys
+        from {{ ref("int_core__mtd_guest_journey_metrics") }}
+        where dimension in {{ dimensions }} and date >= '{{ min_date }}'
+    ),
+    refactored_kpis as (
+        -- month-to-date rows of the new flow
+        select end_date as date, dimension, dimension_value, started_guest_journeys
+        from {{ ref("int_kpis__aggregated_mtd_started_guest_journeys") }}
+        where
+            dimension_value <> 'UNSET'
+            and dimension in {{ dimensions }}
+            and end_date >= '{{ min_date }}'
+        union all
+        -- monthly rows of the new flow
+        select end_date as date, dimension, dimension_value, started_guest_journeys
+        from {{ ref("int_kpis__aggregated_monthly_started_guest_journeys") }}
+        where
+            dimension_value <> 'UNSET'
+            and dimension in {{ dimensions }}
+            and end_date >= '{{ min_date }}'
+    ),
+    compared as (
+        select
+            coalesce(legacy.date, refactored.date) as date,
+            coalesce(legacy.dimension, refactored.dimension) as dimension,
+            coalesce(legacy.dimension_value, refactored.dimension_value) as dimension_value,
+            legacy.started_guest_journeys as old_started_guest_journeys,
+            refactored.started_guest_journeys as new_started_guest_journeys,
+            coalesce(legacy.started_guest_journeys, 0)
+            - coalesce(refactored.started_guest_journeys, 0) as diff
+        from legacy_kpis legacy
+        full outer join
+            refactored_kpis refactored
+            on legacy.date = refactored.date
+            and legacy.dimension = refactored.dimension
+            and legacy.dimension_value = refactored.dimension_value
+    )
+select
+    date,
+    dimension,
+    dimension_value,
+    old_started_guest_journeys,
+    new_started_guest_journeys,
+    diff
+from compared
+where diff <> 0
+order by date desc, abs(diff) desc