From 004616bb794fc96a190e5c676084c13cc1f85280 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oriol=20Roqu=C3=A9=20Paniagua?= Date: Wed, 16 Oct 2024 11:56:49 +0000 Subject: [PATCH 1/2] Merged PR 3187: Move deal lifecycle related models to cross # Description Moves from intermediate/core to intermediate/cross the following models: - `int_core__mtd_deal_lifecycle` - `int_core__mtd_deal_metrics` to their equivalents: - `int_mtd_deal_lifecycle` - `int_mtd_deal_metrics` This also changes the schema entries, from core to cross, including changing the name of the model in the entry. This also changes the dependencies, namely in `int_mtd_deal_metrics`, `int_mtd_vs_previous_year_metrics` and `int_monthly_aggregated_metrics_history_by_deal`. This does NOT aim to alter the logic of the lifecycle in any case; it will be done in a separated PR. Runs correctly end-to-end. We might need to drop the old models from production manually. # Checklist - [X] The edited models and dependants run properly with production data. - [X] The edited models are sufficiently documented. - [X] The edited models contain PK tests, and I've ran and passed them. - [X] I have checked for DRY opportunities with other models and docs. - [X] I've picked the right materialization for the affected models. # Other - [ ] Check if a full-refresh is required after this PR is merged. Related work items: #22689 --- models/intermediate/core/schema.yml | 126 ------------------ ...hly_aggregated_metrics_history_by_deal.sql | 6 +- .../int_mtd_deal_lifecycle.sql} | 0 .../int_mtd_deal_metrics.sql} | 15 +-- .../int_mtd_vs_previous_year_metrics.sql | 6 +- models/intermediate/cross/schema.yml | 126 ++++++++++++++++++ 6 files changed, 136 insertions(+), 143 deletions(-) rename models/intermediate/{core/int_core__mtd_deal_lifecycle.sql => cross/int_mtd_deal_lifecycle.sql} (100%) rename models/intermediate/{core/int_core__mtd_deal_metrics.sql => cross/int_mtd_deal_metrics.sql} (93%) diff --git a/models/intermediate/core/schema.yml b/models/intermediate/core/schema.yml index 17fe0c2..e6f7f9b 100644 --- a/models/intermediate/core/schema.yml +++ b/models/intermediate/core/schema.yml @@ -729,42 +729,6 @@ models: tests: - not_null - - name: int_core__mtd_deal_metrics - description: | - This model contains the historic information regarding the deals in an aggregated manner. - It's used for the business KPIs. Data is aggregated at the last day of the month and in the - days necessary for the Month-to-Date computation of the current month. - - tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - date - - dimension - - dimension_value - - columns: - - name: date - data_type: date - description: The date for the month-to-date deal-related metrics. - tests: - - not_null - - - name: dimension - data_type: string - description: The dimension or granularity of the metrics. - tests: - - accepted_values: - values: - - global - - by_number_of_listings - - by_billing_country - - - name: dimension_value - data_type: string - description: The value or segment available for the selected dimension. - tests: - - not_null - - name: int_core__mtd_guest_payments_metrics description: | This model contains the historic information regarding the guest payments in an aggregated manner. @@ -1495,96 +1459,6 @@ models: data_type: integer description: Informative field of the actual number of listings booked in the last 12 months - - name: int_core__mtd_deal_lifecycle - description: | - This model contains the historic information regarding the lifecycle of hosts, at deal level. - The information regarding the booking-related time allows for the current status of any - deal regarding its activity. This information is encapsulated in the following columns: - - deal_lifecycle_state: contains one of the following states - - 01-New: Deals that have been created in the current month, without bookings - - 02-Never Booked: Deals that have been created before the current month, without bookings. - - 03-First Time Booked: Deals that have been booked for the first time in the current month. - - 04-Active: Deals that have booking activity in the past 12 months (that are not FTB nor reactivated) - - 05-Churning: Deals that are becoming inactive because of lack of bookings in the past 12 months - - 06-Inactive: Deals that have not had a booking for more than 12 months. - - 07-Reactivated: Deals that have had a booking in the current month that were inactive or churning before. - - Finally, if none of the logic applies, which should not happen, null will be set and a dbt alert will raise. - - Since the states of Active, First Time Booked and Reactivated indicate certain booking activity and are - mutually exclusive, the model also provides information of the recency of the bookings by the following - booleans: - - has_been_booked_within_current_month: If a deal has had a booking created in the current month - - has_been_booked_within_last_6_months: If a deal has had a booking created in the past 6 months - - has_been_booked_within_last_12_months: If a deal has had a booking created in the past 12 months - Note that if a deal has had a booking created in a given month, all 3 columns will be true. Similarly, - if the last booking created to a deal was 5 months ago, only the column has_been_booked_in_1_month - will be false; while the other 2 will be true. - tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - date - - id_deal - - columns: - - name: date - data_type: date - description: The date for the month-to-date. Information is inclusive to the date displayed. - tests: - - not_null - - - name: id_deal - data_type: character varying - description: Id of the deal associated to the host. - tests: - - not_null - - - name: creation_date_utc - data_type: date - description: Date of when the first host associated to that deal was created. - - - name: first_time_booked_date_utc - data_type: date - description: | - Date of the first booking created for a given deal. Can be null if the deal - has never had a booking associated with it. - - - name: last_time_booked_date_utc - data_type: date - description: | - Date of the last booking created for a given deal. Can be null if the deal - has never had a booking associated with it. Can be the same as first_time_booked_date_utc - if the deal only had 1 booking in its history. - - - name: second_to_last_time_booked_date_utc - data_type: date - description: | - Date of the second-to-last booking created for a given deal, meaning the creation - date of the booking that precedes the last one. It's relevant for the reactivation computation - on the lifecycle. Can be null if the deal has never had a booking associated with it or if - the deal only had 1 booking in its history. - - - name: deal_lifecycle_state - data_type: character varying - description: | - Contains the lifecycle state of a deal. The accepted values are: - 01-New, 02-Never Booked, 03-First Time Booked, 04-Active, 05-Churning, 06-Inactive, - 07-Reactivated. Failing to implement the logic will result in alert. - tests: - - not_null - - - name: has_been_booked_within_current_month - data_type: boolean - description: If the deal has had a booking created in the current month. - - - name: has_been_booked_within_last_6_months - data_type: boolean - description: If the deal has had a booking created in the past 6 months. - - - name: has_been_booked_within_last_12_months - data_type: boolean - description: If the deal has had a booking created in the past 12 months. - - name: int_core__check_in_cover_users description: This model contains information about hosts that offer check in cover. diff --git a/models/intermediate/cross/int_monthly_aggregated_metrics_history_by_deal.sql b/models/intermediate/cross/int_monthly_aggregated_metrics_history_by_deal.sql index 3657f18..05e5203 100644 --- a/models/intermediate/cross/int_monthly_aggregated_metrics_history_by_deal.sql +++ b/models/intermediate/cross/int_monthly_aggregated_metrics_history_by_deal.sql @@ -4,9 +4,7 @@ This model aggregates the different metrics by deal for those hosts that have it */ with int_dates_by_deal as (select * from {{ ref("int_dates_by_deal") }}), - int_core__mtd_deal_lifecycle as ( - select * from {{ ref("int_core__mtd_deal_lifecycle") }} - ), + int_mtd_deal_lifecycle as (select * from {{ ref("int_mtd_deal_lifecycle") }}), int_core__monthly_guest_journey_history_by_deal as ( select * from {{ ref("int_core__monthly_guest_journey_history_by_deal") }} ), @@ -130,7 +128,7 @@ select from int_dates_by_deal d left join - int_core__mtd_deal_lifecycle deal_lifecycle + int_mtd_deal_lifecycle deal_lifecycle on d.date = deal_lifecycle.date and d.id_deal = deal_lifecycle.id_deal left join diff --git a/models/intermediate/core/int_core__mtd_deal_lifecycle.sql b/models/intermediate/cross/int_mtd_deal_lifecycle.sql similarity index 100% rename from models/intermediate/core/int_core__mtd_deal_lifecycle.sql rename to models/intermediate/cross/int_mtd_deal_lifecycle.sql diff --git a/models/intermediate/core/int_core__mtd_deal_metrics.sql b/models/intermediate/cross/int_mtd_deal_metrics.sql similarity index 93% rename from models/intermediate/core/int_core__mtd_deal_metrics.sql rename to models/intermediate/cross/int_mtd_deal_metrics.sql index fda2539..9a32e25 100644 --- a/models/intermediate/core/int_core__mtd_deal_metrics.sql +++ b/models/intermediate/cross/int_mtd_deal_metrics.sql @@ -2,24 +2,19 @@ This model provides Month-To-Date (MTD) based on Deal metrics. */ - {% set dimensions = get_kpi_dimensions() %} {{ config(materialized="table", unique_key=["date", "dimension", "dimension_value"]) }} with - int_core__mtd_deal_lifecycle as ( - select * from {{ ref("int_core__mtd_deal_lifecycle") }} - ), + int_mtd_deal_lifecycle as (select * from {{ ref("int_mtd_deal_lifecycle") }}), int_dates_mtd_by_dimension as ( select * from {{ ref("int_dates_mtd_by_dimension") }} ), int_core__mtd_accommodation_segmentation as ( select * from {{ ref("int_core__mtd_accommodation_segmentation") }} ), - int_core__deal as ( - select * from {{ ref("int_core__deal") }} - ), + int_core__deal as (select * from {{ ref("int_core__deal") }}), deals_metric_aggregation_per_date as ( {% for dimension in dimensions %} select @@ -84,14 +79,16 @@ with sum( case when has_been_booked_within_last_12_months then 1 else 0 end ) as deals_booked_in_12_months - from int_core__mtd_deal_lifecycle al + from int_mtd_deal_lifecycle al {% if dimension.dimension == "'by_number_of_listings'" %} inner join int_core__mtd_accommodation_segmentation mas on al.id_deal = mas.id_deal and al.date = mas.date {% elif dimension.dimension == "'by_billing_country'" %} - inner join int_core__deal ud on al.id_deal = ud.id_deal + inner join + int_core__deal ud + on al.id_deal = ud.id_deal and ud.main_billing_country_iso_3_per_deal is not null {% endif %} group by 1, 2, 3 diff --git a/models/intermediate/cross/int_mtd_vs_previous_year_metrics.sql b/models/intermediate/cross/int_mtd_vs_previous_year_metrics.sql index 2f20b3d..db22061 100644 --- a/models/intermediate/cross/int_mtd_vs_previous_year_metrics.sql +++ b/models/intermediate/cross/int_mtd_vs_previous_year_metrics.sql @@ -22,9 +22,7 @@ with int_core__mtd_accommodation_metrics as ( select * from {{ ref("int_core__mtd_accommodation_metrics") }} ), - int_core__mtd_deal_metrics as ( - select * from {{ ref("int_core__mtd_deal_metrics") }} - ), + int_mtd_deal_metrics as (select * from {{ ref("int_mtd_deal_metrics") }}), int_core__mtd_guest_payments_metrics as ( select * from {{ ref("int_core__mtd_guest_payments_metrics") }} ), @@ -202,7 +200,7 @@ with and d.dimension = accommodations.dimension and d.dimension_value = accommodations.dimension_value left join - int_core__mtd_deal_metrics deals + int_mtd_deal_metrics deals on d.date = deals.date and d.dimension = deals.dimension and d.dimension_value = deals.dimension_value diff --git a/models/intermediate/cross/schema.yml b/models/intermediate/cross/schema.yml index 64bb5c5..45164ab 100644 --- a/models/intermediate/cross/schema.yml +++ b/models/intermediate/cross/schema.yml @@ -1396,3 +1396,129 @@ models: - name: listings_booked_in_month_churn_average_contribution data_type: numeric description: Listings Booked in Month churn rate (average approach). + + - name: int_mtd_deal_lifecycle + description: | + This model contains the historic information regarding the lifecycle of hosts, at deal level. + The information regarding the booking-related time allows for the current status of any + deal regarding its activity. This information is encapsulated in the following columns: + + deal_lifecycle_state: contains one of the following states + - 01-New: Deals that have been created in the current month, without bookings + - 02-Never Booked: Deals that have been created before the current month, without bookings. + - 03-First Time Booked: Deals that have been booked for the first time in the current month. + - 04-Active: Deals that have booking activity in the past 12 months (that are not FTB nor reactivated) + - 05-Churning: Deals that are becoming inactive because of lack of bookings in the past 12 months + - 06-Inactive: Deals that have not had a booking for more than 12 months. + - 07-Reactivated: Deals that have had a booking in the current month that were inactive or churning before. + - Finally, if none of the logic applies, which should not happen, null will be set and a dbt alert will raise. + + Since the states of Active, First Time Booked and Reactivated indicate certain booking activity and are + mutually exclusive, the model also provides information of the recency of the bookings by the following + booleans: + - has_been_booked_within_current_month: If a deal has had a booking created in the current month + - has_been_booked_within_last_6_months: If a deal has had a booking created in the past 6 months + - has_been_booked_within_last_12_months: If a deal has had a booking created in the past 12 months + Note that if a deal has had a booking created in a given month, all 3 columns will be true. Similarly, + if the last booking created to a deal was 5 months ago, only the column has_been_booked_in_1_month + will be false; while the other 2 will be true. + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - date + - id_deal + + columns: + - name: date + data_type: date + description: The date for the month-to-date. Information is inclusive to the date displayed. + tests: + - not_null + + - name: id_deal + data_type: character varying + description: Id of the deal associated to the host. + tests: + - not_null + + - name: creation_date_utc + data_type: date + description: Date of when the first host associated to that deal was created. + + - name: first_time_booked_date_utc + data_type: date + description: | + Date of the first booking created for a given deal. Can be null if the deal + has never had a booking associated with it. + + - name: last_time_booked_date_utc + data_type: date + description: | + Date of the last booking created for a given deal. Can be null if the deal + has never had a booking associated with it. Can be the same as first_time_booked_date_utc + if the deal only had 1 booking in its history. + + - name: second_to_last_time_booked_date_utc + data_type: date + description: | + Date of the second-to-last booking created for a given deal, meaning the creation + date of the booking that precedes the last one. It's relevant for the reactivation computation + on the lifecycle. Can be null if the deal has never had a booking associated with it or if + the deal only had 1 booking in its history. + + - name: deal_lifecycle_state + data_type: character varying + description: | + Contains the lifecycle state of a deal. The accepted values are: + 01-New, 02-Never Booked, 03-First Time Booked, 04-Active, 05-Churning, 06-Inactive, + 07-Reactivated. Failing to implement the logic will result in alert. + tests: + - not_null + + - name: has_been_booked_within_current_month + data_type: boolean + description: If the deal has had a booking created in the current month. + + - name: has_been_booked_within_last_6_months + data_type: boolean + description: If the deal has had a booking created in the past 6 months. + + - name: has_been_booked_within_last_12_months + data_type: boolean + description: If the deal has had a booking created in the past 12 months. + + - name: int_mtd_deal_metrics + description: | + This model contains the historic information regarding the deals in an aggregated manner. + It's used for the business KPIs. Data is aggregated at the last day of the month and in the + days necessary for the Month-to-Date computation of the current month. + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - date + - dimension + - dimension_value + + columns: + - name: date + data_type: date + description: The date for the month-to-date deal-related metrics. + tests: + - not_null + + - name: dimension + data_type: string + description: The dimension or granularity of the metrics. + tests: + - accepted_values: + values: + - global + - by_number_of_listings + - by_billing_country + + - name: dimension_value + data_type: string + description: The value or segment available for the selected dimension. + tests: + - not_null From b0c50d4da22b6186772af8f45aad63dd8fd996ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oriol=20Roqu=C3=A9=20Paniagua?= Date: Wed, 16 Oct 2024 12:15:14 +0000 Subject: [PATCH 2/2] Merged PR 3194: First version of Hubspot deals in intermediate # Description Very minimal, first version of Hubspot deals. I intentionally didn't include here sales related information or additional attributes since I don't need them for Churn related topics. This can be done in future PRs. This Deal version includes the name of the Hubspot pipeline and the Stage. It also excludes deals assigned to Guardhog pipeline (~3k) based on my discussion with Alex. # Checklist - [X] The edited models and dependants run properly with production data. - [X] The edited models are sufficiently documented. - [X] The edited models contain PK tests, and I've ran and passed them. - [X] I have checked for DRY opportunities with other models and docs. - [X] I've picked the right materialization for the affected models. **Left as a view since there's not that many records, can be changed in the future if needed** # Other - [ ] Check if a full-refresh is required after this PR is merged. Related work items: #22689 --- .../hubspot/int_hubspot__deal.sql | 31 +++++++ models/intermediate/hubspot/schema.yml | 83 +++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 models/intermediate/hubspot/int_hubspot__deal.sql create mode 100644 models/intermediate/hubspot/schema.yml diff --git a/models/intermediate/hubspot/int_hubspot__deal.sql b/models/intermediate/hubspot/int_hubspot__deal.sql new file mode 100644 index 0000000..2b1bfb0 --- /dev/null +++ b/models/intermediate/hubspot/int_hubspot__deal.sql @@ -0,0 +1,31 @@ +{% set id_deal_pipelines_excluded = "('15380854')" %} + +with + stg_hubspot__deals as (select * from {{ ref("stg_hubspot__deals") }}), + stg_hubspot__deal_pipeline_stages as ( + select * from {{ ref("stg_hubspot__deal_pipeline_stages") }} + ), + stg_hubspot__deal_pipelines as ( + select * from {{ ref("stg_hubspot__deal_pipelines") }} + ) +select + d.id_deal, + d.deal_name, + d.contract_signed_date_utc, + d.onboarding_date_utc, + d.live_date_utc, + d.cancellation_date_utc, + d.account_manager, + dp.deal_pipeline_name as deal_pipeline, + dps.stage_name as deal_hubspot_stage, + d.cancellation_category, + d.cancellation_details, + d.created_at_utc, + d.created_date_utc, + d.updated_at_utc, + d.updated_date_utc +from stg_hubspot__deals d +left join stg_hubspot__deal_pipeline_stages dps on d.id_deal_stage = dps.id_stage +left join stg_hubspot__deal_pipelines dp on dps.id_deal_pipeline = dp.id_deal_pipeline +-- Exclude Guardhog pipelines +where dps.id_deal_pipeline not in {{ id_deal_pipelines_excluded }} diff --git a/models/intermediate/hubspot/schema.yml b/models/intermediate/hubspot/schema.yml new file mode 100644 index 0000000..19783b6 --- /dev/null +++ b/models/intermediate/hubspot/schema.yml @@ -0,0 +1,83 @@ +version: 2 + +models: + - name: int_hubspot__deal + description: | + Our deals in Hubspot. A deal is a commercial relationship with a client + company. It's possible for a single company to have multiple deals, but + it's frequent for them to only have one. + + Deals are one of the spines of Superhog's data. You will find references + to Deal Ids in both our in-house systems and Xero. + + Note that deals that follow the Guardhog pipeline are excluded from this + table. + + columns: + - name: id_deal + data_type: character varying + description: Unique ID for this deal. + tests: + - unique + - not_null + + - name: deal_name + data_type: text + description: Name of the deal. + + - name: contract_signed_date_utc + data_type: date + description: Date in which the contract was signed + + - name: onboarding_date_utc + data_type: date + description: Date in which the contract was onboarded + + - name: live_date_utc + data_type: date + description: Date in which the account went live + + - name: cancellation_date_utc + data_type: date + description: Date in which an account that was being used was cancelled + + - name: account_manager + data_type: text + description: | + The name of the account manager that is currently taking care of this + deal. + + - name: deal_pipeline + data_type: text + description: | + The name of the hubspot pipeline for this deal. + + - name: deal_hubspot_stage + data_type: text + description: | + The name of the hubspot stage this deal is currently in. The different + stages a deal can be in depend on the hubspot pipeline. + + - name: cancellation_category + data_type: text + description: Categorization as to why they cancelled the account + + - name: cancellation_details + data_type: text + description: Free text with additional comments on why they cancelled the account + + - name: created_at_utc + data_type: timestamp with time zone + description: Timestamp of when the record was created in Hubspot + + - name: created_date_utc + data_type: date + description: Date of when the record was created in Hubspot + + - name: updated_at_utc + data_type: timestamp with time zone + description: Timestamp of when the record was last updated in Hubspot + + - name: updated_date_utc + data_type: date + description: Date of when the record was last updated in Hubspot