From 94bdc53adf73de1481342bd6a2108c8a88532838 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oriol=20Roqu=C3=A9=20Paniagua?= Date: Tue, 6 Aug 2024 15:03:32 +0000 Subject: [PATCH] Merged PR 2498: Materialise int_dates_mtd and int_dates_by_deal as table to improve performance # Description Materialise int_dates_mtd and int_dates_by_deal as tables. This should improve the run speed as seen in local by quite a bit, and hopefully provide a better starting point for adding new dimensionality on business kpis. I also documented these 2 models, that were missing :) # Checklist - [X] The edited models and dependants run properly with production data. - [X] The edited models are sufficiently documented. - [X] The edited models contain PK tests, and I've ran and passed them. - [X] I have checked for DRY opportunities with other models and docs. - [X] I've picked the right materialization for the affected models. # Other - [] Check if a full-refresh is required after this PR is merged. Related work items: #19514 --- .../intermediate/cross/int_dates_by_deal.sql | 5 +- models/intermediate/cross/int_dates_mtd.sql | 13 +- models/intermediate/cross/schema.yml | 134 ++++++++++++++++++ 3 files changed, 137 insertions(+), 15 deletions(-) diff --git a/models/intermediate/cross/int_dates_by_deal.sql b/models/intermediate/cross/int_dates_by_deal.sql index 35439cd..08c74ee 100644 --- a/models/intermediate/cross/int_dates_by_deal.sql +++ b/models/intermediate/cross/int_dates_by_deal.sql @@ -1,7 +1,6 @@ -/* -This model provides the necessary dates for each deal for deal-based KPIs models to work. 
-*/ +{{ config(materialized="table", unique_key=["date", "id_deal"]) }} + with int_dates as (select * from {{ ref("int_dates") }}), int_core__unified_user as (select * from {{ ref("int_core__unified_user") }}) diff --git a/models/intermediate/cross/int_dates_mtd.sql b/models/intermediate/cross/int_dates_mtd.sql index fc3cf1a..16648f5 100644 --- a/models/intermediate/cross/int_dates_mtd.sql +++ b/models/intermediate/cross/int_dates_mtd.sql @@ -1,17 +1,6 @@ -/* -This model provides Month-To-Date (MTD) necessary dates for MTD-based models to work. -- For month-to-month complete information, it retrieves all end month dates that have elapsed since 2020. -- For month-to-date information, it retrieves the days of the current month of this year up to yesterday. - Additionally, it also gets the days of its equivalent month from last year previous the current day of month of today. -Example: -Imagine we have are at 4th June 2024. -- We will get the dates for 1st, 2nd, 3rd of June 2024. -- We will also get the dates for 1st, 2nd, 3rd of June 2023. -- We will get all end of months from 2020 to yesterday, - i.e., 31st January 2020, 29th February 2020, ..., 30th April 2024, 31st May 2024. +{{ config(materialized="table", unique_key="date") }} -*/ with int_dates as (select * from {{ ref("int_dates") }} where date_day >= {{ var("start_date") }}), raw_dates as ( diff --git a/models/intermediate/cross/schema.yml b/models/intermediate/cross/schema.yml index 5858491..8ac85ee 100644 --- a/models/intermediate/cross/schema.yml +++ b/models/intermediate/cross/schema.yml @@ -150,6 +150,140 @@ models: - not_null - unique + - name: int_dates_mtd + description: | + This model provides Month-To-Date (MTD) necessary dates for MTD-based models to work. + - For month-to-month complete information, it retrieves all end month dates that have elapsed since 2020. + - For month-to-date information, it retrieves the days of the current month of this year up to yesterday. 
+ Additionally, it also gets the days of the equivalent month from last year that fall before today's day of the month. + + Example: + Imagine we are at 4th June 2024. + - We will get the dates for 1st, 2nd, 3rd of June 2024. + - We will also get the dates for 1st, 2nd, 3rd of June 2023. + - We will get all end-of-month dates from 2020 to yesterday, + i.e., 31st January 2020, 29th February 2020, ..., 30th April 2024, 31st May 2024. + + columns: + - name: year + data_type: int + description: Year number of the given date. + tests: + - not_null + + - name: month + data_type: int + description: Month number of the given date. + tests: + - not_null + + - name: day + data_type: int + description: Day-of-month number of the given date. + tests: + - not_null + + - name: is_end_of_month + data_type: boolean + description: Is end of month, 1 for yes, 0 for no. + tests: + - not_null + + - name: is_current_month + data_type: boolean + description: | + Checks if the date is within the current executed month, + 1 for yes, 0 for no. + tests: + - not_null + + - name: first_day_month + data_type: date + description: | + First day of the month corresponding to the date field. + It comes from int_dates_mtd logic. + tests: + - not_null + + - name: date + data_type: date + description: | + Main date for the computation, that is used for filters. + It's the primary key for this model. + tests: + - not_null + - unique + + - name: int_dates_by_deal + description: | + This model provides the necessary dates for each deal for deal-based KPI models to work. + It only considers those dates starting from when the host user of the deal was first available. + + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - date + - id_deal + + columns: + - name: year + data_type: int + description: Year number of the given date. + tests: + - not_null + + - name: month + data_type: int + description: Month number of the given date. 
+ tests: + - not_null + + - name: day + data_type: int + description: Day-of-month number of the given date. + tests: + - not_null + + - name: is_end_of_month + data_type: boolean + description: Is end of month, 1 for yes, 0 for no. + tests: + - not_null + + - name: is_current_month + data_type: boolean + description: | + Checks if the date is within the current executed month, + 1 for yes, 0 for no. + tests: + - not_null + + - name: first_day_month + data_type: date + description: | + First day of the month corresponding to the date field. + It comes from int_dates_mtd logic. + tests: + - not_null + + - name: date + data_type: date + description: | + Main date for the computation, that is used for filters. + Together with id_deal, it forms the primary key for this model. + tests: + - not_null + + - name: id_deal + data_type: string + description: | + Main identifier of the B2B clients. A deal can have multiple hosts. + A host should usually have a deal, but this does not happen in all cases. + In this KPI reporting we force Deal to be not null to avoid potential + data quality issues. + tests: + - not_null + - name: int_mtd_aggregated_metrics description: | The `int_mtd_aggregated_metrics` model aggregates multiple metrics on a year, month, and day basis.