From 010135fb63ec419454e0fcc57c113ad8af3ef41f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oriol=20Roqu=C3=A9=20Paniagua?= Date: Mon, 1 Jul 2024 16:00:14 +0000 Subject: [PATCH] Merged PR 2164: Adding booking metrics by deal id for business kpis This is a first approach to compute some easy metrics for the "deal" based business kpis. At this stage, it contains the information of bookings (created, checkout, cancelled) per deal and month, including both historic months as well as the current one. This do not contain MTD computation because it's overkill to do a MTD at deal level (+ we have 1k deals, so scalability can become a problem in the future) Models: - **int_dates_by_deal**: simple model that reads from **int_dates** and just joins it with **unified_users** to retrieve the deals. It will be used as the 'source of truth' for which deals should be considered in a given month, basically, since the first host associated to a deal is created (not necessarily booked) - **int_core__monthly_booking_history_by_deal**: it contains the history of bookings per deal id in a monthly basis. It should be easy enough to integrate here, in the future and if needed, B2B macro segmentation. In terms of performance, comparing the model **int_core__monthly_booking_history_by_deal** and **int_core__mtd_booking_metrics** you'll see that I removed the joined with the **int_dates_xxx** in the CTEs. This is because I want to avoid a double join of date & deal that I tried and I stopped after 5 min running. Since this computation is in a monthly basis - no MTD - it's easy enough to just apply the **int_dates_by_deal** on the last part of the query. With this approach, it runs in 7 seconds. Related work items: #17689 --- dbt_project.yml | 6 +- ..._core__monthly_booking_history_by_deal.sql | 69 +++++++++++++++++++ .../core/int_core__mtd_booking_metrics.sql | 2 +- models/intermediate/core/schema.yaml | 27 +++++++- .../intermediate/cross/int_dates_by_deal.sql | 28 ++++++++ 5 files changed, 129 insertions(+), 3 deletions(-) create mode 100644 models/intermediate/core/int_core__monthly_booking_history_by_deal.sql create mode 100644 models/intermediate/cross/int_dates_by_deal.sql diff --git a/dbt_project.yml b/dbt_project.yml index 13ba36f..55e6a96 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -49,4 +49,8 @@ vars: "dbt_date:time_zone": "Europe/London" # A general cutoff date for relevancy. Many models assume this to be the point # in time after which they should work. - "start_date": "'2020-01-01'" \ No newline at end of file + "start_date": "'2020-01-01'" + + # Booking state variables + # States should be strings in capital letters. Models need to force an upper() + "cancelled_booking_state": "'CANCELLED'" \ No newline at end of file diff --git a/models/intermediate/core/int_core__monthly_booking_history_by_deal.sql b/models/intermediate/core/int_core__monthly_booking_history_by_deal.sql new file mode 100644 index 0000000..ca1ad6d --- /dev/null +++ b/models/intermediate/core/int_core__monthly_booking_history_by_deal.sql @@ -0,0 +1,69 @@ +/* +This model provides monthly booking metrics for those hosts that have a deal assigned. + +*/ +{{ config(materialized="table", unique_key=["date","id_deal"]) }} + +with + int_core__bookings as (select * from {{ ref("int_core__bookings") }}), + int_dates_by_deal as (select * from {{ ref("int_dates_by_deal") }}), + int_core__unified_user as (select * from {{ ref("int_core__unified_user") }}), + + -- Created Bookings by id_deal -- + created_year_month as ( + select + date_trunc('month', b.created_date_utc)::date as first_day_month, + u.id_deal, + count(distinct b.id_booking) as created_bookings + from int_core__unified_user u + inner join int_core__bookings b + on u.id_user = b.id_user_host + where u.id_deal is not null + group by 1,2 + ), + -- Checkout Bookings by id_deal -- + check_out_year_month as ( + select + date_trunc('month', b.check_out_date_utc)::date as first_day_month, + u.id_deal, + count(distinct b.id_booking) as check_out_bookings + from int_core__unified_user u + inner join int_core__bookings b + on u.id_user = b.id_user_host + where u.id_deal is not null + group by 1,2 + ), + -- Cancelled Bookings by id_deal -- + -- Cancellation date equivalent to the last time the cancelled booking was updated + cancelled_year_month as ( + select + date_trunc('month', b.updated_date_utc)::date as first_day_month, + u.id_deal, + count(distinct b.id_booking) as cancelled_bookings + from int_core__unified_user u + inner join int_core__bookings b + on u.id_user = b.id_user_host + where u.id_deal is not null + and upper(b.booking_state) = {{ var("cancelled_booking_state") }} + group by 1,2 + ) + -- Final aggregation of subqueries -- + select + d.year, + d.month, + d.day, + d.date, + d.id_deal, + crym.created_bookings, + coym.check_out_bookings, + caym.cancelled_bookings + from int_dates_by_deal d + left join created_year_month crym + on crym.first_day_month = d.first_day_month + and crym.id_deal = d.id_deal + left join check_out_year_month coym + on coym.first_day_month = d.first_day_month + and coym.id_deal = d.id_deal + left join cancelled_year_month caym + on caym.first_day_month = d.first_day_month + and caym.id_deal = d.id_deal diff --git a/models/intermediate/core/int_core__mtd_booking_metrics.sql b/models/intermediate/core/int_core__mtd_booking_metrics.sql index f06685a..26fe2ec 100644 --- a/models/intermediate/core/int_core__mtd_booking_metrics.sql +++ b/models/intermediate/core/int_core__mtd_booking_metrics.sql @@ -39,7 +39,7 @@ with on extract(year from b.updated_date_utc) = d.year and extract(month from b.updated_date_utc) = d.month and extract(day from b.updated_date_utc) <= d.day - and upper(b.booking_state) = 'CANCELLED' + and upper(b.booking_state) = {{ var("cancelled_booking_state") }} group by 1 ), main_kpi as ( diff --git a/models/intermediate/core/schema.yaml b/models/intermediate/core/schema.yaml index bb897c4..560c55d 100644 --- a/models/intermediate/core/schema.yaml +++ b/models/intermediate/core/schema.yaml @@ -143,6 +143,31 @@ models: - not_null - unique + - name: int_core__monthly_booking_history_by_deal + description: | + This model contains the historic information regarding the bookings for each deal id. + It's used for the business KPIs in the view by deal id. Data is aggregated at the last + day of the month, or up to yesterday if it's the current month. + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - date + - id_deal + + columns: + - name: date + data_type: date + description: The last day of the month or yesterday for historic booking metrics. + tests: + - not_null + + - name: id_deal + data_type: character varying + description: Id of the deal associated to the host. + tests: + - not_null + + - name: int_core__mtd_booking_metrics description: | This model contains the historic information regarding the bookings in an aggregated manner. @@ -764,7 +789,7 @@ models: - not_null - name: id_deal - data_type: bigint + data_type: character varying description: Id of the deal associated to the host. tests: - not_null diff --git a/models/intermediate/cross/int_dates_by_deal.sql b/models/intermediate/cross/int_dates_by_deal.sql new file mode 100644 index 0000000..35439cd --- /dev/null +++ b/models/intermediate/cross/int_dates_by_deal.sql @@ -0,0 +1,28 @@ +/* +This model provides the necessary dates for each deal for deal-based KPIs models to work. + +*/ +with + int_dates as (select * from {{ ref("int_dates") }}), + int_core__unified_user as (select * from {{ ref("int_core__unified_user") }}) + +select distinct + d.year_number as year, + d.month_of_year as month, + d.day_of_month as day, + d.date_day as date, + u.id_deal, + d.month_start_date as first_day_month, + d.month_end_date as last_day_month +from int_core__unified_user u +inner join int_dates d on d.date_day >= u.created_date_utc +where + u.id_deal is not null + -- include only up-to yesterday + and now()::date > d.date_day + and ( + -- keep all last day of the month + d.date_day = d.month_end_date + -- keep yesterday + or now()::date = d.date_day + 1 + )