diff --git a/dbt_project.yml b/dbt_project.yml index 81f8077..f6d8a00 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -47,3 +47,4 @@ seeds: vars: "dbt_date:time_zone": "Europe/London" + "start_date": "'2020-01-01'" diff --git a/models/intermediate/core/int_core__mtd_aggregated_metrics.sql b/models/intermediate/core/int_core__mtd_aggregated_metrics.sql new file mode 100644 index 0000000..bcce539 --- /dev/null +++ b/models/intermediate/core/int_core__mtd_aggregated_metrics.sql @@ -0,0 +1,32 @@ +{% set metrics = [ + {'order_by': 1, 'metric': 'Created Bookings', 'value': 'created_bookings', 'previous_year_value': 'previous_year_created_bookings', 'relative_increment': 'relative_increment_created_bookings'}, + {'order_by': 2, 'metric': 'Checkout Bookings', 'value': 'check_out_bookings', 'previous_year_value': 'previous_year_check_out_bookings', 'relative_increment': 'relative_increment_check_out_bookings'}, + {'order_by': 3, 'metric': 'Listings Booked in Month', 'value': 'listings_booked_in_month', 'previous_year_value': 'previous_year_listings_booked_in_month', 'relative_increment': 'relative_increment_listings_booked_in_month'}, + {'order_by': 4, 'metric': 'Listings Booked in 6 Months', 'value': 'listings_booked_in_6_months', 'previous_year_value': 'previous_year_listings_booked_in_6_months', 'relative_increment': 'relative_increment_listings_booked_in_6_months'}, + {'order_by': 5, 'metric': 'Listings Booked in 12 Months', 'value': 'listings_booked_in_12_months', 'previous_year_value': 'previous_year_listings_booked_in_12_months', 'relative_increment': 'relative_increment_listings_booked_in_12_months'}, + {'order_by': 6, 'metric': 'Hosts Booked in Month', 'value': 'hosts_booked_in_month', 'previous_year_value': 'previous_year_hosts_booked_in_month', 'relative_increment': 'relative_increment_hosts_booked_in_month'}, + {'order_by': 7, 'metric': 'Hosts Booked in 6 Months', 'value': 'hosts_booked_in_6_months', 'previous_year_value': 'previous_year_hosts_booked_in_6_months', 'relative_increment': 'relative_increment_hosts_booked_in_6_months'}, + {'order_by': 8, 'metric': 'Hosts Booked in 12 Months', 'value': 'hosts_booked_in_12_months', 'previous_year_value': 'previous_year_hosts_booked_in_12_months', 'relative_increment': 'relative_increment_hosts_booked_in_12_months'} +] %} + +with + int_core__mtd_booking_metrics as ( + select * from {{ ref("int_core__mtd_booking_metrics") }} + ) +{% for metric in metrics %} +select + year, + month, + day, + is_end_of_month, + is_current_month, + date, + previous_year_date, + {{ metric.order_by }} as order_by, + '{{ metric.metric }}' as metric, + {{ metric.value }} as value, + {{ metric.previous_year_value }} as previous_year_value, + {{ metric.relative_increment }} as relative_increment +from int_core__mtd_booking_metrics +{% if not loop.last %}union all{% endif %} +{% endfor %} diff --git a/models/intermediate/core/int_core__mtd_booking_metrics.sql b/models/intermediate/core/int_core__mtd_booking_metrics.sql new file mode 100644 index 0000000..a59577a --- /dev/null +++ b/models/intermediate/core/int_core__mtd_booking_metrics.sql @@ -0,0 +1,217 @@ +/* +This model provides Month-To-Date (MTD) based on Booking metrics. + +At this stage, we are not filtering out any booking_state (Approved, Cancelled, NotApproved, NoFlags, Flagged, IncompleteInformation, Rejected) +*/ +{{ config(materialized="table", unique_key="date") }} +with + int_core__bookings as (select * from {{ ref("int_core__bookings") }}), + int_dates_mtd as (select * from {{ ref("int_dates_mtd") }}), + + -- Created Bookings MTD -- + created_year_month as ( + select d.date, count(distinct b.id_booking) as created_bookings + from int_dates_mtd d + inner join + int_core__bookings b + on extract(year from b.created_date_utc) = d.year + and extract(month from b.created_date_utc) = d.month + and extract(day from b.created_date_utc) <= d.day + group by 1 + ), + -- Checkout Bookings MTD -- + check_out_year_month as ( + select d.date, count(distinct b.id_booking) as check_out_bookings + from int_dates_mtd d + inner join + int_core__bookings b + on extract(year from b.check_out_date_utc) = d.year + and extract(month from b.check_out_date_utc) = d.month + and extract(day from b.check_out_date_utc) <= d.day + group by 1 + ), + -- Computation of number of months difference for activity-based logic -- + creation_month_diff as ( + select + d.date, + b.id_accommodation, + b.id_user_host, + b.id_user_guest, + max(b.created_date_utc) as last_created_date, + (d.year - extract(year from max(b.created_date_utc))) * 12 + + (d.month - extract(month from max(b.created_date_utc))) as month_diff + from int_dates_mtd d + inner join + int_core__bookings b + on b.created_date_utc between d.date - interval '13 months' and d.date -- reduce the computation at 12 + 1 month to reduce the rows to retrieve + group by 1, 2, 3, 4, d.year, d.month + ), + -- Computation of booking-based activity metrics -- + booking_based_activity as ( + select + cmd.date, + -- Listings activity -- + count( + distinct case + when cmd.month_diff = 0 then cmd.id_accommodation else null + end + ) as listings_booked_in_month, + count( + distinct case + when cmd.month_diff >= 0 and cmd.month_diff < 6 + then cmd.id_accommodation + else null + end + ) as listings_booked_in_6_months, + count( + distinct case + when cmd.month_diff >= 0 and cmd.month_diff < 12 + then cmd.id_accommodation + else null + end + ) as listings_booked_in_12_months, + count(distinct cmd.id_accommodation) as historic_booked_listings, + -- Hosts/PM activity -- + count( + distinct case + when cmd.month_diff = 0 then cmd.id_user_host else null + end + ) as hosts_booked_in_month, + count( + distinct case + when cmd.month_diff >= 0 and cmd.month_diff < 6 + then cmd.id_user_host + else null + end + ) as hosts_booked_in_6_months, + count( + distinct case + when cmd.month_diff >= 0 and cmd.month_diff < 12 + then cmd.id_user_host + else null + end + ) as hosts_booked_in_12_months, + count(distinct cmd.id_user_host) as historic_booked_hosts, + -- Guests activity -- + count( + distinct case + when cmd.month_diff = 0 then cmd.id_user_guest else null + end + ) as guests_booked_in_month, + count( + distinct case + when cmd.month_diff >= 0 and cmd.month_diff < 6 + then cmd.id_user_guest + else null + end + ) as guests_booked_in_6_months, + count( + distinct case + when cmd.month_diff >= 0 and cmd.month_diff < 12 + then cmd.id_user_guest + else null + end + ) as guests_booked_in_12_months, + count(distinct cmd.id_user_guest) as historic_booked_guests + + from creation_month_diff cmd + group by 1 + ), + main_kpi as ( + -- Final aggregation of subqueries -- + select + d.year, + d.month, + d.day, + d.date, + d.is_end_of_month, + d.is_current_month, + cym.created_bookings, + coym.check_out_bookings, + bba.listings_booked_in_month, + bba.listings_booked_in_6_months, + bba.listings_booked_in_12_months, + bba.historic_booked_listings, + bba.hosts_booked_in_month, + bba.hosts_booked_in_6_months, + bba.hosts_booked_in_12_months, + bba.historic_booked_hosts, + bba.guests_booked_in_month, + bba.guests_booked_in_6_months, + bba.guests_booked_in_12_months, + bba.historic_booked_guests + from int_dates_mtd d + left join created_year_month cym on cym.date = d.date + left join check_out_year_month coym on d.date = coym.date + left join booking_based_activity bba on d.date = bba.date + ) +-- Pivoting to get previous year for each line & computing relative increment +-- (rel_incr) -- +select + a.year, + a.month, + a.day, + a.is_end_of_month, + a.is_current_month, + a.date, + b.date as previous_year_date, + a.created_bookings, + b.created_bookings as previous_year_created_bookings, + cast(a.created_bookings as decimal) / b.created_bookings + - 1 as relative_increment_created_bookings, + a.check_out_bookings, + b.check_out_bookings as previous_year_check_out_bookings, + cast(a.check_out_bookings as decimal) / b.check_out_bookings + - 1 as relative_increment_check_out_bookings, + a.listings_booked_in_month, + b.listings_booked_in_month as previous_year_listings_booked_in_month, + cast(a.listings_booked_in_month as decimal) / b.listings_booked_in_month + - 1 as relative_increment_listings_booked_in_month, + a.listings_booked_in_6_months, + b.listings_booked_in_6_months as previous_year_listings_booked_in_6_months, + cast(a.listings_booked_in_6_months as decimal) / b.listings_booked_in_6_months + - 1 as relative_increment_listings_booked_in_6_months, + a.listings_booked_in_12_months, + b.listings_booked_in_12_months as previous_year_listings_booked_in_12_months, + cast(a.listings_booked_in_12_months as decimal) / b.listings_booked_in_12_months + - 1 as relative_increment_listings_booked_in_12_months, + a.historic_booked_listings, + b.historic_booked_listings as previous_year_historic_booked_listings, + cast(a.historic_booked_listings as decimal) / b.historic_booked_listings + - 1 as relative_increment_historic_booked_listings, + a.hosts_booked_in_month, + b.hosts_booked_in_month as previous_year_hosts_booked_in_month, + cast(a.hosts_booked_in_month as decimal) / b.hosts_booked_in_month + - 1 as relative_increment_hosts_booked_in_month, + a.hosts_booked_in_6_months, + b.hosts_booked_in_6_months as previous_year_hosts_booked_in_6_months, + cast(a.hosts_booked_in_6_months as decimal) / b.hosts_booked_in_6_months + - 1 as relative_increment_hosts_booked_in_6_months, + a.hosts_booked_in_12_months, + b.hosts_booked_in_12_months as previous_year_hosts_booked_in_12_months, + cast(a.hosts_booked_in_12_months as decimal) / b.hosts_booked_in_12_months + - 1 as relative_increment_hosts_booked_in_12_months, + a.historic_booked_hosts, + b.historic_booked_hosts as previous_year_historic_booked_hosts, + cast(a.historic_booked_hosts as decimal) / b.historic_booked_hosts + - 1 as relative_increment_historic_booked_hosts, + a.guests_booked_in_month, + b.guests_booked_in_month as previous_year_guests_booked_in_month, + cast(a.guests_booked_in_month as decimal) / b.guests_booked_in_month + - 1 as relative_increment_guests_booked_in_month, + a.guests_booked_in_6_months, + b.guests_booked_in_6_months as previous_year_guests_booked_in_6_months, + cast(a.guests_booked_in_6_months as decimal) / b.guests_booked_in_6_months + - 1 as relative_increment_guests_booked_in_6_months, + a.guests_booked_in_12_months, + b.guests_booked_in_12_months as previous_year_guests_booked_in_12_months, + cast(a.guests_booked_in_12_months as decimal) / b.guests_booked_in_12_months + - 1 as relative_increment_guests_booked_in_12_months, + a.historic_booked_guests, + b.historic_booked_guests as previous_year_historic_booked_guests, + cast(a.historic_booked_guests as decimal) / b.historic_booked_guests + - 1 as relative_increment_historic_booked_guests +from main_kpi a +left join main_kpi b on a.month = b.month and a.year = b.year + 1 +where (a.is_end_of_month = 1 or (a.is_current_month = 1 and a.day = b.day)) + diff --git a/models/intermediate/core/schema.yaml b/models/intermediate/core/schema.yaml index 8f5b57d..371f648 100644 --- a/models/intermediate/core/schema.yaml +++ b/models/intermediate/core/schema.yaml @@ -143,3 +143,70 @@ models: - not_null - unique + - name: int_core__mtd_booking_metrics + columns: + - name: date + data_type: date + description: The date for the month-to-date booking-related metrics. + tests: + - not_null + - unique + + - name: int_core__mtd_aggregated_metrics + description: | + The `int_core__mtd_aggregated_metrics` model aggregates multiple metrics on a year, month, and day basis. + The primary sources of data are the `int_core__mtd_XXXXX_metrics` models, which contain the raw metrics data per source. + This model uses Jinja templating to dynamically generate SQL code, combining various metrics into a single table. + This approach reduces repetition and enhances maintainability. + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - date + - metric + + columns: + - name: year + data_type: int + description: year number of the given date. + tests: + - not_null + + - name: month + data_type: int + description: month number of the given date. + tests: + - not_null + + - name: day + data_type: int + description: day monthly number of the given date. + tests: + - not_null + + - name: is_end_of_month + data_type: boolean + description: is end of month, 1 for yes, 0 for no. + tests: + - not_null + + - name: is_current_month + data_type: boolean + description: | + checks if the date is within the current executed month, + 1 for yes, 0 for no. + tests: + - not_null + + - name: date + data_type: date + description: | + main date for the computation, that is used for filters. + It comes from int_dates_mtd logic. + tests: + - not_null + + - name: metric + data_type: text + description: name of the business metric. + tests: + - not_null diff --git a/models/intermediate/int_dates_mtd.sql b/models/intermediate/int_dates_mtd.sql new file mode 100644 index 0000000..a18e201 --- /dev/null +++ b/models/intermediate/int_dates_mtd.sql @@ -0,0 +1,57 @@ +/* +This model provides Month-To-Date (MTD) necessary dates for MTD-based models to work. +- For month-to-month complete information, it retrieves all end month dates that have elapsed since 2020. +- For month-to-date information, it retrieves the days of the current month of this year up to yesterday. + Additionally, it also gets the days of its equivalent month from last year previous the current day of month of today. + +Example: +Imagine we have are at 4th June 2024. +- We will get the dates for 1st, 2nd, 3rd of June 2024. +- We will also get the dates for 1st, 2nd, 3rd of June 2023. +- We will get all end of months from 2020 to yesterday, + i.e., 31st January 2020, 29th February 2020, ..., 30th April 2024, 31st May 2024. + +*/ +with + int_dates as (select * from {{ ref("int_dates") }} where date_day >= {{ var("start_date") }}), + raw_dates as ( + select + id.year_number as year, + id.month_of_year as month, + id.day_of_month as day, + id.date_day as date, + id.month_start_date as first_day_month, + id.month_end_date as last_day_month, + now()::date as today + from int_dates id + ) +-- Dates keeps only end of month (EOM) dates, as well as +-- dates corresponding to the current month and the same month last year +select distinct + rd.year, + rd.month, + rd.day, + rd.date, + rd.first_day_month, + rd.last_day_month, + case when rd.date = rd.last_day_month then 1 else 0 end as is_end_of_month, + case + when date_trunc('month', rd.date) = date_trunc('month', rd.today) then 1 else 0 + end as is_current_month +from raw_dates rd +where + rd.today > rd.date + -- include only up-to yesterday + and ( + rd.date = rd.last_day_month + -- keep all last day of the month + or rd.date >= date_trunc('month', rd.today) + -- keep all individual days of the current month + or ( + rd.year = extract(year from rd.today) - 1 + and rd.month = extract(month from rd.today) + and rd.day < extract(day from rd.today) + ) + -- keep all days of same month last day up to yesterday's day + ) + diff --git a/models/reporting/core/core__mtd_aggregated_metrics.sql b/models/reporting/core/core__mtd_aggregated_metrics.sql new file mode 100644 index 0000000..3418116 --- /dev/null +++ b/models/reporting/core/core__mtd_aggregated_metrics.sql @@ -0,0 +1,19 @@ +with + int_core__mtd_aggregated_metrics as ( + select * from {{ ref("int_core__mtd_aggregated_metrics") }} + ) + +select + year as year, + month as month, + day as day, + is_end_of_month as is_end_of_month, + is_current_month as is_current_month, + date as date, + previous_year_date as previous_year_date, + order_by as order_by, + metric as metric, + value as value, + previous_year_value as previous_year_value, + relative_increment as relative_increment +from int_core__mtd_aggregated_metrics diff --git a/models/reporting/core/schema.yaml b/models/reporting/core/schema.yaml index 3658766..c4711f1 100644 --- a/models/reporting/core/schema.yaml +++ b/models/reporting/core/schema.yaml @@ -564,4 +564,92 @@ models: - name: dwh_extracted_at_utc data_type: timestamp with time zone - description: "" \ No newline at end of file + description: "" + - name: core__mtd_aggregated_metrics + description: | + This is a month-to-date aggregated table designed to support + the business KPIs reporting. + + At this stage, it's mostly a draft. + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - date + - metric + + columns: + - name: year + data_type: int + description: year number of the given date. + tests: + - not_null + + - name: month + data_type: int + description: month number of the given date. + tests: + - not_null + + - name: day + data_type: int + description: day monthly number of the given date. + tests: + - not_null + + - name: is_end_of_month + data_type: boolean + description: is end of month, 1 for yes, 0 for no. + tests: + - not_null + + - name: is_current_month + data_type: boolean + description: | + checks if the date is within the current executed month, + 1 for yes, 0 for no. + tests: + - not_null + + - name: date + data_type: date + description: | + main date for the computation, that is used for filters. + It comes from int_dates_mtd logic. + tests: + - not_null + + - name: previous_year_date + data_type: date + description: | + equivalent last year date. Mostly unused, just to validate + that the logic works well. + + - name: order_by + data_type: int + description: | + an integer used to fix a display order of the metrics in the visuals. + + - name: metric + data_type: text + description: name of the business metric. + tests: + - not_null + + - name: value + data_type: int8 + description: | + value of the current metric. It represents the MTD value if the month is + in progress, and the Monthly value if the month has already finished. + + - name: previous_year_value + data_type: int8 + description: | + value of the current metric, but from last year. It represents the + last year MTD value if the month is in progress, and the last year + Monthly value if the month has already finished. + + - name: relative_increment + data_type: numeric + description: | + relative increment between value vs. previous_year_value. If previous_year_value is null, + relative_increment will also be null.