From ad2eb2544ce39b8a465c842a3ac040288bd6e934 Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Thu, 29 Aug 2024 11:09:09 +0200 Subject: [PATCH 1/8] edeposit_agg_fee_per_user to reporting --- models/intermediate/core/schema.yaml | 231 +----------------- .../int_edeposit__agg_fee_per_user.sql | 0 .../int_edeposit__verifications.sql | 0 models/intermediate/edeposit/schema.yaml | 231 ++++++++++++++++++ .../edeposit/edeposit_agg_fee_per_user.sql | 16 ++ models/reporting/edeposit/schema.yaml | 61 +++++ 6 files changed, 309 insertions(+), 230 deletions(-) rename models/intermediate/{core => edeposit}/int_edeposit__agg_fee_per_user.sql (100%) rename models/intermediate/{core => edeposit}/int_edeposit__verifications.sql (100%) create mode 100644 models/intermediate/edeposit/schema.yaml create mode 100644 models/reporting/edeposit/edeposit_agg_fee_per_user.sql create mode 100644 models/reporting/edeposit/schema.yaml diff --git a/models/intermediate/core/schema.yaml b/models/intermediate/core/schema.yaml index 9149abf..f90c0e8 100644 --- a/models/intermediate/core/schema.yaml +++ b/models/intermediate/core/schema.yaml @@ -2781,233 +2781,4 @@ models: - name: has_bookings_with_product_bundle_with_paid_service data_type: integer description: | - Integer-based flag version of total_bookings_with_product_bundle_with_paid_service. - - - name: int_edeposit__verifications - description: - "This table holds records on verifications for e-deposit bookings. - It contains details on validations checked on the guests, guest information - and some booking details like checkin-checkout date or the status of the verification. - The id values found here are completely unrelated to the ones found in Core DWH." - columns: - - name: id_verification - data_type: text - description: "unique Superhog generated id for this verification" - tests: - - unique - - not_null - - - name: id_booking - data_type: text - description: "unique Superhog generated id for a booking" - - - name: id_user_host - data_type: text - description: "unique Superhog generated id for host" - - - name: id_accommodation - data_type: text - description: "unique Superhog generated id for a listing" - - - name: version - data_type: text - description: - "value to identify if it is Guesty (V1) or E-deposit (V2)" - tests: - - accepted_values: - values: - - V1 - - V2 - - - name: verification_source - data_type: text - description: - "source of the verification for the booking" - tests: - - accepted_values: - values: - - Guesty - - Edeposit - - - name: verification_status - data_type: text - description: "status of the verification" - - - name: verification_status_reason - data_type: text - description: "short explanation for status" - - - name: nightly_fee_local - data_type: text - description: "fee charged per night" - - - name: number_nights - data_type: integer - description: "number of nights for the booking" - - - name: email_flag - data_type: text - description: "" - - - name: phone_flag - data_type: text - description: "" - - - name: watch_list - data_type: text - description: "" - - - name: channel - data_type: text - description: "" - - - name: checkin_at_utc - data_type: timestamp without time zone - description: "Timestamp of checkin for the booking" - - - name: checkout_at_utc - data_type: timestamp without time zone - description: "Timestamp of checkout for the booking" - - - name: is_cancelled - data_type: boolean - description: "" - - - name: cancelled_at_utc - data_type: timestamp without time zone - description: "Timestamp of cancellation of the booking" - - - name: user_email - data_type: text - description: "" - - - name: guest_email - data_type: text - description: "" - - - name: guest_last_name - data_type: text - description: "" - - - name: guest_first_name - data_type: text - description: "" - - - name: guest_telephone - data_type: text - description: "" - - - name: company_name - data_type: text - description: "" - - - name: property_manager_name - data_type: text - description: "" - - - name: property_manager_email - data_type: text - description: "" - - - name: listing_name - data_type: text - description: "" - - - name: listing_town - data_type: text - description: "" - - - name: listing_country - data_type: text - description: "" - - - name: listing_postcode - data_type: text - description: "" - - - name: pets_allowed - data_type: boolean - description: "" - - - name: level_of_protection_amount - data_type: integer - description: "" - - - name: level_of_protection_currency - data_type: text - description: "" - - - name: status_updated_at_utc - data_type: timestamp without time zone - description: "Timestamp when status was last updated" - - - name: updated_at_utc - data_type: timestamp without time zone - description: "Timestamp of last updated" - - - name: creation_at_utc - data_type: timestamp without time zone - description: "" - - - name: created_at_utc - data_type: timestamp without time zone - description: "" - - - name: int_edeposit__agg_fee_per_user - description: - "This table holds detailed data on revenue generated through e-deposit verifications. - Each record provides insights into booking activities per user, including the number - of bookings, cancellations, and associated fees within specific months. Each record - captures data for bookings created in a particular month along with their corresponding - checkout month, allowing for a comprehensive view of the booking lifecycle and associated - revenues within those periods." - tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - year_month_created - - year_month_checkout - - id_user_host - columns: - - name: year_month_created - data_type: text - description: "first day of month of created date" - - - name: year_month_checkout - data_type: text - description: "first day of month of check-out date" - - - name: id_user_host - data_type: text - description: "unique id value for user host" - - - name: bookings_per_month - data_type: bigint - description: "total number of bookings" - - - name: cancelled_per_month - data_type: bigint - description: "number of cancelled bookings" - - - name: cancelled_ratio - data_type: numeric - description: "ratio of cancelled bookings over total bookings" - - - name: sum_cancelled_fee - data_type: numeric - description: "sum of fees charged for cancelled bookings - (currency-less)" - - - name: sum_ok_status_fee - data_type: double precision - description: | - "sum of fees charged for bookings with status 'Approved' or 'Flagged' - (currency-less)" - - - name: sum_rejected_fee - data_type: numeric - description: "sum of fees charged for rejected bookings - (currency-less)" - - - name: total_revenue - data_type: double precision - description: "total sum of fees charged (currency-less)" \ No newline at end of file + Integer-based flag version of total_bookings_with_product_bundle_with_paid_service. \ No newline at end of file diff --git a/models/intermediate/core/int_edeposit__agg_fee_per_user.sql b/models/intermediate/edeposit/int_edeposit__agg_fee_per_user.sql similarity index 100% rename from models/intermediate/core/int_edeposit__agg_fee_per_user.sql rename to models/intermediate/edeposit/int_edeposit__agg_fee_per_user.sql diff --git a/models/intermediate/core/int_edeposit__verifications.sql b/models/intermediate/edeposit/int_edeposit__verifications.sql similarity index 100% rename from models/intermediate/core/int_edeposit__verifications.sql rename to models/intermediate/edeposit/int_edeposit__verifications.sql diff --git a/models/intermediate/edeposit/schema.yaml b/models/intermediate/edeposit/schema.yaml new file mode 100644 index 0000000..2d4aea2 --- /dev/null +++ b/models/intermediate/edeposit/schema.yaml @@ -0,0 +1,231 @@ +version: 2 + +models: + - name: int_edeposit__verifications + description: + "This table holds records on verifications for e-deposit bookings. + It contains details on validations checked on the guests, guest information + and some booking details like checkin-checkout date or the status of the verification. + The id values found here are completely unrelated to the ones found in Core DWH." + columns: + - name: id_verification + data_type: text + description: "unique Superhog generated id for this verification" + tests: + - unique + - not_null + + - name: id_booking + data_type: text + description: "unique Superhog generated id for a booking" + + - name: id_user_host + data_type: text + description: "unique Superhog generated id for host" + + - name: id_accommodation + data_type: text + description: "unique Superhog generated id for a listing" + + - name: version + data_type: text + description: + "value to identify if it is Guesty (V1) or E-deposit (V2)" + tests: + - accepted_values: + values: + - V1 + - V2 + + - name: verification_source + data_type: text + description: + "source of the verification for the booking" + tests: + - accepted_values: + values: + - Guesty + - Edeposit + + - name: verification_status + data_type: text + description: "status of the verification" + + - name: verification_status_reason + data_type: text + description: "short explanation for status" + + - name: nightly_fee_local + data_type: text + description: "fee charged per night" + + - name: number_nights + data_type: integer + description: "number of nights for the booking" + + - name: email_flag + data_type: text + description: "" + + - name: phone_flag + data_type: text + description: "" + + - name: watch_list + data_type: text + description: "" + + - name: channel + data_type: text + description: "" + + - name: checkin_at_utc + data_type: timestamp without time zone + description: "Timestamp of checkin for the booking" + + - name: checkout_at_utc + data_type: timestamp without time zone + description: "Timestamp of checkout for the booking" + + - name: is_cancelled + data_type: boolean + description: "" + + - name: cancelled_at_utc + data_type: timestamp without time zone + description: "Timestamp of cancellation of the booking" + + - name: user_email + data_type: text + description: "" + + - name: guest_email + data_type: text + description: "" + + - name: guest_last_name + data_type: text + description: "" + + - name: guest_first_name + data_type: text + description: "" + + - name: guest_telephone + data_type: text + description: "" + + - name: company_name + data_type: text + description: "" + + - name: property_manager_name + data_type: text + description: "" + + - name: property_manager_email + data_type: text + description: "" + + - name: listing_name + data_type: text + description: "" + + - name: listing_town + data_type: text + description: "" + + - name: listing_country + data_type: text + description: "" + + - name: listing_postcode + data_type: text + description: "" + + - name: pets_allowed + data_type: boolean + description: "" + + - name: level_of_protection_amount + data_type: integer + description: "" + + - name: level_of_protection_currency + data_type: text + description: "" + + - name: status_updated_at_utc + data_type: timestamp without time zone + description: "Timestamp when status was last updated" + + - name: updated_at_utc + data_type: timestamp without time zone + description: "Timestamp of last updated" + + - name: creation_at_utc + data_type: timestamp without time zone + description: "" + + - name: created_at_utc + data_type: timestamp without time zone + description: "" + + - name: int_edeposit__agg_fee_per_user + description: + "This table holds detailed data on revenue generated through e-deposit verifications. + Each record provides insights into booking activities per user, including the number + of bookings, cancellations, and associated fees within specific months. Each record + captures data for bookings created in a particular month along with their corresponding + checkout month, allowing for a comprehensive view of the booking lifecycle and associated + revenues within those periods." + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - year_month_created + - year_month_checkout + - id_user_host + columns: + - name: year_month_created + data_type: text + description: "first day of month of created date" + + - name: year_month_checkout + data_type: text + description: "first day of month of check-out date" + + - name: id_user_host + data_type: text + description: "unique id value for user host" + + - name: bookings_per_month + data_type: bigint + description: "total number of bookings" + + - name: cancelled_per_month + data_type: bigint + description: "number of cancelled bookings" + + - name: cancelled_ratio + data_type: numeric + description: "ratio of cancelled bookings over total bookings" + + - name: sum_cancelled_fee + data_type: numeric + description: "sum of fees charged for cancelled bookings + (currency-less)" + + - name: sum_ok_status_fee + data_type: double precision + description: | + "sum of fees charged for bookings with status 'Approved' or 'Flagged' + (currency-less)" + + - name: sum_rejected_fee + data_type: numeric + description: "sum of fees charged for rejected bookings + (currency-less)" + + - name: total_revenue + data_type: double precision + description: "total sum of fees charged (currency-less)" \ No newline at end of file diff --git a/models/reporting/edeposit/edeposit_agg_fee_per_user.sql b/models/reporting/edeposit/edeposit_agg_fee_per_user.sql new file mode 100644 index 0000000..2a17f57 --- /dev/null +++ b/models/reporting/edeposit/edeposit_agg_fee_per_user.sql @@ -0,0 +1,16 @@ +with + int_edeposit__agg_fee_per_user as ( + select * from {{ ref("int_edeposit__agg_fee_per_user") }} + ) +select + year_month_created as year_month_created, + year_month_checkout as year_month_checkout, + id_user_host as id_user_host, + bookings_per_month as bookings_per_month, + cancelled_per_month as cancelled_per_month, + cancelled_ratio as cancelled_ratio, + sum_cancelled_fee as sum_cancelled_fee, + sum_ok_status_fee as sum_ok_status_fee, + sum_rejected_fee as sum_rejected_fee, + total_revenue as total_revenue +from int_edeposit__agg_fee_per_user diff --git a/models/reporting/edeposit/schema.yaml b/models/reporting/edeposit/schema.yaml new file mode 100644 index 0000000..af44f12 --- /dev/null +++ b/models/reporting/edeposit/schema.yaml @@ -0,0 +1,61 @@ +version: 2 + +models: + - name: edeposit__agg_fee_per_user + description: + "This table holds detailed data on revenue generated through e-deposit verifications. + Each record provides insights into booking activities per user, including the number + of bookings, cancellations, and associated fees within specific months. Each record + captures data for bookings created in a particular month along with their corresponding + checkout month, allowing for a comprehensive view of the booking lifecycle and associated + revenues within those periods." + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - year_month_created + - year_month_checkout + - id_user_host + columns: + - name: year_month_created + data_type: text + description: "first day of month of created date" + + - name: year_month_checkout + data_type: text + description: "first day of month of check-out date" + + - name: id_user_host + data_type: text + description: "unique id value for user host" + + - name: bookings_per_month + data_type: bigint + description: "total number of bookings" + + - name: cancelled_per_month + data_type: bigint + description: "number of cancelled bookings" + + - name: cancelled_ratio + data_type: numeric + description: "ratio of cancelled bookings over total bookings" + + - name: sum_cancelled_fee + data_type: numeric + description: "sum of fees charged for cancelled bookings + (currency-less)" + + - name: sum_ok_status_fee + data_type: double precision + description: | + "sum of fees charged for bookings with status 'Approved' or 'Flagged' + (currency-less)" + + - name: sum_rejected_fee + data_type: numeric + description: "sum of fees charged for rejected bookings + (currency-less)" + + - name: total_revenue + data_type: double precision + description: "total sum of fees charged (currency-less)" \ No newline at end of file From 6adc424963a676c3ca8a9345006ef0131dedf51d Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Thu, 29 Aug 2024 14:25:00 +0200 Subject: [PATCH 2/8] addressed Pablo's comments, removed the repetitive casts, added some nut_null tests and fixed some of the names and descriptions discrepancies --- .../int_edeposit__agg_fee_per_user.sql | 69 ++++++++++++------- .../edeposit/int_edeposit__verifications.sql | 8 ++- models/intermediate/edeposit/schema.yaml | 36 +++++++--- ...ser.sql => edeposit__agg_fee_per_user.sql} | 3 +- models/reporting/edeposit/schema.yaml | 26 +++++-- .../edeposit/stg_edeposit__verifications.sql | 2 +- 6 files changed, 96 insertions(+), 48 deletions(-) rename models/reporting/edeposit/{edeposit_agg_fee_per_user.sql => edeposit__agg_fee_per_user.sql} (82%) diff --git a/models/intermediate/edeposit/int_edeposit__agg_fee_per_user.sql b/models/intermediate/edeposit/int_edeposit__agg_fee_per_user.sql index 0a61c9c..72bb47c 100644 --- a/models/intermediate/edeposit/int_edeposit__agg_fee_per_user.sql +++ b/models/intermediate/edeposit/int_edeposit__agg_fee_per_user.sql @@ -11,15 +11,15 @@ with select id_booking, is_cancelled, + id_user_partner, channel, - cast(checkin_at_utc as date) as checkin_date_utc, - cast(checkout_at_utc as date) as checkout_date_utc, + checkin_date_utc, + checkout_date_utc, creation_at_utc, verification_status, - id_user_host, case when verification_status in {{ ok_status }} - then cast(nightly_fee_local as float) * number_nights + then nightly_fee_local * number_nights else 0 end as ok_status_fee, case @@ -30,33 +30,50 @@ with to_char(creation_at_utc, 'YYYY-MM') as year_month_created, to_char(checkout_at_utc, 'YYYY-MM') as year_month_checkout from int_edeposit__verifications - where version = 'V2' -- This version V2 indicates records for e-deposit + where version = 'V2' + ), + calculated_metrics as ( + select + id_user_partner, + year_month_created, + year_month_checkout, + count(distinct id_booking) as bookings_per_month, + sum(cast(is_cancelled as integer)) as cancelled_per_month, + sum(cast(is_cancelled as integer))::decimal + / count(distinct id_booking) as cancelled_ratio, + sum(ok_status_fee) as sum_ok_status_fee, + sum(rejected_fee) as sum_rejected_fee, + case + when + sum(cast(is_cancelled as integer))::decimal + / count(distinct id_booking) + >= {{ cancellation_threshold }} + then true + else false + end as is_cancellation_threshold_surpassed + from edeposit_records + group by year_month_created, year_month_checkout, id_user_partner ) select + id_user_partner, year_month_created, year_month_checkout, - id_user_host, - count(distinct id_booking) as bookings_per_month, - sum(cast(is_cancelled as integer)) as cancelled_per_month, - sum(cast(is_cancelled as integer))::decimal - / count(distinct id_booking) as cancelled_ratio, + bookings_per_month, + cancelled_per_month, + cancelled_ratio, + is_cancellation_threshold_surpassed, case - when - sum(cast(is_cancelled as integer))::decimal / count(distinct id_booking) - >= {{ cancellation_threshold }} - then sum(cast(is_cancelled as integer)) * {{ cancellation_fee }} + when is_cancellation_threshold_surpassed + then cancelled_per_month * {{ cancellation_fee }} else 0 end as sum_cancelled_fee, - sum(ok_status_fee) as sum_ok_status_fee, - sum(rejected_fee) as sum_rejected_fee, - case - when - sum(cast(is_cancelled as integer))::decimal / count(distinct id_booking) - >= {{ cancellation_threshold }} - then sum(cast(is_cancelled as integer)) * {{ cancellation_fee }} + sum_ok_status_fee, + sum_rejected_fee, + sum_ok_status_fee + + sum_rejected_fee + + case + when cancelled_ratio >= {{ cancellation_threshold }} + then cancelled_per_month * {{ cancellation_fee }} else 0 - end - + sum(ok_status_fee) - + sum(rejected_fee) as total_revenue -from edeposit_records -group by year_month_created, year_month_checkout, id_user_host + end as total_revenue +from calculated_metrics diff --git a/models/intermediate/edeposit/int_edeposit__verifications.sql b/models/intermediate/edeposit/int_edeposit__verifications.sql index 8440f50..d835a61 100644 --- a/models/intermediate/edeposit/int_edeposit__verifications.sql +++ b/models/intermediate/edeposit/int_edeposit__verifications.sql @@ -5,9 +5,9 @@ with select -- note that these ids are not the same as the ones found in Core DWH -- they are completely unrelated - id as id_verification, + id_verification, id_booking, - id_user as id_user_host, + id_user as id_user_partner, id_listing as id_accommodation, version, -- V1 for Guesty and V2 for E-deposit case @@ -15,14 +15,16 @@ select end as verification_source, verification_status, verification_status_reason, - nightly_fee_local, + cast(nightly_fee_local as float), cast(checkout_at_utc as date) - cast(checkin_at_utc as date) as number_nights, email_flag, phone_flag, watch_list, channel, checkin_at_utc, + cast(checkin_at_utc as date) as checkin_date_utc, checkout_at_utc, + cast(checkout_at_utc as date) as checkout_date_utc, is_cancelled, cancelled_at_utc, user_email, diff --git a/models/intermediate/edeposit/schema.yaml b/models/intermediate/edeposit/schema.yaml index 2d4aea2..49266dd 100644 --- a/models/intermediate/edeposit/schema.yaml +++ b/models/intermediate/edeposit/schema.yaml @@ -19,9 +19,11 @@ models: data_type: text description: "unique Superhog generated id for a booking" - - name: id_user_host + - name: id_user_partner data_type: text - description: "unique Superhog generated id for host" + description: "unique Superhog generated id for partner" + tests: + - not_null - name: id_accommodation data_type: text @@ -56,7 +58,7 @@ models: description: "short explanation for status" - name: nightly_fee_local - data_type: text + data_type: double precision description: "fee charged per night" - name: number_nights @@ -83,10 +85,18 @@ models: data_type: timestamp without time zone description: "Timestamp of checkin for the booking" + - name: checkin_date_utc + data_type: date + description: "Timestamp of checkin for the booking" + - name: checkout_at_utc data_type: timestamp without time zone description: "Timestamp of checkout for the booking" + - name: checkout_date_utc + data_type: date + description: "Timestamp of checkout for the booking" + - name: is_cancelled data_type: boolean description: "" @@ -184,19 +194,19 @@ models: combination_of_columns: - year_month_created - year_month_checkout - - id_user_host + - id_user_partner columns: + - name: id_user_partner + data_type: text + description: "unique id value for user partner" + - name: year_month_created data_type: text - description: "first day of month of created date" + description: "year and month of created date" - name: year_month_checkout data_type: text - description: "first day of month of check-out date" - - - name: id_user_host - data_type: text - description: "unique id value for user host" + description: "year and month of check-out date" - name: bookings_per_month data_type: bigint @@ -210,6 +220,12 @@ models: data_type: numeric description: "ratio of cancelled bookings over total bookings" + - name: is_cancellation_threshold_surpassed + data_type: boolean + description: "true if the cancellation ratio is higher than the set threshold" + tests: + - not_null + - name: sum_cancelled_fee data_type: numeric description: "sum of fees charged for cancelled bookings diff --git a/models/reporting/edeposit/edeposit_agg_fee_per_user.sql b/models/reporting/edeposit/edeposit__agg_fee_per_user.sql similarity index 82% rename from models/reporting/edeposit/edeposit_agg_fee_per_user.sql rename to models/reporting/edeposit/edeposit__agg_fee_per_user.sql index 2a17f57..facd144 100644 --- a/models/reporting/edeposit/edeposit_agg_fee_per_user.sql +++ b/models/reporting/edeposit/edeposit__agg_fee_per_user.sql @@ -5,10 +5,11 @@ with select year_month_created as year_month_created, year_month_checkout as year_month_checkout, - id_user_host as id_user_host, + id_user_partner as id_user_partner, bookings_per_month as bookings_per_month, cancelled_per_month as cancelled_per_month, cancelled_ratio as cancelled_ratio, + is_cancellation_threshold_surpassed as is_cancellation_threshold_surpassed, sum_cancelled_fee as sum_cancelled_fee, sum_ok_status_fee as sum_ok_status_fee, sum_rejected_fee as sum_rejected_fee, diff --git a/models/reporting/edeposit/schema.yaml b/models/reporting/edeposit/schema.yaml index af44f12..b3b1c4b 100644 --- a/models/reporting/edeposit/schema.yaml +++ b/models/reporting/edeposit/schema.yaml @@ -14,19 +14,25 @@ models: combination_of_columns: - year_month_created - year_month_checkout - - id_user_host + - id_user_partner columns: + - name: id_user_partner + data_type: text + description: "unique id value for user partner" + test: + - not_null + - name: year_month_created data_type: text - description: "first day of month of created date" + description: "year and month of created date" + test: + - not_null - name: year_month_checkout data_type: text - description: "first day of month of check-out date" - - - name: id_user_host - data_type: text - description: "unique id value for user host" + description: "year and month of check-out date" + test: + - not_null - name: bookings_per_month data_type: bigint @@ -40,6 +46,12 @@ models: data_type: numeric description: "ratio of cancelled bookings over total bookings" + - name: is_cancellation_threshold_surpassed + data_type: boolean + description: "true if the cancellation ratio is higher than the set threshold" + tests: + - not_null + - name: sum_cancelled_fee data_type: numeric description: "sum of fees charged for cancelled bookings diff --git a/models/staging/edeposit/stg_edeposit__verifications.sql b/models/staging/edeposit/stg_edeposit__verifications.sql index a787a19..48b9a7b 100644 --- a/models/staging/edeposit/stg_edeposit__verifications.sql +++ b/models/staging/edeposit/stg_edeposit__verifications.sql @@ -27,7 +27,7 @@ with ), stg_edeposit__verifications as ( select - {{ adapter.quote("documents") }} ->> 'id' as id, + {{ adapter.quote("documents") }} ->> 'id' as id_verification, {{ adapter.quote("documents") }} ->> 'BookingId' as id_booking, {{ adapter.quote("documents") }} ->> 'userId' as id_user, {{ adapter.quote("documents") }} ->> 'ListingId' as id_listing, From 42510bbb4d0cbfcd50b36b80bb2d1eb262ecc32d Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Fri, 30 Aug 2024 10:33:43 +0200 Subject: [PATCH 3/8] Just committing to save change and create a new branch for basic to push basic changes --- .../intermediate/edeposit/int_edeposit__agg_fee_per_user.sql | 4 ++-- models/intermediate/edeposit/int_edeposit__verifications.sql | 2 +- models/staging/edeposit/schema.yml | 2 +- models/staging/edeposit/stg_edeposit__verifications.sql | 4 +++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/models/intermediate/edeposit/int_edeposit__agg_fee_per_user.sql b/models/intermediate/edeposit/int_edeposit__agg_fee_per_user.sql index 72bb47c..0a76658 100644 --- a/models/intermediate/edeposit/int_edeposit__agg_fee_per_user.sql +++ b/models/intermediate/edeposit/int_edeposit__agg_fee_per_user.sql @@ -32,7 +32,7 @@ with from int_edeposit__verifications where version = 'V2' ), - calculated_metrics as ( + aggregated_partner_booking_metrics as ( select id_user_partner, year_month_created, @@ -76,4 +76,4 @@ select then cancelled_per_month * {{ cancellation_fee }} else 0 end as total_revenue -from calculated_metrics +from aggregated_partner_booking_metrics diff --git a/models/intermediate/edeposit/int_edeposit__verifications.sql b/models/intermediate/edeposit/int_edeposit__verifications.sql index d835a61..0a62957 100644 --- a/models/intermediate/edeposit/int_edeposit__verifications.sql +++ b/models/intermediate/edeposit/int_edeposit__verifications.sql @@ -15,7 +15,7 @@ select end as verification_source, verification_status, verification_status_reason, - cast(nightly_fee_local as float), + nightly_fee_local, cast(checkout_at_utc as date) - cast(checkin_at_utc as date) as number_nights, email_flag, phone_flag, diff --git a/models/staging/edeposit/schema.yml b/models/staging/edeposit/schema.yml index a90dec6..4e201a1 100644 --- a/models/staging/edeposit/schema.yml +++ b/models/staging/edeposit/schema.yml @@ -6,7 +6,7 @@ models: "Records of each transaction that happens in the edeposit API. Records are mutable and can get updated." columns: - - name: id + - name: id_verification data_type: character varying description: "Unique id for the specific transaction." tests: diff --git a/models/staging/edeposit/stg_edeposit__verifications.sql b/models/staging/edeposit/stg_edeposit__verifications.sql index 48b9a7b..c97b331 100644 --- a/models/staging/edeposit/stg_edeposit__verifications.sql +++ b/models/staging/edeposit/stg_edeposit__verifications.sql @@ -34,7 +34,9 @@ with {{ adapter.quote("documents") }} ->> 'Version' as "version", - {{ adapter.quote("documents") }} ->> 'NightlyFee' as "nightly_fee_local", + cast( + {{ adapter.quote("documents") }} ->> 'NightlyFee' as decimal(19, 4) + ) as "nightly_fee_local", {{ adapter.quote("documents") }} ->> 'Status' as verification_status, {{ adapter.quote("documents") }} From fd98f31fdd1dc41793eaf16c9e9c5c013d17d50d Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Fri, 30 Aug 2024 10:54:00 +0200 Subject: [PATCH 4/8] Kept basic models to reduce complexity of models/20125_edeposit_migration_agg_model_reporting --- .../int_edeposit__agg_fee_per_user.sql | 79 ------------------- models/intermediate/edeposit/schema.yaml | 65 --------------- .../edeposit/edeposit__agg_fee_per_user.sql | 17 ---- models/reporting/edeposit/schema.yaml | 72 +---------------- 4 files changed, 1 insertion(+), 232 deletions(-) delete mode 100644 models/intermediate/edeposit/int_edeposit__agg_fee_per_user.sql delete mode 100644 models/reporting/edeposit/edeposit__agg_fee_per_user.sql diff --git a/models/intermediate/edeposit/int_edeposit__agg_fee_per_user.sql b/models/intermediate/edeposit/int_edeposit__agg_fee_per_user.sql deleted file mode 100644 index 0a76658..0000000 --- a/models/intermediate/edeposit/int_edeposit__agg_fee_per_user.sql +++ /dev/null @@ -1,79 +0,0 @@ -{% set ok_status = ("Approved", "Flagged") %} -{% set rejected_status = "Rejected" %} -{% set rejected_fee = 0.25 %} -{% set cancellation_fee = 0.25 %} -{% set cancellation_threshold = 0.05 %} -with - int_edeposit__verifications as ( - select * from {{ ref("int_edeposit__verifications") }} - ), - edeposit_records as ( - select - id_booking, - is_cancelled, - id_user_partner, - channel, - checkin_date_utc, - checkout_date_utc, - creation_at_utc, - verification_status, - case - when verification_status in {{ ok_status }} - then nightly_fee_local * number_nights - else 0 - end as ok_status_fee, - case - when verification_status = '{{ rejected_status }}' - then {{ rejected_fee }} - else 0 - end as rejected_fee, - to_char(creation_at_utc, 'YYYY-MM') as year_month_created, - to_char(checkout_at_utc, 'YYYY-MM') as year_month_checkout - from int_edeposit__verifications - where version = 'V2' - ), - aggregated_partner_booking_metrics as ( - select - id_user_partner, - year_month_created, - year_month_checkout, - count(distinct id_booking) as bookings_per_month, - sum(cast(is_cancelled as integer)) as cancelled_per_month, - sum(cast(is_cancelled as integer))::decimal - / count(distinct id_booking) as cancelled_ratio, - sum(ok_status_fee) as sum_ok_status_fee, - sum(rejected_fee) as sum_rejected_fee, - case - when - sum(cast(is_cancelled as integer))::decimal - / count(distinct id_booking) - >= {{ cancellation_threshold }} - then true - else false - end as is_cancellation_threshold_surpassed - from edeposit_records - group by year_month_created, year_month_checkout, id_user_partner - ) -select - id_user_partner, - year_month_created, - year_month_checkout, - bookings_per_month, - cancelled_per_month, - cancelled_ratio, - is_cancellation_threshold_surpassed, - case - when is_cancellation_threshold_surpassed - then cancelled_per_month * {{ cancellation_fee }} - else 0 - end as sum_cancelled_fee, - sum_ok_status_fee, - sum_rejected_fee, - sum_ok_status_fee - + sum_rejected_fee - + case - when cancelled_ratio >= {{ cancellation_threshold }} - then cancelled_per_month * {{ cancellation_fee }} - else 0 - end as total_revenue -from aggregated_partner_booking_metrics diff --git a/models/intermediate/edeposit/schema.yaml b/models/intermediate/edeposit/schema.yaml index 49266dd..65e9bfa 100644 --- a/models/intermediate/edeposit/schema.yaml +++ b/models/intermediate/edeposit/schema.yaml @@ -180,68 +180,3 @@ models: - name: created_at_utc data_type: timestamp without time zone description: "" - - - name: int_edeposit__agg_fee_per_user - description: - "This table holds detailed data on revenue generated through e-deposit verifications. - Each record provides insights into booking activities per user, including the number - of bookings, cancellations, and associated fees within specific months. Each record - captures data for bookings created in a particular month along with their corresponding - checkout month, allowing for a comprehensive view of the booking lifecycle and associated - revenues within those periods." - tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - year_month_created - - year_month_checkout - - id_user_partner - columns: - - name: id_user_partner - data_type: text - description: "unique id value for user partner" - - - name: year_month_created - data_type: text - description: "year and month of created date" - - - name: year_month_checkout - data_type: text - description: "year and month of check-out date" - - - name: bookings_per_month - data_type: bigint - description: "total number of bookings" - - - name: cancelled_per_month - data_type: bigint - description: "number of cancelled bookings" - - - name: cancelled_ratio - data_type: numeric - description: "ratio of cancelled bookings over total bookings" - - - name: is_cancellation_threshold_surpassed - data_type: boolean - description: "true if the cancellation ratio is higher than the set threshold" - tests: - - not_null - - - name: sum_cancelled_fee - data_type: numeric - description: "sum of fees charged for cancelled bookings - (currency-less)" - - - name: sum_ok_status_fee - data_type: double precision - description: | - "sum of fees charged for bookings with status 'Approved' or 'Flagged' - (currency-less)" - - - name: sum_rejected_fee - data_type: numeric - description: "sum of fees charged for rejected bookings - (currency-less)" - - - name: total_revenue - data_type: double precision - description: "total sum of fees charged (currency-less)" \ No newline at end of file diff --git a/models/reporting/edeposit/edeposit__agg_fee_per_user.sql b/models/reporting/edeposit/edeposit__agg_fee_per_user.sql deleted file mode 100644 index facd144..0000000 --- a/models/reporting/edeposit/edeposit__agg_fee_per_user.sql +++ /dev/null @@ -1,17 +0,0 @@ -with - int_edeposit__agg_fee_per_user as ( - select * from {{ ref("int_edeposit__agg_fee_per_user") }} - ) -select - year_month_created as year_month_created, - year_month_checkout as year_month_checkout, - id_user_partner as id_user_partner, - bookings_per_month as bookings_per_month, - cancelled_per_month as cancelled_per_month, - cancelled_ratio as cancelled_ratio, - is_cancellation_threshold_surpassed as is_cancellation_threshold_surpassed, - sum_cancelled_fee as sum_cancelled_fee, - sum_ok_status_fee as sum_ok_status_fee, - sum_rejected_fee as sum_rejected_fee, - total_revenue as total_revenue -from int_edeposit__agg_fee_per_user diff --git a/models/reporting/edeposit/schema.yaml b/models/reporting/edeposit/schema.yaml index b3b1c4b..e95673b 100644 --- a/models/reporting/edeposit/schema.yaml +++ b/models/reporting/edeposit/schema.yaml @@ -1,73 +1,3 @@ version: 2 -models: - - name: edeposit__agg_fee_per_user - description: - "This table holds detailed data on revenue generated through e-deposit verifications. - Each record provides insights into booking activities per user, including the number - of bookings, cancellations, and associated fees within specific months. Each record - captures data for bookings created in a particular month along with their corresponding - checkout month, allowing for a comprehensive view of the booking lifecycle and associated - revenues within those periods." - tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - year_month_created - - year_month_checkout - - id_user_partner - columns: - - name: id_user_partner - data_type: text - description: "unique id value for user partner" - test: - - not_null - - - name: year_month_created - data_type: text - description: "year and month of created date" - test: - - not_null - - - name: year_month_checkout - data_type: text - description: "year and month of check-out date" - test: - - not_null - - - name: bookings_per_month - data_type: bigint - description: "total number of bookings" - - - name: cancelled_per_month - data_type: bigint - description: "number of cancelled bookings" - - - name: cancelled_ratio - data_type: numeric - description: "ratio of cancelled bookings over total bookings" - - - name: is_cancellation_threshold_surpassed - data_type: boolean - description: "true if the cancellation ratio is higher than the set threshold" - tests: - - not_null - - - name: sum_cancelled_fee - data_type: numeric - description: "sum of fees charged for cancelled bookings - (currency-less)" - - - name: sum_ok_status_fee - data_type: double precision - description: | - "sum of fees charged for bookings with status 'Approved' or 'Flagged' - (currency-less)" - - - name: sum_rejected_fee - data_type: numeric - description: "sum of fees charged for rejected bookings - (currency-less)" - - - name: total_revenue - data_type: double precision - description: "total sum of fees charged (currency-less)" \ No newline at end of file +models: \ No newline at end of file From 7a77691b892f772b4a5c9441498ea8b49bcfe515 Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Fri, 30 Aug 2024 10:58:24 +0200 Subject: [PATCH 5/8] deleted schema from edeposit reporting --- models/reporting/edeposit/schema.yaml | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 models/reporting/edeposit/schema.yaml diff --git a/models/reporting/edeposit/schema.yaml b/models/reporting/edeposit/schema.yaml deleted file mode 100644 index e95673b..0000000 --- a/models/reporting/edeposit/schema.yaml +++ /dev/null @@ -1,3 +0,0 @@ -version: 2 - -models: \ No newline at end of file From 46d5e7c3c5d7a624c588280f91f8fd36ad9b1828 Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Mon, 2 Sep 2024 11:16:51 +0200 Subject: [PATCH 6/8] Updating with Ray's comments --- .../edeposit/int_edeposit__verifications.sql | 8 +++- models/intermediate/edeposit/schema.yaml | 46 +++++++++++++------ 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/models/intermediate/edeposit/int_edeposit__verifications.sql b/models/intermediate/edeposit/int_edeposit__verifications.sql index 0a62957..32b6a44 100644 --- a/models/intermediate/edeposit/int_edeposit__verifications.sql +++ b/models/intermediate/edeposit/int_edeposit__verifications.sql @@ -14,7 +14,6 @@ select when version = 'V1' then 'Guesty' when version = 'V2' then 'Edeposit' else null end as verification_source, verification_status, - verification_status_reason, nightly_fee_local, cast(checkout_at_utc as date) - cast(checkin_at_utc as date) as number_nights, email_flag, @@ -27,6 +26,7 @@ select cast(checkout_at_utc as date) as checkout_date_utc, is_cancelled, cancelled_at_utc, + cast(cancelled_at_utc as date) as cancelled_date_utc, user_email, guest_email, guest_last_name, @@ -43,7 +43,11 @@ select level_of_protection_amount, level_of_protection_currency, status_updated_at_utc, + cast(status_updated_date_utc as date) as status_updated_date_utc, updated_at_utc, + cast(updated_at_utc as date) as updated_date_utc, creation_at_utc, - created_at_utc + cast(creation_at_utc as date) as creation_date_utc, + created_at_utc, + cast(created_at_utc as date) as created_date_utc from stg_edeposit__verifications diff --git a/models/intermediate/edeposit/schema.yaml b/models/intermediate/edeposit/schema.yaml index 65e9bfa..cf0d6d4 100644 --- a/models/intermediate/edeposit/schema.yaml +++ b/models/intermediate/edeposit/schema.yaml @@ -6,7 +6,11 @@ models: "This table holds records on verifications for e-deposit bookings. It contains details on validations checked on the guests, guest information and some booking details like checkin-checkout date or the status of the verification. - The id values found here are completely unrelated to the ones found in Core DWH." + The id values found here are completely unrelated to the ones found in Core DWH. + + Note that id_verifications and booking_id should normally be 1 to 1. + Though there are exception, the API will accept a duplicate booking and the users + will be charged for it. A duplicate would return a unique id_verification." columns: - name: id_verification data_type: text @@ -53,10 +57,6 @@ models: data_type: text description: "status of the verification" - - name: verification_status_reason - data_type: text - description: "short explanation for status" - - name: nightly_fee_local data_type: double precision description: "fee charged per night" @@ -67,15 +67,15 @@ models: - name: email_flag data_type: text - description: "" + description: "screening result for email" - name: phone_flag data_type: text - description: "" + description: "screening result for phone" - name: watch_list data_type: text - description: "" + description: "screening result of the guest" - name: channel data_type: text @@ -87,7 +87,7 @@ models: - name: checkin_date_utc data_type: date - description: "Timestamp of checkin for the booking" + description: "Date of checkin for the booking" - name: checkout_at_utc data_type: timestamp without time zone @@ -95,7 +95,7 @@ models: - name: checkout_date_utc data_type: date - description: "Timestamp of checkout for the booking" + description: "Date of checkout for the booking" - name: is_cancelled data_type: boolean @@ -105,6 +105,10 @@ models: data_type: timestamp without time zone description: "Timestamp of cancellation of the booking" + - name: cancelled_date_utc + data_type: date + description: "Date of cancellation for the booking" + - name: user_email data_type: text description: "" @@ -169,14 +173,30 @@ models: data_type: timestamp without time zone description: "Timestamp when status was last updated" + - name: status_updated_date_utc + data_type: date + description: "Date of last status update of the verification" + - name: updated_at_utc data_type: timestamp without time zone - description: "Timestamp of last updated" + description: "Timestamp of last updated of the verification" + + - name: updated_date_utc + data_type: date + description: "Date of last update of the verification" - name: creation_at_utc data_type: timestamp without time zone - description: "" + description: "Athena timestamp field of when the booking was created" + + - name: creation_date_utc + data_type: date + description: "Athena date field of when the booking was created" - name: created_at_utc data_type: timestamp without time zone - description: "" + description: "Timestamp of creation of the verification in the system" + + - name: created_date_utc + data_type: date + description: "Date of creation of the verification in the system" From ffef9e3ff2fcfc7660b078041a594781650b687e Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Mon, 2 Sep 2024 12:52:16 +0200 Subject: [PATCH 7/8] Added all date_utc fields --- models/intermediate/edeposit/int_edeposit__verifications.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/intermediate/edeposit/int_edeposit__verifications.sql b/models/intermediate/edeposit/int_edeposit__verifications.sql index 32b6a44..aa19aac 100644 --- a/models/intermediate/edeposit/int_edeposit__verifications.sql +++ b/models/intermediate/edeposit/int_edeposit__verifications.sql @@ -43,7 +43,7 @@ select level_of_protection_amount, level_of_protection_currency, status_updated_at_utc, - cast(status_updated_date_utc as date) as status_updated_date_utc, + cast(status_updated_at_utc as date) as status_updated_date_utc, updated_at_utc, cast(updated_at_utc as date) as updated_date_utc, creation_at_utc, From 89792cf0b728fffe02f0da526b09eb484f6d6439 Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Mon, 2 Sep 2024 17:01:18 +0200 Subject: [PATCH 8/8] final comments --- models/intermediate/edeposit/schema.yaml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/models/intermediate/edeposit/schema.yaml b/models/intermediate/edeposit/schema.yaml index cf0d6d4..b36b556 100644 --- a/models/intermediate/edeposit/schema.yaml +++ b/models/intermediate/edeposit/schema.yaml @@ -185,13 +185,19 @@ models: data_type: date description: "Date of last update of the verification" - - name: creation_at_utc + - name: athena_creation_at_utc data_type: timestamp without time zone - description: "Athena timestamp field of when the booking was created" + description: + "Athena timestamp referring to when the booking was created. + It's provided by Guesty, but is not mandatory. + In case of doubt use created_at_utc or created_date_utc fields" - - name: creation_date_utc + - name: athena_creation_date_utc data_type: date - description: "Athena date field of when the booking was created" + description: + "Athena date referring to when the booking was created. + It's provided by Guesty, but is not mandatory. + In case of doubt use created_at_utc or created_date_utc fields" - name: created_at_utc data_type: timestamp without time zone