From 7786f2e770ac763b5f6759a95d8a47940e713953 Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Tue, 27 Aug 2024 14:55:29 +0200 Subject: [PATCH 1/6] 1st commit edeposit_verifications --- .../core/int_core__edeposit_verifications.sql | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 models/intermediate/core/int_core__edeposit_verifications.sql diff --git a/models/intermediate/core/int_core__edeposit_verifications.sql b/models/intermediate/core/int_core__edeposit_verifications.sql new file mode 100644 index 0000000..88f0e3d --- /dev/null +++ b/models/intermediate/core/int_core__edeposit_verifications.sql @@ -0,0 +1,70 @@ +with + stg_edeposit__verifications as ( + select * from {{ ref("stg_edeposit__verifications") }} + ), + edeposit_records as ( + select + id_booking, + cancelled_at_utc, + is_cancelled, + channel, + cast(checkin_at_utc as date) as checkin_date_utc, + cast(checkout_at_utc as date) as checkout_date_utc, + property_manager_name, + created_at_utc, + creation_at_utc, + id_listing, + listing_country, + listing_town, + cast(checkout_at_utc as date) - cast(checkin_at_utc as date) as nights, + cast(nightly_fee_local as float) as nightly_fee_local, + verification_status, + status_updated_at_utc, + updated_at_utc, + id_user, + id as verification_id, + case + when verification_status = 'Approved' or verification_status = 'Flagged' + then cast(nightly_fee_local as float) + else 0 + end as ok_status_night_fee, + case + when verification_status = 'Approved' or verification_status = 'Flagged' + then + cast(nightly_fee_local as float) + * (cast(checkout_at_utc as date) - cast(checkin_at_utc as date)) + else 0 + end as ok_status_fee, + case + when verification_status = 'Rejected' then 0.25 else 0 + end as rejected_fee, + to_char(creation_at_utc, 'YYYY-MM') as year_month_created, + to_char(checkout_at_utc, 'YYYY-MM') as year_month_checkout, + "version" + from {{ ref("stg_edeposit__verifications") }} -- Use ref() to reference other dbt models + where version = 'V2' + ) +select + year_month_created, + year_month_checkout, + id_user, + count(*) as bookings_per_month, + sum(cast(is_cancelled as integer)) as cancelled_per_month, + sum(cast(is_cancelled as integer))::decimal / count(*) as cancelled_ratio, + case + when sum(cast(is_cancelled as integer))::decimal / count(*) >= 0.05 + then sum(cast(is_cancelled as integer)) * 0.25 + else 0 + end as sum_cancelled_fee, + sum(ok_status_fee) as ok_status_fee_sum, + sum(rejected_fee) as rejected_fee_sum, + case + when sum(cast(is_cancelled as integer))::decimal / count(*) >= 0.05 + then sum(cast(is_cancelled as integer)) * 0.25 + else 0 + end + + sum(ok_status_fee) + + sum(rejected_fee) as total_revenue +from edeposit_records +group by year_month_created, year_month_checkout, id_user +order by year_month_created From a1e31747002f8779fd3991166318f7ec79f38d0e Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Tue, 27 Aug 2024 16:57:55 +0200 Subject: [PATCH 2/6] e-deposit verifications data to intermediate --- .../core/int_core__edeposit_verifications.sql | 20 +------ models/intermediate/core/schema.yaml | 55 +++++++++++++++++++ 2 files changed, 56 insertions(+), 19 deletions(-) diff --git a/models/intermediate/core/int_core__edeposit_verifications.sql b/models/intermediate/core/int_core__edeposit_verifications.sql index 88f0e3d..61a81d0 100644 --- a/models/intermediate/core/int_core__edeposit_verifications.sql +++ b/models/intermediate/core/int_core__edeposit_verifications.sql @@ -4,30 +4,13 @@ with ), edeposit_records as ( select - id_booking, - cancelled_at_utc, is_cancelled, channel, cast(checkin_at_utc as date) as checkin_date_utc, cast(checkout_at_utc as date) as checkout_date_utc, - property_manager_name, - created_at_utc, creation_at_utc, - id_listing, - listing_country, - listing_town, - cast(checkout_at_utc as date) - cast(checkin_at_utc as date) as nights, - cast(nightly_fee_local as float) as nightly_fee_local, verification_status, - status_updated_at_utc, - updated_at_utc, id_user, - id as verification_id, - case - when verification_status = 'Approved' or verification_status = 'Flagged' - then cast(nightly_fee_local as float) - else 0 - end as ok_status_night_fee, case when verification_status = 'Approved' or verification_status = 'Flagged' then @@ -39,8 +22,7 @@ with when verification_status = 'Rejected' then 0.25 else 0 end as rejected_fee, to_char(creation_at_utc, 'YYYY-MM') as year_month_created, - to_char(checkout_at_utc, 'YYYY-MM') as year_month_checkout, - "version" + to_char(checkout_at_utc, 'YYYY-MM') as year_month_checkout from {{ ref("stg_edeposit__verifications") }} -- Use ref() to reference other dbt models where version = 'V2' ) diff --git a/models/intermediate/core/schema.yaml b/models/intermediate/core/schema.yaml index aabb776..08fe0ef 100644 --- a/models/intermediate/core/schema.yaml +++ b/models/intermediate/core/schema.yaml @@ -2767,3 +2767,58 @@ models: data_type: integer description: | Integer-based flag version of total_bookings_with_product_bundle_with_paid_service. + - name: int_core__edeposit_verifications + description: + "This table holds detailed data on revenue generated through e-deposit verifications. + Each record provides insights into booking activities per user, including the number + of bookings, cancellations, and associated fees within specific months. Each record + captures data for bookings created in a particular month along with their corresponding + checkout month, allowing for a comprehensive view of the booking lifecycle and associated + revenues within those periods." + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - year_month_created + - year_month_checkout + - id_user + columns: + - name: year_month_created + data_type: text + description: "first day of month of created date" + + - name: year_month_checkout + data_type: text + description: "first day of month of check-out date" + + - name: id_user + data_type: text + description: "unique id value for user" + + - name: bookings_per_month + data_type: bigint + description: "total number of bookings" + + - name: cancelled_per_month + data_type: bigint + description: "number of cancelled bookings" + + - name: cancelled_ratio + data_type: numeric + description: "ratio of cancelled bookings over total bookings" + + - name: sum_cancelled_fee + data_type: numeric + description: "sum of fees charged for cancelled bookings" + + - name: ok_status_fee_sum + data_type: double precision + description: | + "sum of fees charged for bookings with status 'Approved' or 'Flagged'" + + - name: rejected_fee_sum + data_type: numeric + description: "sum of fees charged for rejected bookings" + + - name: total_revenue + data_type: double precision + description: "total sum of fees charged" From a4dc798f868dfd31acf77a8cc3267df1c9ebf169 Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Wed, 28 Aug 2024 09:58:14 +0200 Subject: [PATCH 3/6] e-deposit model WIP --- .../core/int_core__edeposit_verifications.sql | 82 ++++----- models/intermediate/core/schema.yaml | 166 +++++++++++++++--- 2 files changed, 178 insertions(+), 70 deletions(-) diff --git a/models/intermediate/core/int_core__edeposit_verifications.sql b/models/intermediate/core/int_core__edeposit_verifications.sql index 61a81d0..2292dc9 100644 --- a/models/intermediate/core/int_core__edeposit_verifications.sql +++ b/models/intermediate/core/int_core__edeposit_verifications.sql @@ -1,52 +1,42 @@ with stg_edeposit__verifications as ( select * from {{ ref("stg_edeposit__verifications") }} - ), - edeposit_records as ( - select - is_cancelled, - channel, - cast(checkin_at_utc as date) as checkin_date_utc, - cast(checkout_at_utc as date) as checkout_date_utc, - creation_at_utc, - verification_status, - id_user, - case - when verification_status = 'Approved' or verification_status = 'Flagged' - then - cast(nightly_fee_local as float) - * (cast(checkout_at_utc as date) - cast(checkin_at_utc as date)) - else 0 - end as ok_status_fee, - case - when verification_status = 'Rejected' then 0.25 else 0 - end as rejected_fee, - to_char(creation_at_utc, 'YYYY-MM') as year_month_created, - to_char(checkout_at_utc, 'YYYY-MM') as year_month_checkout - from {{ ref("stg_edeposit__verifications") }} -- Use ref() to reference other dbt models - where version = 'V2' ) select - year_month_created, - year_month_checkout, + id as id_verification, + id_booking, id_user, - count(*) as bookings_per_month, - sum(cast(is_cancelled as integer)) as cancelled_per_month, - sum(cast(is_cancelled as integer))::decimal / count(*) as cancelled_ratio, - case - when sum(cast(is_cancelled as integer))::decimal / count(*) >= 0.05 - then sum(cast(is_cancelled as integer)) * 0.25 - else 0 - end as sum_cancelled_fee, - sum(ok_status_fee) as ok_status_fee_sum, - sum(rejected_fee) as rejected_fee_sum, - case - when sum(cast(is_cancelled as integer))::decimal / count(*) >= 0.05 - then sum(cast(is_cancelled as integer)) * 0.25 - else 0 - end - + sum(ok_status_fee) - + sum(rejected_fee) as total_revenue -from edeposit_records -group by year_month_created, year_month_checkout, id_user -order by year_month_created + id_listing, + version, -- V1 for guesty and V2 for e-deposit verifications + verification_status, + verification_status_reason, + nightly_fee_local, + cast(checkout_at_utc as date) - cast(checkin_at_utc as date) as number_nights, + email_flag, + phone_flag, + watch_list, + channel, + checkin_at_utc, + checkout_at_utc, + is_cancelled, + cancelled_at_utc, + user_email, + guest_email, + guest_last_name, + guest_first_name, + guest_telephone, + company_name, + property_manager_name, + property_manager_email, + listing_name, + listing_town, + listing_country, + listing_postcode, + pets_allowed, + level_of_protection_amount, + level_of_protection_currency, + status_updated_at_utc, + updated_at_utc, + creation_at_utc, + created_at_utc +from stg_edeposit__verifications diff --git a/models/intermediate/core/schema.yaml b/models/intermediate/core/schema.yaml index 08fe0ef..b74cd2d 100644 --- a/models/intermediate/core/schema.yaml +++ b/models/intermediate/core/schema.yaml @@ -2781,44 +2781,162 @@ models: - year_month_created - year_month_checkout - id_user + - name: int__verifications + description: "" columns: - - name: year_month_created + - name: id_verification data_type: text - description: "first day of month of created date" + description: "unique Superhog generated id for this verification" + tests: + - unique + - not_null - - name: year_month_checkout + - name: id_booking data_type: text - description: "first day of month of check-out date" + description: "unique Superhog generated id for a booking" - name: id_user data_type: text - description: "unique id value for user" + description: "unique Superhog generated id for a guest" - - name: bookings_per_month - data_type: bigint - description: "total number of bookings" + - name: id_listing + data_type: text + description: "unique Superhog generated id for a listing" - - name: cancelled_per_month - data_type: bigint - description: "number of cancelled bookings" + - name: version + data_type: text + description: + "V1 for guesty verifications + V2 fo e-deposit verifications" + tests: + - accepted_values: + values: + - V1 + - V2 - - name: cancelled_ratio - data_type: numeric - description: "ratio of cancelled bookings over total bookings" + - name: verification_status + data_type: text + description: "status of the verification" - - name: sum_cancelled_fee - data_type: numeric - description: "sum of fees charged for cancelled bookings" + - name: verification_status_reason + data_type: text + description: "" - - name: ok_status_fee_sum - data_type: double precision - description: | - "sum of fees charged for bookings with status 'Approved' or 'Flagged'" + - name: nightly_fee_local + data_type: text + description: "fee charged per night" - - name: rejected_fee_sum - data_type: numeric - description: "sum of fees charged for rejected bookings" + - name: number_nights + data_type: integer + description: "number of nights for the booking" - name: total_revenue data_type: double precision description: "total sum of fees charged" + - name: email_flag + data_type: text + description: "" + + - name: phone_flag + data_type: text + description: "" + + - name: watch_list + data_type: text + description: "" + + - name: channel + data_type: text + description: "" + + - name: checkin_at_utc + data_type: timestamp without time zone + description: "" + + - name: checkout_at_utc + data_type: timestamp without time zone + description: "" + + - name: is_cancelled + data_type: boolean + description: "" + + - name: cancelled_at_utc + data_type: timestamp without time zone + description: "" + + - name: user_email + data_type: text + description: "" + + - name: guest_email + data_type: text + description: "" + + - name: guest_last_name + data_type: text + description: "" + + - name: guest_first_name + data_type: text + description: "" + + - name: guest_telephone + data_type: text + description: "" + + - name: company_name + data_type: text + description: "" + + - name: property_manager_name + data_type: text + description: "" + + - name: property_manager_email + data_type: text + description: "" + + - name: listing_name + data_type: text + description: "" + + - name: listing_town + data_type: text + description: "" + + - name: listing_country + data_type: text + description: "" + + - name: listing_postcode + data_type: text + description: "" + + - name: pets_allowed + data_type: boolean + description: "" + + - name: level_of_protection_amount + data_type: integer + description: "" + + - name: level_of_protection_currency + data_type: text + description: "" + + - name: status_updated_at_utc + data_type: timestamp without time zone + description: "" + + - name: updated_at_utc + data_type: timestamp without time zone + description: "" + + - name: creation_at_utc + data_type: timestamp without time zone + description: "" + + - name: created_at_utc + data_type: timestamp without time zone + description: "" From 5892fe7cbbefab92684e188251bcec2ac1f22933 Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Wed, 28 Aug 2024 10:27:36 +0200 Subject: [PATCH 4/6] Changed it to basic model --- ...ns.sql => int_edeposit__verifications.sql} | 0 models/intermediate/core/schema.yaml | 49 +++++++++---------- 2 files changed, 23 insertions(+), 26 deletions(-) rename models/intermediate/core/{int_core__edeposit_verifications.sql => int_edeposit__verifications.sql} (100%) diff --git a/models/intermediate/core/int_core__edeposit_verifications.sql b/models/intermediate/core/int_edeposit__verifications.sql similarity index 100% rename from models/intermediate/core/int_core__edeposit_verifications.sql rename to models/intermediate/core/int_edeposit__verifications.sql diff --git a/models/intermediate/core/schema.yaml b/models/intermediate/core/schema.yaml index b74cd2d..a5d3575 100644 --- a/models/intermediate/core/schema.yaml +++ b/models/intermediate/core/schema.yaml @@ -2767,22 +2767,13 @@ models: data_type: integer description: | Integer-based flag version of total_bookings_with_product_bundle_with_paid_service. - - name: int_core__edeposit_verifications + + - name: int_edeposit__verifications description: - "This table holds detailed data on revenue generated through e-deposit verifications. - Each record provides insights into booking activities per user, including the number - of bookings, cancellations, and associated fees within specific months. Each record - captures data for bookings created in a particular month along with their corresponding - checkout month, allowing for a comprehensive view of the booking lifecycle and associated - revenues within those periods." - tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - year_month_created - - year_month_checkout - - id_user - - name: int__verifications - description: "" + "This table holds records on verifications for e-deposit bookings. + It contains details on validations checked on the guests, guest information + and some booking details like checkin-checkout date or the status of the verification. + The id values found here are completely unrelated to the ones found in Core DWH." columns: - name: id_verification data_type: text @@ -2806,21 +2797,30 @@ models: - name: version data_type: text description: - "V1 for guesty verifications - V2 fo e-deposit verifications" + "value to identify if it is Guesty (V1) or E-deposit (V2)" tests: - accepted_values: values: - V1 - V2 + - name: verification_source + data_type: text + description: + "source of the verification for the booking" + tests: + - accepted_values: + values: + - Guesty + - Edeposit + - name: verification_status data_type: text description: "status of the verification" - name: verification_status_reason data_type: text - description: "" + description: "short explanation for status" - name: nightly_fee_local data_type: text @@ -2830,9 +2830,6 @@ models: data_type: integer description: "number of nights for the booking" - - name: total_revenue - data_type: double precision - description: "total sum of fees charged" - name: email_flag data_type: text description: "" @@ -2851,11 +2848,11 @@ models: - name: checkin_at_utc data_type: timestamp without time zone - description: "" + description: "Timestamp of checkin for the booking" - name: checkout_at_utc data_type: timestamp without time zone - description: "" + description: "Timestamp of checkout for the booking" - name: is_cancelled data_type: boolean @@ -2863,7 +2860,7 @@ models: - name: cancelled_at_utc data_type: timestamp without time zone - description: "" + description: "Timestamp of cancellation of the booking" - name: user_email data_type: text @@ -2927,11 +2924,11 @@ models: - name: status_updated_at_utc data_type: timestamp without time zone - description: "" + description: "Timestamp when status was last updated" - name: updated_at_utc data_type: timestamp without time zone - description: "" + description: "Timestamp of last updated" - name: creation_at_utc data_type: timestamp without time zone From 167645428ecb0ffc29601a76130f87176e6a85a5 Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Wed, 28 Aug 2024 12:04:05 +0200 Subject: [PATCH 5/6] renamed columns --- models/intermediate/core/int_edeposit__verifications.sql | 8 +++++--- models/intermediate/core/schema.yaml | 6 +++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/models/intermediate/core/int_edeposit__verifications.sql b/models/intermediate/core/int_edeposit__verifications.sql index 2292dc9..ebb170f 100644 --- a/models/intermediate/core/int_edeposit__verifications.sql +++ b/models/intermediate/core/int_edeposit__verifications.sql @@ -5,9 +5,11 @@ with select id as id_verification, id_booking, - id_user, - id_listing, - version, -- V1 for guesty and V2 for e-deposit verifications + id_user as id_user_host, + id_listing as id_accommodation, + case + when version = 'V1' then 'Guesty' when version = 'V2' then 'Edeposit' else null + end as verification_source, verification_status, verification_status_reason, nightly_fee_local, diff --git a/models/intermediate/core/schema.yaml b/models/intermediate/core/schema.yaml index a5d3575..6c7f064 100644 --- a/models/intermediate/core/schema.yaml +++ b/models/intermediate/core/schema.yaml @@ -2786,11 +2786,11 @@ models: data_type: text description: "unique Superhog generated id for a booking" - - name: id_user + - name: id_user_host data_type: text - description: "unique Superhog generated id for a guest" + description: "unique Superhog generated id for host" - - name: id_listing + - name: id_accommodation data_type: text description: "unique Superhog generated id for a listing" From b333b458910bb3c68edef89c0c1ce0f2dd5e4ac3 Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Wed, 28 Aug 2024 15:00:22 +0200 Subject: [PATCH 6/6] Added some comments to make it clear that ids here are unrelated to core dwh, I will come back to modify the schemas when Ray answers all of our questions related to this data --- models/intermediate/core/int_edeposit__verifications.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/models/intermediate/core/int_edeposit__verifications.sql b/models/intermediate/core/int_edeposit__verifications.sql index ebb170f..8440f50 100644 --- a/models/intermediate/core/int_edeposit__verifications.sql +++ b/models/intermediate/core/int_edeposit__verifications.sql @@ -3,10 +3,13 @@ with select * from {{ ref("stg_edeposit__verifications") }} ) select + -- note that these ids are not the same as the ones found in Core DWH + -- they are completely unrelated id as id_verification, id_booking, id_user as id_user_host, id_listing as id_accommodation, + version, -- V1 for Guesty and V2 for E-deposit case when version = 'V1' then 'Guesty' when version = 'V2' then 'Edeposit' else null end as verification_source,