From 964ccbaa24a1f9c1dab659912ba722cd730d78da Mon Sep 17 00:00:00 2001 From: Pablo Martin Date: Wed, 20 Mar 2024 10:36:14 +0100 Subject: [PATCH 1/8] add a couple tables to sources --- models/staging/core/_core_sources.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/models/staging/core/_core_sources.yml b/models/staging/core/_core_sources.yml index 502c86f..037c9b2 100644 --- a/models/staging/core/_core_sources.yml +++ b/models/staging/core/_core_sources.yml @@ -169,6 +169,8 @@ sources: description: "{{ doc('_airbyte_meta_desc') }}" - name: superhog_user identifier: SuperhogUser + - name: UserVerificationStatus + identifier: UserVerificationStatus - name: VerificationRequest identifier: VerificationRequest - name: VerificationToPayment @@ -181,6 +183,8 @@ sources: identifier: PaymentStatus - name: Booking identifier: Booking + - name: BookingState + identifier: BookingState - name: PricePlanToUser identifier: PricePlanToUser - name: PricePlanChargedByType From 18c4121d628318f5caeb751fe88a0409cd5cd698 Mon Sep 17 00:00:00 2001 From: Pablo Martin Date: Wed, 20 Mar 2024 10:36:23 +0100 Subject: [PATCH 2/8] stg model for booking state --- models/staging/core/stg_core__booking_state.sql | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 models/staging/core/stg_core__booking_state.sql diff --git a/models/staging/core/stg_core__booking_state.sql b/models/staging/core/stg_core__booking_state.sql new file mode 100644 index 0000000..4fbe257 --- /dev/null +++ b/models/staging/core/stg_core__booking_state.sql @@ -0,0 +1,12 @@ +with + raw_booking_state as (select * from {{ source("core", "BookingState") }}), + stg_core__booking_state as ( + select + {{ adapter.quote("Id") }} as id_booking_state, + {{ adapter.quote("Name") }} as booking_state, + {{ adapter.quote("_airbyte_extracted_at") }} as dwh_extracted_at_utc + + from raw_booking_state + ) +select * +from stg_core__booking_state From 09a62c701a6896ac8dec63b2bd43bded638af658 Mon Sep 17 00:00:00 2001 From: Pablo Martin Date: Wed, 20 Mar 2024 10:38:06 +0100 Subject: [PATCH 3/8] stg model for user verification --- .../core/stg_core__user_verification_status.sql | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 models/staging/core/stg_core__user_verification_status.sql diff --git a/models/staging/core/stg_core__user_verification_status.sql b/models/staging/core/stg_core__user_verification_status.sql new file mode 100644 index 0000000..d0e9d0e --- /dev/null +++ b/models/staging/core/stg_core__user_verification_status.sql @@ -0,0 +1,14 @@ +with + raw_user_verification_status as ( + select * from {{ source("core", "UserVerificationStatus") }} + ), + stg_core__user_verification_status as ( + select + {{ adapter.quote("Id") }} as id_user_verification_status, + {{ adapter.quote("Name") }} as user_verification_status, + {{ adapter.quote("_airbyte_extracted_at") }} as dwh_extracted_at_utc + + from raw_user_verification_status + ) +select * +from stg_core__user_verification_status From d8cf15be4a07b515bfb8a9e31f4b90e8aa707668 Mon Sep 17 00:00:00 2001 From: Pablo Martin Date: Wed, 20 Mar 2024 17:23:08 +0100 Subject: [PATCH 4/8] basic bookings and duplicates --- models/intermediate/int_core__bookings.sql | 40 +++++++++++++ .../int_core__duplicate_bookings.sql | 56 +++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 models/intermediate/int_core__bookings.sql create mode 100644 models/intermediate/int_core__duplicate_bookings.sql diff --git a/models/intermediate/int_core__bookings.sql b/models/intermediate/int_core__bookings.sql new file mode 100644 index 0000000..1687132 --- /dev/null +++ b/models/intermediate/int_core__bookings.sql @@ -0,0 +1,40 @@ +with + stg_core__booking as (select * from {{ ref("stg_core__booking") }}), + stg_core__booking_state as (select * from {{ ref("stg_core__booking_state") }}), + int_core__duplicate_bookings as ( + select id_booking, is_duplicate_booking, is_duplicating_booking_with_id + from {{ ref("int_core__duplicate_bookings") }} + ) + +select + b.id_booking, + b.id_user_guest, + b.id_user_host, + b.id_integration, + b.id_accommodation, + b.id_booking_source, + b.id_verification_request, + b.id_staging_host_booking, + db.is_duplicate_booking, + db.is_duplicating_booking_with_id, + bs.booking_state, + b.check_in_at_utc, + b.check_in_date_utc, + b.check_out_at_utc, + b.check_out_date_utc, + b.summary, + b.guest_email, + b.guest_last_name, + b.guest_first_name, + b.guest_telephone, + b.additional_guests, + b.unsubscribe_verification_reminder, + b.created_at_utc, + b.created_date_utc, + b.updated_at_utc, + b.updated_date_utc, + b.dwh_extracted_at_utc +from stg_core__booking b +left join stg_core__booking_state bs on b.id_booking_state = bs.id_booking_state +left join int_core__duplicate_bookings as db on b.id_booking = db.id_booking +where b.updated_at_utc > '2024-01-01' and db.is_duplicate_booking is true diff --git a/models/intermediate/int_core__duplicate_bookings.sql b/models/intermediate/int_core__duplicate_bookings.sql new file mode 100644 index 0000000..d3492dd --- /dev/null +++ b/models/intermediate/int_core__duplicate_bookings.sql @@ -0,0 +1,56 @@ +with + stg_core__booking_state as (select * from {{ ref("stg_core__booking_state") }}), + stg_core__booking_with_dup_flag as ( + select + case -- This ugly thing below is true if the booking is duplicate, false if not + when + row_number() over ( + partition by id_user_guest, id_accommodation, check_in_date_utc + order by id_booking asc + ) + > 1 + then true + else false + end as is_duplicate_booking, + * + from {{ ref("stg_core__booking") }} + ) +select + db.id_booking, + db.id_user_guest, + db.id_user_host, + db.id_integration, + db.id_accommodation, + db.id_booking_source, + db.id_verification_request, + db.id_staging_host_booking, + db.is_duplicate_booking, + b.id_booking as is_duplicating_booking_with_id, + bs.booking_state, + db.check_in_at_utc, + db.check_in_date_utc, + db.check_out_at_utc, + db.check_out_date_utc, + db.summary, + db.guest_email, + db.guest_last_name, + db.guest_first_name, + db.guest_telephone, + db.additional_guests, + db.unsubscribe_verification_reminder, + db.created_at_utc, + db.created_date_utc, + db.updated_at_utc, + db.updated_date_utc, + db.dwh_extracted_at_utc +from stg_core__booking_with_dup_flag db +left join stg_core__booking_state bs on db.id_booking_state = bs.id_booking_state +left join + stg_core__booking_with_dup_flag b + on ( + b.id_user_guest = db.id_user_guest + and b.id_accommodation = db.id_accommodation + and b.check_in_date_utc = db.check_in_date_utc + and b.id_booking != db.id_booking + ) +where b.is_duplicate_booking = false and db.is_duplicate_booking = true From dbd2f447955487916196414ead32bb4258f1977b Mon Sep 17 00:00:00 2001 From: Pablo Martin Date: Wed, 20 Mar 2024 17:33:01 +0100 Subject: [PATCH 5/8] removed testing filter --- models/intermediate/int_core__bookings.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/models/intermediate/int_core__bookings.sql b/models/intermediate/int_core__bookings.sql index 1687132..13e5871 100644 --- a/models/intermediate/int_core__bookings.sql +++ b/models/intermediate/int_core__bookings.sql @@ -37,4 +37,3 @@ select from stg_core__booking b left join stg_core__booking_state bs on b.id_booking_state = bs.id_booking_state left join int_core__duplicate_bookings as db on b.id_booking = db.id_booking -where b.updated_at_utc > '2024-01-01' and db.is_duplicate_booking is true From 5101a2a45ebfda52ab4ebc8cb9f6b31fec17f83f Mon Sep 17 00:00:00 2001 From: Pablo Martin Date: Thu, 4 Apr 2024 10:54:56 +0200 Subject: [PATCH 6/8] tons of work, dear god what a spaghetti --- dev-dwh.docker-compose.yml | 1 + .../int_core__booking_charge_events.sql | 43 +++++++++++++++++ models/intermediate/int_core__bookings.sql | 11 ++++- models/intermediate/int_core__price_plans.sql | 44 +++++++++++++++++ .../intermediate/int_core__unified_user.sql | 4 +- .../int_core__verification_requests.sql | 48 +++++++++++++++++++ .../staging/core/stg_core__superhog_user.sql | 3 +- models/staging/core/stg_core__user.sql | 3 +- 8 files changed, 152 insertions(+), 5 deletions(-) create mode 100644 models/intermediate/int_core__booking_charge_events.sql create mode 100644 models/intermediate/int_core__price_plans.sql create mode 100644 models/intermediate/int_core__verification_requests.sql diff --git a/dev-dwh.docker-compose.yml b/dev-dwh.docker-compose.yml index 32a493b..0d9e43a 100644 --- a/dev-dwh.docker-compose.yml +++ b/dev-dwh.docker-compose.yml @@ -9,6 +9,7 @@ services: - '5432:5432' volumes: - dwh-local:/var/lib/postgresql/data + shm_size: 1g volumes: dwh-local: driver: local \ No newline at end of file diff --git a/models/intermediate/int_core__booking_charge_events.sql b/models/intermediate/int_core__booking_charge_events.sql new file mode 100644 index 0000000..99bd453 --- /dev/null +++ b/models/intermediate/int_core__booking_charge_events.sql @@ -0,0 +1,43 @@ +with + stg_core__booking as (select * from {{ ref("stg_core__booking") }}), + int_core__price_plans as (select * from {{ ref("int_core__price_plans") }}), + int_core__verification_requests as ( + select * from {{ ref("int_core__verification_requests") }} + ), + booking_with_price_plans as ( + select + b.id_booking, + pp.id_price_plan, + pp.booking_fee, + -- There is an obscure edge case where a single booking might be charged + -- twice according to our logic. We use this to untie by simply picking + -- the oldest charge event. + row_number() over ( + partition by b.id_booking order by pp.id_price_plan + ) as price_plan_priority_rank, + -- The below case will be true when a price plan is relevant for the + -- booking. + case + when pp.price_plan_charged_by_type = 'CheckInDate' + then b.check_in_at_utc between pp.start_at_utc and pp.end_at_utc + when pp.price_plan_charged_by_type in ('VerificationStartDate', 'All') + then + coalesce( + ( + vr.verification_start_at_utc + between pp.start_at_utc and pp.end_at_utc + ), + false + ) + else false + end as booking_belongs_to_price_plan + from stg_core__booking b + left join + int_core__verification_requests vr + on b.id_verification_request = vr.id_verification_request + left join int_core__price_plans pp on b.id_user_host = pp.id_user_host + ) + +select bpp.id_booking, bpp.id_price_plan, bpp.booking_fee +from booking_with_price_plans bpp +where bpp.booking_belongs_to_price_plan = true and price_plan_priority_rank = 1 diff --git a/models/intermediate/int_core__bookings.sql b/models/intermediate/int_core__bookings.sql index 13e5871..88e3d05 100644 --- a/models/intermediate/int_core__bookings.sql +++ b/models/intermediate/int_core__bookings.sql @@ -4,8 +4,14 @@ with int_core__duplicate_bookings as ( select id_booking, is_duplicate_booking, is_duplicating_booking_with_id from {{ ref("int_core__duplicate_bookings") }} + ), + int_core__price_plans as (select * from {{ ref("int_core__price_plans") }}), + int_core__verification_requests as ( + select * from {{ ref("int_core__verification_requests") }} + ), + int_core__booking_charge_events as ( + select * from {{ ref("int_core__booking_charge_events") }} ) - select b.id_booking, b.id_user_guest, @@ -36,4 +42,5 @@ select b.dwh_extracted_at_utc from stg_core__booking b left join stg_core__booking_state bs on b.id_booking_state = bs.id_booking_state -left join int_core__duplicate_bookings as db on b.id_booking = db.id_booking +left join int_core__duplicate_bookings db on b.id_booking = db.id_booking +left join int_core__booking_charge_events bce on b.id_booking = bce.id_booking diff --git a/models/intermediate/int_core__price_plans.sql b/models/intermediate/int_core__price_plans.sql new file mode 100644 index 0000000..351965b --- /dev/null +++ b/models/intermediate/int_core__price_plans.sql @@ -0,0 +1,44 @@ +with + stg_core__price_plan_to_user as ( + select * from {{ ref("stg_core__price_plan_to_user") }} + ), + stg_core__price_plan_charged_by_type as ( + select * from {{ ref("stg_core__price_plan_charged_by_type") }} + ), + latest_price_plan_id_by_host as ( + select id_user_host, max(id_price_plan) as latest_id_price_plan + from {{ ref("stg_core__price_plan_to_user") }} + group by id_user_host + ) + +select + pp.id_price_plan, + pp.id_user_host, + ppt.price_plan_charged_by_type, + pp.start_at_utc, + pp.start_date_utc, + coalesce(pp.end_at_utc, '2099-12-31 23:59:59') as end_at_utc, + cast(coalesce(pp.end_at_utc, '2099-12-31 23:59:59') as date) end_date_utc, + -- active plans have null as an end date, which is very elegant, but a PITA for + -- joining. Hence, we just put a date superlong in the future. If you are in + -- 2098... hi, make sure to push this :) + case + when pp.id_price_plan = lpp.latest_id_price_plan then true else false + end as is_latest_price_plan_for_host, + pp.booking_fee, + pp.listing_fee, + pp.support_fee, + pp.tax_percentage, + pp.minimum_billable_listings, + pp.minimum_monthly_listing_fee, + pp.created_at_utc, + pp.created_date_utc, + pp.updated_at_utc, + pp.updated_date_utc, + pp.dwh_extracted_at_utc +from stg_core__price_plan_to_user pp +left join + stg_core__price_plan_charged_by_type ppt + on pp.id_price_plan_charged_by_type = ppt.id_price_plan_charged_by_type +left join + latest_price_plan_id_by_host lpp on lpp.latest_id_price_plan = pp.id_price_plan diff --git a/models/intermediate/int_core__unified_user.sql b/models/intermediate/int_core__unified_user.sql index 4f5bb7a..b117d32 100644 --- a/models/intermediate/int_core__unified_user.sql +++ b/models/intermediate/int_core__unified_user.sql @@ -10,7 +10,8 @@ select title, id_deal, is_deleted, - join_date_utc, + joined_at_utc, + joined_date_utc, user_name, code_prefix, billing_town, @@ -27,6 +28,7 @@ select airbnb_url, created_date_utc, updated_date_utc, + verified_at_utc, verified_date_utc, flag_as_problem, number_of_properties, diff --git a/models/intermediate/int_core__verification_requests.sql b/models/intermediate/int_core__verification_requests.sql new file mode 100644 index 0000000..61aac4a --- /dev/null +++ b/models/intermediate/int_core__verification_requests.sql @@ -0,0 +1,48 @@ +with + stg_core__verification_request as ( + select * from {{ ref("stg_core__verification_request") }} + ), + int_core__unified_user as (select * from {{ ref("int_core__unified_user") }}) +select + vr.id_verification_request, + vr.uuid_verification_request, + vr.id_verification_set, + vr.id_superhog_verified_set, + vr.id_payment_validation_set, + vr.id_user_guest, + vr.id_user_host, + vr.verification_url, + vr.callback_url, + vr.redirect_url, + vr.logo, + vr.guest_email, + vr.last_name, + vr.first_name, + vr.guest_phone_number, + vr.telephone_code, + vr.guest_phone_number_2, + guest.joined_at_utc as verification_start_at_utc, + guest.joined_date_utc as verification_start_date_utc, + guest.verified_at_utc as verification_end_at_utc, + guest.verified_date_utc as verification_end_date_utc, + vr.link_used_at_utc, + vr.link_used_date_utc, + vr.expire_at_utc, + vr.expire_date_utc, + vr.is_deleted, + vr.redirect_name, + vr.id_one_step_link, + vr.success_message, + vr.summary, + vr.rejection_reason, + vr.has_switched_to_mobile, + vr.is_verifier_rejected, + vr.config, + vr.metadata, + vr.created_at_utc, + vr.created_date_utc, + vr.updated_at_utc, + vr.updated_date_utc, + vr.dwh_extracted_at_utc +from stg_core__verification_request vr +left join int_core__unified_user guest on vr.id_user_guest = guest.id_user diff --git a/models/staging/core/stg_core__superhog_user.sql b/models/staging/core/stg_core__superhog_user.sql index d603892..07d962c 100644 --- a/models/staging/core/stg_core__superhog_user.sql +++ b/models/staging/core/stg_core__superhog_user.sql @@ -7,7 +7,8 @@ with {{ adapter.quote("AirbnbUrl") }} as airbnb_url, {{ adapter.quote("CreatedDate") }} as created_date_utc, {{ adapter.quote("UpdatedDate") }} as updated_date_utc, - {{ adapter.quote("VerifiedDate") }} as verified_date_utc, + {{ adapter.quote("VerifiedDate") }} as verified_at_utc, + cast({{ adapter.quote("VerifiedDate") }} as date) as verified_date_utc, {{ adapter.quote("FlagAsProblem") }} as flag_as_problem, {{ adapter.quote("SuperhogUserId") }} as id_superhoguser, {{ adapter.quote("NumberOfProperties") }} as number_of_properties, diff --git a/models/staging/core/stg_core__user.sql b/models/staging/core/stg_core__user.sql index 2d246c6..8f390a2 100644 --- a/models/staging/core/stg_core__user.sql +++ b/models/staging/core/stg_core__user.sql @@ -8,7 +8,8 @@ with {{ adapter.quote("Title") }} as title, {{ adapter.quote("DealId") }} as id_deal, {{ adapter.quote("Deleted") }} as is_deleted, - {{ adapter.quote("JoinDate") }} as join_date_utc, + {{ adapter.quote("JoinDate") }} as joined_at_utc, + cast({{ adapter.quote("JoinDate") }} as date) as joined_date_utc, {{ adapter.quote("LastName") }} as last_name, {{ adapter.quote("UserName") }} as user_name, {{ adapter.quote("FirstName") }} as first_name, From df61682d90d73a9a2da8f692a5ad3a1ae78d284b Mon Sep 17 00:00:00 2001 From: Pablo Martin Date: Thu, 4 Apr 2024 16:33:08 +0200 Subject: [PATCH 7/8] a lot of changes --- .../int_core__booking_charge_events.sql | 57 ++++++++++++------- models/intermediate/int_core__bookings.sql | 3 + models/intermediate/int_core__price_plans.sql | 8 +-- models/reporting/core/core__bookings.sql | 12 ++++ models/staging/core/stg_core__booking.sql | 11 ++++ .../core/stg_core__price_plan_to_user.sql | 11 ++-- .../staging/core/stg_core__superhog_user.sql | 2 + models/staging/core/stg_core__user.sql | 2 + .../core/stg_core__verification_request.sql | 11 ++++ 9 files changed, 88 insertions(+), 29 deletions(-) create mode 100644 models/reporting/core/core__bookings.sql diff --git a/models/intermediate/int_core__booking_charge_events.sql b/models/intermediate/int_core__booking_charge_events.sql index 99bd453..98a503a 100644 --- a/models/intermediate/int_core__booking_charge_events.sql +++ b/models/intermediate/int_core__booking_charge_events.sql @@ -4,19 +4,24 @@ with int_core__verification_requests as ( select * from {{ ref("int_core__verification_requests") }} ), - booking_with_price_plans as ( + booking_with_relevant_price_plans as ( select - b.id_booking, - pp.id_price_plan, - pp.booking_fee, - -- There is an obscure edge case where a single booking might be charged - -- twice according to our logic. We use this to untie by simply picking - -- the oldest charge event. - row_number() over ( - partition by b.id_booking order by pp.id_price_plan - ) as price_plan_priority_rank, - -- The below case will be true when a price plan is relevant for the - -- booking. + *, + case + when pp.price_plan_charged_by_type = 'CheckInDate' + then b.check_in_at_utc + when pp.price_plan_charged_by_type in ('VerificationStartDate', 'All') + then vr.verification_start_at_utc + end as booking_fee_charge_at_utc + from stg_core__booking b + left join + int_core__verification_requests vr + on b.id_verification_request = vr.id_verification_request + left join int_core__price_plans pp on b.id_user_host = pp.id_user_host + where + -- The dates that defines which price plan applies to the booking depends + -- on charged by type. With the below case, we evaluate if a certain price + -- plan relates to the booking case when pp.price_plan_charged_by_type = 'CheckInDate' then b.check_in_at_utc between pp.start_at_utc and pp.end_at_utc @@ -30,14 +35,24 @@ with false ) else false - end as booking_belongs_to_price_plan - from stg_core__booking b - left join - int_core__verification_requests vr - on b.id_verification_request = vr.id_verification_request - left join int_core__price_plans pp on b.id_user_host = pp.id_user_host + end + = true + ), + untied_bookings as ( + -- If a booking has two valid price plans, take the earliest + select id_booking, min(id_price_plan) as id_price_plan + from booking_with_relevant_price_plans brpp + group by id_booking ) -select bpp.id_booking, bpp.id_price_plan, bpp.booking_fee -from booking_with_price_plans bpp -where bpp.booking_belongs_to_price_plan = true and price_plan_priority_rank = 1 +select + ub.id_booking, + ub.id_price_plan, + brpp.booking_fee_local, + brpp.booking_fee_charge_at_utc, + cast(brpp.booking_fee_charge_at_utc as date) as booking_fee_charge_date_utc +from untied_bookings ub +left join + booking_with_relevant_price_plans brpp + on ub.id_booking = brpp.id_booking + and ub.id_price_plan = brpp.id_price_plan diff --git a/models/intermediate/int_core__bookings.sql b/models/intermediate/int_core__bookings.sql index 88e3d05..6dda1f4 100644 --- a/models/intermediate/int_core__bookings.sql +++ b/models/intermediate/int_core__bookings.sql @@ -28,6 +28,9 @@ select b.check_in_date_utc, b.check_out_at_utc, b.check_out_date_utc, + bce.booking_fee_local, + booking_fee_charge_at_utc, + booking_fee_charge_date_utc, b.summary, b.guest_email, b.guest_last_name, diff --git a/models/intermediate/int_core__price_plans.sql b/models/intermediate/int_core__price_plans.sql index 351965b..90ea8df 100644 --- a/models/intermediate/int_core__price_plans.sql +++ b/models/intermediate/int_core__price_plans.sql @@ -25,12 +25,12 @@ select case when pp.id_price_plan = lpp.latest_id_price_plan then true else false end as is_latest_price_plan_for_host, - pp.booking_fee, - pp.listing_fee, - pp.support_fee, + pp.booking_fee_local, + pp.listing_fee_local, + pp.support_fee_local, pp.tax_percentage, pp.minimum_billable_listings, - pp.minimum_monthly_listing_fee, + pp.minimum_monthly_listing_fee_local, pp.created_at_utc, pp.created_date_utc, pp.updated_at_utc, diff --git a/models/reporting/core/core__bookings.sql b/models/reporting/core/core__bookings.sql new file mode 100644 index 0000000..0612615 --- /dev/null +++ b/models/reporting/core/core__bookings.sql @@ -0,0 +1,12 @@ +with int_core__bookings as (select * from {{ ref("int_core__bookings") }}) + +select + b.id_booking, + b.id_user_guest, + b.id_user_host, + b.id_accommodation, + b.is_duplicate_booking, + b.booking_fee_local, + b.booking_fee_charge_at_utc, + b.booking_fee_charge_date_utc +from int_core__bookings b diff --git a/models/staging/core/stg_core__booking.sql b/models/staging/core/stg_core__booking.sql index 68e3c8e..3122101 100644 --- a/models/staging/core/stg_core__booking.sql +++ b/models/staging/core/stg_core__booking.sql @@ -1,3 +1,14 @@ +{{ + config( + indexes=[ + {"columns": ["id_booking"]}, + {"columns": ["id_user_host"]}, + {"columns": ["id_user_guest"]}, + {"columns": ["id_verification_request"]}, + ] + ) +}} + with raw_booking as (select * from {{ source("core", "Booking") }}), stg_core__booking as ( diff --git a/models/staging/core/stg_core__price_plan_to_user.sql b/models/staging/core/stg_core__price_plan_to_user.sql index 5ea2258..0e93f1a 100644 --- a/models/staging/core/stg_core__price_plan_to_user.sql +++ b/models/staging/core/stg_core__price_plan_to_user.sql @@ -1,3 +1,5 @@ +{{ config(indexes=[{"columns": ["id_price_plan"]}, {"columns": ["id_user_host"]}]) }} + with raw_price_plan_to_user as (select * from {{ source("core", "PricePlanToUser") }}), stg_core__price_plan_to_user as ( @@ -10,12 +12,13 @@ with cast({{ adapter.quote("StartDate") }} as date) as start_date_utc, {{ adapter.quote("EndDate") }} as end_at_utc, cast({{ adapter.quote("EndDate") }} as date) end_date_utc, - {{ adapter.quote("BookingFeeNet") }} as booking_fee, - {{ adapter.quote("ListingFeeNet") }} as listing_fee, - {{ adapter.quote("SupportFeeNet") }} as support_fee, + {{ adapter.quote("BookingFeeNet") }} as booking_fee_local, + {{ adapter.quote("ListingFeeNet") }} as listing_fee_local, + {{ adapter.quote("SupportFeeNet") }} as support_fee_local, {{ adapter.quote("TaxPercentage") }} as tax_percentage, {{ adapter.quote("MinBillableListings") }} as minimum_billable_listings, - {{ adapter.quote("MinMonthlyListingFee") }} as minimum_monthly_listing_fee, + {{ adapter.quote("MinMonthlyListingFee") }} + as minimum_monthly_listing_fee_local, {{ adapter.quote("CreatedDate") }} as created_at_utc, cast({{ adapter.quote("CreatedDate") }} as date) as created_date_utc, {{ adapter.quote("UpdatedDate") }} as updated_at_utc, diff --git a/models/staging/core/stg_core__superhog_user.sql b/models/staging/core/stg_core__superhog_user.sql index 07d962c..0acb83d 100644 --- a/models/staging/core/stg_core__superhog_user.sql +++ b/models/staging/core/stg_core__superhog_user.sql @@ -1,3 +1,5 @@ +{{ config(indexes=[{"columns": ["id_superhoguser"]}]) }} + with raw_superhog_user as (select * from {{ source("core", "superhog_user") }}), stg_core__superhog_user as ( diff --git a/models/staging/core/stg_core__user.sql b/models/staging/core/stg_core__user.sql index 8f390a2..c70a553 100644 --- a/models/staging/core/stg_core__user.sql +++ b/models/staging/core/stg_core__user.sql @@ -1,3 +1,5 @@ +{{ config(indexes=[{"columns": ["id_user"]}]) }} + with raw_user as (select * from {{ source("core", "User") }}), stg_core__user as ( diff --git a/models/staging/core/stg_core__verification_request.sql b/models/staging/core/stg_core__verification_request.sql index c1d2967..5caba63 100644 --- a/models/staging/core/stg_core__verification_request.sql +++ b/models/staging/core/stg_core__verification_request.sql @@ -1,3 +1,14 @@ +{{ + config( + indexes=[ + {"columns": ["id_verification_request"]}, + {"columns": ["id_user_guest"]}, + {"columns": ["id_user_host"]}, + ] + ) +}} + + with raw_verification_request as ( select * from {{ source("core", "VerificationRequest") }} From 67991800efdc7bcdfb12599d53ddcfd291e0913d Mon Sep 17 00:00:00 2001 From: Pablo Martin Date: Mon, 8 Apr 2024 09:44:32 +0200 Subject: [PATCH 8/8] docs and moving stuff --- models/intermediate/_int_models.yml | 23 ----- .../int_core__booking_charge_events.sql | 0 .../{ => core}/int_core__bookings.sql | 0 .../int_core__deal_id_master_list.sql | 0 .../int_core__duplicate_bookings.sql | 26 +----- .../{ => core}/int_core__price_plans.sql | 0 .../{ => core}/int_core__unified_user.sql | 0 .../int_core__verification_payments.sql | 0 .../int_core__verification_requests.sql | 0 models/intermediate/core/schema.yaml | 93 +++++++++++++++++++ models/reporting/core/schema.yaml | 70 ++++++++++++++ 11 files changed, 164 insertions(+), 48 deletions(-) delete mode 100644 models/intermediate/_int_models.yml rename models/intermediate/{ => core}/int_core__booking_charge_events.sql (100%) rename models/intermediate/{ => core}/int_core__bookings.sql (100%) rename models/intermediate/{ => core}/int_core__deal_id_master_list.sql (100%) rename models/intermediate/{ => core}/int_core__duplicate_bookings.sql (63%) rename models/intermediate/{ => core}/int_core__price_plans.sql (100%) rename models/intermediate/{ => core}/int_core__unified_user.sql (100%) rename models/intermediate/{ => core}/int_core__verification_payments.sql (100%) rename models/intermediate/{ => core}/int_core__verification_requests.sql (100%) create mode 100644 models/intermediate/core/schema.yaml diff --git a/models/intermediate/_int_models.yml b/models/intermediate/_int_models.yml deleted file mode 100644 index 7581785..0000000 --- a/models/intermediate/_int_models.yml +++ /dev/null @@ -1,23 +0,0 @@ - -version: 2 - -models: - - name: int_core__deal_id_master_list - description: "The distinct list of Deal Ids present in Core, along with the count of Accounts related to it." - columns: - - name: id_deal - description: "The Deal Id." - tests: - - unique - - not_null - - name: users_with_this_id_deal - description: "The count of Superhog accounts that have this Deal Id assigned." - - name: int_core__unified_user - description: "Data for all users after merging User and SuperhogUser." - columns: - - name: id_user - description: "The primary key for this table" - tests: - - unique - - not_null - diff --git a/models/intermediate/int_core__booking_charge_events.sql b/models/intermediate/core/int_core__booking_charge_events.sql similarity index 100% rename from models/intermediate/int_core__booking_charge_events.sql rename to models/intermediate/core/int_core__booking_charge_events.sql diff --git a/models/intermediate/int_core__bookings.sql b/models/intermediate/core/int_core__bookings.sql similarity index 100% rename from models/intermediate/int_core__bookings.sql rename to models/intermediate/core/int_core__bookings.sql diff --git a/models/intermediate/int_core__deal_id_master_list.sql b/models/intermediate/core/int_core__deal_id_master_list.sql similarity index 100% rename from models/intermediate/int_core__deal_id_master_list.sql rename to models/intermediate/core/int_core__deal_id_master_list.sql diff --git a/models/intermediate/int_core__duplicate_bookings.sql b/models/intermediate/core/int_core__duplicate_bookings.sql similarity index 63% rename from models/intermediate/int_core__duplicate_bookings.sql rename to models/intermediate/core/int_core__duplicate_bookings.sql index d3492dd..3538709 100644 --- a/models/intermediate/int_core__duplicate_bookings.sql +++ b/models/intermediate/core/int_core__duplicate_bookings.sql @@ -17,32 +17,8 @@ with ) select db.id_booking, - db.id_user_guest, - db.id_user_host, - db.id_integration, - db.id_accommodation, - db.id_booking_source, - db.id_verification_request, - db.id_staging_host_booking, db.is_duplicate_booking, - b.id_booking as is_duplicating_booking_with_id, - bs.booking_state, - db.check_in_at_utc, - db.check_in_date_utc, - db.check_out_at_utc, - db.check_out_date_utc, - db.summary, - db.guest_email, - db.guest_last_name, - db.guest_first_name, - db.guest_telephone, - db.additional_guests, - db.unsubscribe_verification_reminder, - db.created_at_utc, - db.created_date_utc, - db.updated_at_utc, - db.updated_date_utc, - db.dwh_extracted_at_utc + b.id_booking as is_duplicating_booking_with_id from stg_core__booking_with_dup_flag db left join stg_core__booking_state bs on db.id_booking_state = bs.id_booking_state left join diff --git a/models/intermediate/int_core__price_plans.sql b/models/intermediate/core/int_core__price_plans.sql similarity index 100% rename from models/intermediate/int_core__price_plans.sql rename to models/intermediate/core/int_core__price_plans.sql diff --git a/models/intermediate/int_core__unified_user.sql b/models/intermediate/core/int_core__unified_user.sql similarity index 100% rename from models/intermediate/int_core__unified_user.sql rename to models/intermediate/core/int_core__unified_user.sql diff --git a/models/intermediate/int_core__verification_payments.sql b/models/intermediate/core/int_core__verification_payments.sql similarity index 100% rename from models/intermediate/int_core__verification_payments.sql rename to models/intermediate/core/int_core__verification_payments.sql diff --git a/models/intermediate/int_core__verification_requests.sql b/models/intermediate/core/int_core__verification_requests.sql similarity index 100% rename from models/intermediate/int_core__verification_requests.sql rename to models/intermediate/core/int_core__verification_requests.sql diff --git a/models/intermediate/core/schema.yaml b/models/intermediate/core/schema.yaml new file mode 100644 index 0000000..c0d2800 --- /dev/null +++ b/models/intermediate/core/schema.yaml @@ -0,0 +1,93 @@ +version: 2 + +models: + - name: int_core__duplicate_bookings + description: | + A list of bookings which are considered duplicates of other bookings. + + We currently consider two bookings to be duplicate if they have the same: + - Guest user id + - Accomodation id + - Check-in date + + Bear in mind these bookings do have different booking ids. + + Out of a duplicated tuple of 2 or more bookings: + - Our logic will consider the oldest one to be the "original", not duplicate one. + - This table will contain only the duplicates, and not the original. + columns: + - name: id_booking + data_type: bigint + description: The unique, Superhog generated id for this booking. + + - name: is_duplicate_booking + data_type: boolean + description: | + True if the booking is duplicate. + + If you are thinking that this is redundant, you are right. All + records in this table will be true. But we keep this field to + make your life easier when joining with other tables. + + - name: is_duplicating_booking_with_id + data_type: bigint + description: | + + Indicates what's the original booking being duplicated. + + If there is a tuple of duplicate bookings {A, B, C}, where A is the + original and the others are the duplicates: + - B and C will appear in this table, A will not. + - The value of this field for both B and C will be A's id. + + - name: int_core__booking_charge_events + description: | + + Booking charge events is a fancy word for saying: a booking happened, + the related host had a booking fee set up at the right time, hence we + need to charge him. + + The table contains one record per booking and shows the associated + booking fee, as well as the point in time in which the charge event was + considered. + + Be wary of the booking fees: they don't have an associated currency. + Crazy, I know, but we currently don't store that information in the + backend. + + As for the charge dates: the exact point in time at which we consider + that we should be charging a fee depends on billing details of the host + customer. For some bookings, this will be the check-in. For others, its + when the guest begins the verification process. + + Not all bookings appear here since we don't charge a fee for all + bookings. + + columns: + - name: id_booking + data_type: bigint + description: The unique, Superhog generated id for this booking. + + - name: id_price_plan + data_type: bigint + description: The id of the price plan that relates to this booking. + + - name: booking_fee_local + data_type: numeric + description: The fee to apply to the booking, in host currency. + + - name: booking_fee_charge_at_utc + data_type: timestamp without time zone + description: | + The point in time in which the booking should be invoiced. + + This could be the check-in date of the booking or the date in which the guest verification + started, depending on the billing settings of the host. + + - name: booking_fee_charge_date_utc + data_type: date + description: | + The date in which the booking should be invoiced. + + This could be the check-in date of the booking or the date in which the guest verification + started, depending on the billing settings of the host. diff --git a/models/reporting/core/schema.yaml b/models/reporting/core/schema.yaml index 7cfa5ed..54d6429 100644 --- a/models/reporting/core/schema.yaml +++ b/models/reporting/core/schema.yaml @@ -18,6 +18,9 @@ models: - name: id_payment data_type: bigint description: Superhog id for this Payment. + tests: + - unique + - not_null - name: is_refundable data_type: boolean @@ -108,3 +111,70 @@ models: - name: notes data_type: character varying description: Free text description on the payment. Typically, contains explanations for integration issues with the payment processor. + + - name: core__bookings + description: | + + Bookings that have been processed by the Superhog backend. + + Each record matches one booking. + + All data is coming from the Superhog backend. Some dimensions have been denormalized. + Some pre-aggregations to the booking level have also been performed and added here. + + columns: + - name: id_booking + data_type: bigint + description: The unique, Superhog generated id for this booking. + tests: + - unique + - not_null + + - name: id_user_guest + data_type: character varying + description: The UUID of the Superhog user playing the guest role in the booking. + + - name: id_user_host + data_type: character varying + description: The UUID of the Superhog user playing the host role in the booking. + + - name: id_accommodation + data_type: bigint + description: The ID of the booked listing. + + - name: is_duplicate_booking + data_type: boolean + description: | + A flag that identifies whether the booking is a duplicate. + + A booking is considered a duplicate if there's an older booking with the same user, + accomodation and check-in date. If there are two or more bookings with the same user, + accomodation and check-in date, the oldest one will have False as a value in this field, + and the other ones will have True as a value in this Failed." + + Put simply, if you don't want to receive duplicates, filter this field to True. + + - name: booking_fee_local + data_type: numeric + description: | + The fee that Superhog should charge the host for this booking, measured in local + currency. + + This fee is calculated by finding what was the active price plan for the host user + when the booking fee had to be charged. + + - name: booking_fee_charge_at_utc + data_type: timestamp without time zone + description: | + The point in time in which the booking should be invoiced. + + This could be the check-in date of the booking or the date in which the guest verification + started, depending on the billing settings of the host. + + - name: booking_fee_charge_date_utc + data_type: date + description: | + The date in which the booking should be invoiced. + + This could be the check-in date of the booking or the date in which the guest verification + started, depending on the billing settings of the host.