diff --git a/models/intermediate/core/int_core__bookings.sql b/models/intermediate/core/int_core__bookings.sql index 79685b0..0bbc91c 100644 --- a/models/intermediate/core/int_core__bookings.sql +++ b/models/intermediate/core/int_core__bookings.sql @@ -26,7 +26,7 @@ select b.id_verification_request, vrbs.verification_request_booking_source, b.id_staging_host_booking, - db.is_duplicate_booking, + coalesce(db.is_duplicate_booking, false) as is_duplicate_booking, db.is_duplicating_booking_with_id, bs.booking_state, b.check_in_at_utc, diff --git a/models/intermediate/core/int_core__duplicate_bookings.sql b/models/intermediate/core/int_core__duplicate_bookings.sql index 9c9b585..849dc49 100644 --- a/models/intermediate/core/int_core__duplicate_bookings.sql +++ b/models/intermediate/core/int_core__duplicate_bookings.sql @@ -1,33 +1,58 @@ {{ config(materialized="table", unique_key="id_booking") }} with - stg_core__booking_state as (select * from {{ ref("stg_core__booking_state") }}), + stg_core__booking as (select * from {{ ref("stg_core__booking") }}), stg_core__booking_with_dup_flag as ( select - case -- This ugly thing below is true if the booking is duplicate, false if not + -- This ugly thing below is true if the booking is duplicate and is not + -- the original, false otherwise + case when row_number() over ( partition by id_user_guest, id_accommodation, check_in_date_utc order by id_booking asc ) > 1 + and id_user_guest is not null then true else false end as is_duplicate_booking, * - from {{ ref("stg_core__booking") }} + from stg_core__booking + ), + -- These next queries are to obtain the original booking_id for those duplicated so + -- then we can associate to which booking_id they are duplicated from + duplicate_booking as ( + select + id_user_guest, + id_accommodation, + check_in_date_utc, + count(*) as number_bookings + from stg_core__booking + group by id_user_guest, id_accommodation, check_in_date_utc + ), + stg_core__booking_duplicated as ( + select bdf.* + from stg_core__booking_with_dup_flag bdf + left join + duplicate_booking db + on ( + db.id_user_guest = bdf.id_user_guest + and db.id_accommodation = bdf.id_accommodation + and db.check_in_date_utc = bdf.check_in_date_utc + ) + where bdf.is_duplicate_booking = false and db.number_bookings > 1 ) select db.id_booking, db.is_duplicate_booking, - b.id_booking as is_duplicating_booking_with_id + bd.id_booking as is_duplicating_booking_with_id from stg_core__booking_with_dup_flag db -left join stg_core__booking_state bs on db.id_booking_state = bs.id_booking_state left join - stg_core__booking_with_dup_flag b + stg_core__booking_duplicated bd on ( - b.id_user_guest = db.id_user_guest - and b.id_accommodation = db.id_accommodation - and b.check_in_date_utc = db.check_in_date_utc - and b.id_booking != db.id_booking + bd.id_user_guest = db.id_user_guest + and bd.id_accommodation = db.id_accommodation + and bd.check_in_date_utc = db.check_in_date_utc + and bd.id_booking != db.id_booking ) -where b.is_duplicate_booking = false and db.is_duplicate_booking = true +where db.is_duplicate_booking = true diff --git a/models/intermediate/core/schema.yaml b/models/intermediate/core/schema.yaml index dc720ce..2cc97d7 100644 --- a/models/intermediate/core/schema.yaml +++ b/models/intermediate/core/schema.yaml @@ -19,6 +19,9 @@ models: - name: id_booking data_type: bigint description: The unique, Superhog generated id for this booking. + tests: + - unique + - not_null - name: is_duplicate_booking data_type: boolean @@ -39,6 +42,8 @@ models: original and the others are the duplicates: - B and C will appear in this table, A will not. - The value of this field for both B and C will be A's id. + tests: + - not_null - name: int_core__booking_charge_events description: | @@ -1377,7 +1382,7 @@ models: accomodation and check-in date, the oldest one will have False as a value in this field, and the other ones will have True as a value in this Failed." - Put simply, if you don't want to receive duplicates, filter this field to True. + Put simply, if you don't want to receive duplicates, filter this field to False. - name: is_duplicating_booking_with_id data_type: bigint