From 5551c97db0df040e67a6028610aeed4072642444 Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Fri, 13 Sep 2024 16:14:03 +0200 Subject: [PATCH] removing duplicates from guesty and removing tests for edeposit_users --- .../int_edeposit__guesty_verifications.sql | 19 +++-- models/intermediate/edeposit/schema.yml | 5 +- models/reporting/edeposit/schema.yml | 5 +- models/staging/core/schema.yml | 72 +++++++++---------- 4 files changed, 57 insertions(+), 44 deletions(-) diff --git a/models/intermediate/edeposit/int_edeposit__guesty_verifications.sql b/models/intermediate/edeposit/int_edeposit__guesty_verifications.sql index e0b88e6..514abd5 100644 --- a/models/intermediate/edeposit/int_edeposit__guesty_verifications.sql +++ b/models/intermediate/edeposit/int_edeposit__guesty_verifications.sql @@ -4,14 +4,23 @@ with int_edeposit__verifications as ( select * from {{ ref("int_edeposit__verifications") }} + ), + -- CTE to rank verifications by updated_at_utc per id_booking + ranked_verifications as ( + select + v.*, + row_number() over ( + partition by v.id_booking order by v.updated_at_utc desc + ) as rn + from int_edeposit__verifications v + where v.version = 'V1' and v.id_booking is not null ) select v.id_verification, v.id_booking, v.verification_status, v.is_cancelled, - -- when number_nights = 0 (booking's checkin and checkout are on the same day) - -- it's charged for just 1 night + -- Charge for 1 night if number_nights = 0 case when v.number_nights = 0 and v.verification_status = '{{ ok_status }}' then {{ cost_per_night }} @@ -21,5 +30,7 @@ select end as ok_status_fee_in_gbp, v.created_date_utc, v.checkout_date_utc -from int_edeposit__verifications v -where v.version = 'V1' and v.id_booking is not null +from ranked_verifications v +where + -- Select only the most recent verification for each id_booking + v.rn = 1 diff --git a/models/intermediate/edeposit/schema.yml b/models/intermediate/edeposit/schema.yml index e8d27f8..0887122 100644 --- a/models/intermediate/edeposit/schema.yml +++ b/models/intermediate/edeposit/schema.yml @@ -319,10 +319,11 @@ models: - name: id_booking data_type: text description: "unique Superhog generated id for a booking. - note that this could be duplicated and both will be charged, - it's up to the user to not generate or cancel duplicate verifications" + note that there might be duplicate bookings on the original data + but we remove them keeping only the verification with the most recent update." tests: - not_null + - unique - name: verification_status data_type: text diff --git a/models/reporting/edeposit/schema.yml b/models/reporting/edeposit/schema.yml index bfb6f59..7a68429 100644 --- a/models/reporting/edeposit/schema.yml +++ b/models/reporting/edeposit/schema.yml @@ -19,10 +19,11 @@ models: - name: id_booking data_type: text description: "unique Superhog generated id for a booking. - note that this could be duplicated and both will be charged, - it's up to the user to no generate duplicate verifications" + note that there might be duplicate bookings on the original data + but we remove them keeping only the verification with the most recent update." tests: - not_null + - unique - name: currency data_type: text diff --git a/models/staging/core/schema.yml b/models/staging/core/schema.yml index e7f938d..c6e6c43 100644 --- a/models/staging/core/schema.yml +++ b/models/staging/core/schema.yml @@ -202,74 +202,74 @@ models: - name: id data_type: bigint description: "Record id for this table" - tests: - - unique - - not_null + # tests: + # - unique + # - not_null - name: id_user_partner data_type: character varying description: "Unique id for partner user" - tests: - - unique - - not_null + # tests: + # - unique + # - not_null - name: currency data_type: character varying description: "Three-letter ISO code assigned to the currency used by user." - tests: - - not_null - - dbt_expectations.expect_column_values_to_match_regex: - regex: "^[A-Z]{3}$" + # tests: + # - not_null + # - dbt_expectations.expect_column_values_to_match_regex: + # regex: "^[A-Z]{3}$" - name: nightly_fee data_type: numeric description: "Fee charged per night on booking" - tests: - - not_null - - dbt_expectations.expect_column_values_to_be_between: - min_value: 0 - max_value: 100 - strictly: true + # tests: + # - not_null + # - dbt_expectations.expect_column_values_to_be_between: + # min_value: 0 + # max_value: 100 + # strictly: true - name: cancellation_fee data_type: numeric description: "Fee charged per cancelled booking" - tests: - - not_null - - dbt_expectations.expect_column_values_to_be_between: - min_value: 0 - max_value: 100 - strictly: true + # tests: + # - not_null + # - dbt_expectations.expect_column_values_to_be_between: + # min_value: 0 + # max_value: 100 + # strictly: true - name: protection_lower_level data_type: numeric description: "Amount of protection given when verification is 'Passed'" - tests: - - not_null - - dbt_expectations.expect_column_values_to_be_between: - min_value: 0 - strictly: true + # tests: + # - not_null + # - dbt_expectations.expect_column_values_to_be_between: + # min_value: 0 + # strictly: true - name: protection_upper_level data_type: numeric description: "Amount of protection given when verification is 'Flagged'" - tests: - - not_null - - dbt_expectations.expect_column_values_to_be_between: - min_value: 0 - strictly: true + # tests: + # - not_null + # - dbt_expectations.expect_column_values_to_be_between: + # min_value: 0 + # strictly: true - name: created_at_utc data_type: timestamp without time zone description: "Timestamp of when user was created" - tests: - - not_null + # tests: + # - not_null - name: updated_at_utc data_type: timestamp without time zone description: "Timestamp of when user was last updated" - tests: - - not_null + # tests: + # - not_null - name: dwh_extracted_at_utc data_type: timestamp with time zone