Removing duplicate verifications per id_booking from Guesty (keeping the most recently updated one) and commenting out tests for edeposit_users

This commit is contained in:
Joaquin Ossa 2024-09-13 16:14:03 +02:00
parent ac1aed029d
commit 5551c97db0
4 changed files with 57 additions and 44 deletions

View file

@ -4,14 +4,23 @@
with with
int_edeposit__verifications as ( int_edeposit__verifications as (
select * from {{ ref("int_edeposit__verifications") }} select * from {{ ref("int_edeposit__verifications") }}
),
-- CTE to rank verifications by updated_at_utc per id_booking
ranked_verifications as (
select
v.*,
row_number() over (
partition by v.id_booking order by v.updated_at_utc desc
) as rn
from int_edeposit__verifications v
where v.version = 'V1' and v.id_booking is not null
) )
select select
v.id_verification, v.id_verification,
v.id_booking, v.id_booking,
v.verification_status, v.verification_status,
v.is_cancelled, v.is_cancelled,
-- when number_nights = 0 (booking's checkin and checkout are on the same day) -- Charge for 1 night if number_nights = 0
-- it's charged for just 1 night
case case
when v.number_nights = 0 and v.verification_status = '{{ ok_status }}' when v.number_nights = 0 and v.verification_status = '{{ ok_status }}'
then {{ cost_per_night }} then {{ cost_per_night }}
@ -21,5 +30,7 @@ select
end as ok_status_fee_in_gbp, end as ok_status_fee_in_gbp,
v.created_date_utc, v.created_date_utc,
v.checkout_date_utc v.checkout_date_utc
from int_edeposit__verifications v from ranked_verifications v
where v.version = 'V1' and v.id_booking is not null where
-- Select only the most recent verification for each id_booking
v.rn = 1

View file

@ -319,10 +319,11 @@ models:
- name: id_booking - name: id_booking
data_type: text data_type: text
description: "unique Superhog generated id for a booking. description: "unique Superhog generated id for a booking.
note that this could be duplicated and both will be charged, note that there might be duplicate bookings on the original data
it's up to the user to not generate or cancel duplicate verifications" but we remove them keeping only the verification with the most recent update."
tests: tests:
- not_null - not_null
- unique
- name: verification_status - name: verification_status
data_type: text data_type: text

View file

@ -19,10 +19,11 @@ models:
- name: id_booking - name: id_booking
data_type: text data_type: text
description: "unique Superhog generated id for a booking. description: "unique Superhog generated id for a booking.
note that this could be duplicated and both will be charged, note that there might be duplicate bookings on the original data
it's up to the user to no generate duplicate verifications" but we remove them keeping only the verification with the most recent update."
tests: tests:
- not_null - not_null
- unique
- name: currency - name: currency
data_type: text data_type: text

View file

@ -202,74 +202,74 @@ models:
- name: id - name: id
data_type: bigint data_type: bigint
description: "Record id for this table" description: "Record id for this table"
tests: # tests:
- unique # - unique
- not_null # - not_null
- name: id_user_partner - name: id_user_partner
data_type: character varying data_type: character varying
description: "Unique id for partner user" description: "Unique id for partner user"
tests: # tests:
- unique # - unique
- not_null # - not_null
- name: currency - name: currency
data_type: character varying data_type: character varying
description: "Three-letter ISO code assigned to the currency used by user." description: "Three-letter ISO code assigned to the currency used by user."
tests: # tests:
- not_null # - not_null
- dbt_expectations.expect_column_values_to_match_regex: # - dbt_expectations.expect_column_values_to_match_regex:
regex: "^[A-Z]{3}$" # regex: "^[A-Z]{3}$"
- name: nightly_fee - name: nightly_fee
data_type: numeric data_type: numeric
description: "Fee charged per night on booking" description: "Fee charged per night on booking"
tests: # tests:
- not_null # - not_null
- dbt_expectations.expect_column_values_to_be_between: # - dbt_expectations.expect_column_values_to_be_between:
min_value: 0 # min_value: 0
max_value: 100 # max_value: 100
strictly: true # strictly: true
- name: cancellation_fee - name: cancellation_fee
data_type: numeric data_type: numeric
description: "Fee charged per cancelled booking" description: "Fee charged per cancelled booking"
tests: # tests:
- not_null # - not_null
- dbt_expectations.expect_column_values_to_be_between: # - dbt_expectations.expect_column_values_to_be_between:
min_value: 0 # min_value: 0
max_value: 100 # max_value: 100
strictly: true # strictly: true
- name: protection_lower_level - name: protection_lower_level
data_type: numeric data_type: numeric
description: "Amount of protection given when verification is 'Passed'" description: "Amount of protection given when verification is 'Passed'"
tests: # tests:
- not_null # - not_null
- dbt_expectations.expect_column_values_to_be_between: # - dbt_expectations.expect_column_values_to_be_between:
min_value: 0 # min_value: 0
strictly: true # strictly: true
- name: protection_upper_level - name: protection_upper_level
data_type: numeric data_type: numeric
description: "Amount of protection given when verification is 'Flagged'" description: "Amount of protection given when verification is 'Flagged'"
tests: # tests:
- not_null # - not_null
- dbt_expectations.expect_column_values_to_be_between: # - dbt_expectations.expect_column_values_to_be_between:
min_value: 0 # min_value: 0
strictly: true # strictly: true
- name: created_at_utc - name: created_at_utc
data_type: timestamp without time zone data_type: timestamp without time zone
description: "Timestamp of when user was created" description: "Timestamp of when user was created"
tests: # tests:
- not_null # - not_null
- name: updated_at_utc - name: updated_at_utc
data_type: timestamp without time zone data_type: timestamp without time zone
description: "Timestamp of when user was last updated" description: "Timestamp of when user was last updated"
tests: # tests:
- not_null # - not_null
- name: dwh_extracted_at_utc - name: dwh_extracted_at_utc
data_type: timestamp with time zone data_type: timestamp with time zone