removing duplicates from guesty and removing tests for edeposit_users

This commit is contained in:
Joaquin Ossa 2024-09-13 16:14:03 +02:00
parent ac1aed029d
commit 5551c97db0
4 changed files with 57 additions and 44 deletions

View file

@ -4,14 +4,23 @@
with
int_edeposit__verifications as (
    select * from {{ ref("int_edeposit__verifications") }}
),
-- Rank the V1 verifications of each booking from most to least recently
-- updated, so that the final select can keep a single row per id_booking
-- (rn = 1).
--   * `nulls last`: on engines where DESC sorts NULLs first (e.g. PostgreSQL)
--     a row with a missing updated_at_utc would otherwise be ranked as the
--     most recent one.
--   * `id_verification desc`: tie-breaker so the chosen row is repeatable
--     between runs when two verifications share the same updated_at_utc
--     (presumably id_verification is unique per verification -- confirm).
ranked_verifications as (
    select
        v.*,
        row_number() over (
            partition by v.id_booking
            order by v.updated_at_utc desc nulls last, v.id_verification desc
        ) as rn
    from int_edeposit__verifications v
    -- Only V1 verifications that are linked to a booking are ranked.
    where v.version = 'V1' and v.id_booking is not null
)
select
v.id_verification,
v.id_booking,
v.verification_status,
v.is_cancelled,
-- when number_nights = 0 (booking's checkin and checkout are on the same day)
-- it's charged for just 1 night
-- Charge for 1 night if number_nights = 0
case
when v.number_nights = 0 and v.verification_status = '{{ ok_status }}'
then {{ cost_per_night }}
@ -21,5 +30,7 @@ select
end as ok_status_fee_in_gbp,
v.created_date_utc,
v.checkout_date_utc
from int_edeposit__verifications v
where v.version = 'V1' and v.id_booking is not null
from ranked_verifications v
where
-- Select only the most recent verification for each id_booking
v.rn = 1

View file

@ -319,10 +319,11 @@ models:
- name: id_booking
data_type: text
description: "unique Superhog generated id for a booking.
note that this could be duplicated and both will be charged,
it's up to the user to not generate or cancel duplicate verifications"
note that there might be duplicate bookings on the original data
but we remove them keeping only the verification with the most recent update."
tests:
- not_null
- unique
- name: verification_status
data_type: text

View file

@ -19,10 +19,11 @@ models:
- name: id_booking
data_type: text
description: "unique Superhog generated id for a booking.
note that this could be duplicated and both will be charged,
it's up to the user to no generate duplicate verifications"
note that there might be duplicate bookings on the original data
but we remove them keeping only the verification with the most recent update."
tests:
- not_null
- unique
- name: currency
data_type: text

View file

@ -202,74 +202,74 @@ models:
- name: id
data_type: bigint
description: "Record id for this table"
tests:
- unique
- not_null
# tests:
# - unique
# - not_null
- name: id_user_partner
data_type: character varying
description: "Unique id for partner user"
tests:
- unique
- not_null
# tests:
# - unique
# - not_null
- name: currency
data_type: character varying
description: "Three-letter ISO code assigned to the currency used by user."
tests:
- not_null
- dbt_expectations.expect_column_values_to_match_regex:
regex: "^[A-Z]{3}$"
# tests:
# - not_null
# - dbt_expectations.expect_column_values_to_match_regex:
# regex: "^[A-Z]{3}$"
- name: nightly_fee
data_type: numeric
description: "Fee charged per night on booking"
tests:
- not_null
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
max_value: 100
strictly: true
# tests:
# - not_null
# - dbt_expectations.expect_column_values_to_be_between:
# min_value: 0
# max_value: 100
# strictly: true
- name: cancellation_fee
data_type: numeric
description: "Fee charged per cancelled booking"
tests:
- not_null
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
max_value: 100
strictly: true
# tests:
# - not_null
# - dbt_expectations.expect_column_values_to_be_between:
# min_value: 0
# max_value: 100
# strictly: true
- name: protection_lower_level
data_type: numeric
description: "Amount of protection given when verification is 'Passed'"
tests:
- not_null
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
strictly: true
# tests:
# - not_null
# - dbt_expectations.expect_column_values_to_be_between:
# min_value: 0
# strictly: true
- name: protection_upper_level
data_type: numeric
description: "Amount of protection given when verification is 'Flagged'"
tests:
- not_null
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
strictly: true
# tests:
# - not_null
# - dbt_expectations.expect_column_values_to_be_between:
# min_value: 0
# strictly: true
- name: created_at_utc
data_type: timestamp without time zone
description: "Timestamp of when user was created"
tests:
- not_null
# tests:
# - not_null
- name: updated_at_utc
data_type: timestamp without time zone
description: "Timestamp of when user was last updated"
tests:
- not_null
# tests:
# - not_null
- name: dwh_extracted_at_utc
data_type: timestamp with time zone