Merged PR 2728: edeposit verifications docs and small refactors

# Description

This PR:
- Adds half-decent docs to `stg_edeposit__verifications` and tests. I say half-decent because I would describe our tests as "as strict as the backend guidance allows". But we can't do miracles, so it stays this way for now.
- Shifts a few column operations that were being done in the `int` layer into the `stg` layer.
- Also removes a couple of fields from `int` that were marked as deprecated by Ray. Would rather not have them at all beyond `stg`.

# Checklist

- [X] The edited models and dependants run properly with production data.
- [X] The edited models are sufficiently documented.
- [X] The edited models contain PK tests, and I've run and passed them.
- [X] I have checked for DRY opportunities with other models and docs.
- [X] I've picked the right materialization for the affected models.

# Other

- [ ] Check if a full-refresh is required after this PR is merged.

Related work items: #20123
This commit is contained in:
Pablo Martín 2024-09-04 11:07:58 +00:00
commit dd57c28768
3 changed files with 280 additions and 15 deletions

View file

@ -7,9 +7,9 @@ select
-- they are completely unrelated
id_verification,
id_booking,
id_user as id_user_partner,
id_listing as id_accommodation,
version, -- V1 for Guesty and V2 for E-deposit
id_user_partner,
id_accommodation,
version,
case
when version = 'V1' then 'Guesty' when version = 'V2' then 'Edeposit' else null
end as verification_source,
@ -40,14 +40,11 @@ select
listing_country,
listing_postcode,
pets_allowed,
level_of_protection_amount,
level_of_protection_currency,
status_updated_at_utc,
cast(status_updated_at_utc as date) as status_updated_date_utc,
status_updated_date_utc,
updated_at_utc,
cast(updated_at_utc as date) as updated_date_utc,
creation_at_utc as athena_creation_at_utc,
cast(creation_at_utc as date) as athena_creation_date_utc,
athena_creation_at_utc,
athena_creation_date_utc,
created_at_utc,
cast(created_at_utc as date) as created_date_utc
created_date_utc
from stg_edeposit__verifications

View file

@ -12,4 +12,262 @@ models:
tests:
- unique
- not_null
# Plenty of stuff pending. You cheeky Pablo
- name: id_booking
data_type: text
description: ""
tests:
- not_null:
where: created_at_utc > '2024-04-01T00:00:00Z' and verification_status != 'InsufficientInformation'
- name: id_user_partner
data_type: text
description: The unique ID of the partner calling the API.
tests:
- not_null
- name: id_accommodation
data_type: text
description: An ID for the listing related to the booking.
- name: version
data_type: text
description: |
Indicates whether the verification is for V1 (Athena) or V2
(e-deposit).
tests:
- not_null
- accepted_values:
values:
- 'V1'
- 'V2'
- name: nightly_fee_local
data_type: numeric
description: |
The fee per night to be charged for this verification, in the currency
of the user. Note that the Athena/e-deposit user also has a configured
nightly fee. It's unclear at this point which one has priority for
billing.
tests:
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
max_value: 100
strictly: true
- name: verification_status
data_type: text
description: |
This field shows the outcome of the Verification itself.
tests:
- not_null
- accepted_values:
values:
- "Approved"
- "Flagged"
- "Rejected"
- "InsufficientInformation"
- name: verification_status_reason
data_type: text
description: Not used, ignore.
- name: email_flag
data_type: text
description: |
Null if the email shows no issues, otherwise it details the problems
attached to the given email.
tests:
- accepted_values:
values:
- "Email address is not deliverable"
- "Email domain is not registered"
- "Email domain listed as disposable"
- name: phone_flag
data_type: text
description: |
Null if the phone number shows no issues, otherwise it details the
problems attached to the given phone number.
tests:
- accepted_values:
values:
- "Phone number not reachable"
- "Not a real phone number"
- "Phone number is disposable"
- name: watch_list
data_type: text
description: ""
tests:
- accepted_values:
values:
- "Match"
- "No Match"
- name: channel
data_type: text
description: ""
- name: checkin_at_utc
data_type: timestamp without time zone
description: ""
- name: checkout_at_utc
data_type: timestamp without time zone
description: ""
- name: is_cancelled
data_type: boolean
description: |
Indicates if the booking has been cancelled or not. At the source,
null and false values have the same meaning, so here we turn nulls
into false to keep things simple.
tests:
- not_null
- accepted_values:
values:
- true
- false
- name: cancelled_at_utc
data_type: timestamp without time zone
description: If the booking was cancelled, when did we learn about it.
tests:
- not_null:
where: is_cancelled = true
- name: user_email
data_type: text
description: |
The email of the Athena/e-deposit partner user.
- name: guest_email
data_type: text
description: ""
- name: guest_last_name
data_type: text
description: ""
- name: guest_first_name
data_type: text
description: ""
- name: guest_telephone
data_type: text
description: ""
- name: company_name
data_type: text
description: ""
- name: property_manager_name
data_type: text
description: ""
- name: property_manager_email
data_type: text
description: ""
- name: listing_name
data_type: text
description: ""
- name: listing_town
data_type: text
description: ""
- name: listing_address
data_type: text
description: ""
- name: listing_country
data_type: text
description: ""
- name: listing_postcode
data_type: text
description: ""
- name: pets_allowed
data_type: boolean
description: ""
- name: level_of_protection_amount
data_type: integer
description: Ignore.
- name: level_of_protection_currency
data_type: text
description: Ignore.
- name: attachments
data_type: text
description: ""
- name: status_updated_at_utc
data_type: timestamp without time zone
description: |
Timestamp of the last time the record was modified before screening
happened. Only relevant for V1 records.
tests:
- not_null:
where: version = 'V1'
- name: status_updated_date_utc
# The staging model casts StatusUpdatedDate with ::date, so this is a date.
data_type: date
description: |
Date of the last time the record was modified before screening
happened. Only relevant for V1 records.
tests:
- not_null:
where: version = 'V1'
- name: updated_at_utc
data_type: timestamp without time zone
description: |
Timestamp of the last edit of the record, as set by the
Athena/e-deposit application.
tests:
- not_null
- name: updated_date_utc
# The staging model casts UpdatedDate with ::date, so this is a date.
data_type: date
description: |
Date of the last edit of the record, as set by the
Athena/e-deposit application.
tests:
- not_null
- name: athena_creation_at_utc
data_type: timestamp without time zone
description: |
A client-provided timestamp of when the booking was created. It's an
optional field, only relevant for 'V1' records.
- name: athena_creation_date_utc
# The staging model casts CreationDate with ::date, so this is a date.
data_type: date
description: |
A client-provided date of when the booking was created. It's an
optional field, only relevant for 'V1' records.
- name: created_at_utc
data_type: timestamp without time zone
description: |
The internal application timestamp of when this record was created.
tests:
- not_null
- name: created_date_utc
# The staging model casts CreatedDate with ::date, so this is a date.
data_type: date
description: |
The internal application date of when this record was created.
tests:
- not_null
- name: cosmos_db_timestamp_utc
data_type: timestamp with time zone
description: The internal Cosmos DB timestamp of the last record update.
tests:
- not_null

View file

@ -29,8 +29,8 @@ with
select
{{ adapter.quote("documents") }} ->> 'id' as id_verification,
{{ adapter.quote("documents") }} ->> 'BookingId' as id_booking,
{{ adapter.quote("documents") }} ->> 'userId' as id_user,
{{ adapter.quote("documents") }} ->> 'ListingId' as id_listing,
{{ adapter.quote("documents") }} ->> 'userId' as id_user_partner,
{{ adapter.quote("documents") }} ->> 'ListingId' as id_accommodation,
{{ adapter.quote("documents") }} ->> 'Version' as "version",
@ -51,7 +51,9 @@ with
as checkin_at_utc,
({{ adapter.quote("documents") }} ->> 'CheckOut')::timestamp
as checkout_at_utc,
({{ adapter.quote("documents") }} ->> 'Cancelled')::boolean as is_cancelled,
coalesce(
({{ adapter.quote("documents") }} ->> 'Cancelled')::boolean, false
) as is_cancelled,
({{ adapter.quote("documents") }} ->> 'CancellationDate')::timestamp
as cancelled_at_utc,
@ -85,12 +87,20 @@ with
({{ adapter.quote("documents") }} ->> 'StatusUpdatedDate')::timestamp
as status_updated_at_utc,
({{ adapter.quote("documents") }} ->> 'StatusUpdatedDate')::date
as status_updated_date_utc,
({{ adapter.quote("documents") }} ->> 'UpdatedDate')::timestamp
as updated_at_utc,
({{ adapter.quote("documents") }} ->> 'UpdatedDate')::date
as updated_date_utc,
({{ adapter.quote("documents") }} ->> 'CreationDate')::timestamp
as creation_at_utc,
as athena_creation_at_utc,
({{ adapter.quote("documents") }} ->> 'CreationDate')::date
as athena_creation_date_utc,
({{ adapter.quote("documents") }} ->> 'CreatedDate')::timestamp
as created_at_utc,
({{ adapter.quote("documents") }} ->> 'CreatedDate')::date
as created_date_utc,
to_timestamp(
(({{ adapter.quote("documents") }} ->> '_ts'))::integer
) as cosmos_db_timestamp_utc