Merged PR 5589: Limits data test coverage on resolutions data

# Description

This relates to the data alerts on resolutions models.

We have one incident status that indicates the record is missing information. The record that is currently triggering these alerts is indeed tagged with that status.

The idea is to:
* Create a new boolean that flags whether the incident has missing information. This is propagated from staging to reporting.
* Apply a `where` clause to any existing data alert, in staging and reporting, that is likely to fail because of insufficient data quality.
* Remove the redundant test coverage in intermediate, since the same checks already run in reporting. However, we keep a few not-null tests and the PK test on critical information (i.e., id_booking).

# Checklist

- [X] The edited models and dependants run properly with production data.
- [X] The edited models are sufficiently documented.
- [X] The edited models contain PK tests, and I've run them and they pass.
- [NA] I have checked for DRY opportunities with other models and docs.
- [NA] I've picked the right materialization for the affected models.

# Other

- [ ] Check if a full-refresh is required after this PR is merged.

Related work items: #31668
This commit is contained in:
Oriol Roqué Paniagua 2025-07-01 09:44:54 +00:00
parent ca939e0c69
commit 8bc525e4c2
6 changed files with 99 additions and 141 deletions

View file

@ -24,6 +24,14 @@ models:
data_tests:
- not_null
- name: is_incident_missing_details
data_type: boolean
description: |
Flag to indicate if the incident is currently missing details.
This is used to limit the data tests coverage.
data_tests:
- not_null
- name: is_submission_complete
data_type: boolean
description: "Flag to indicate if the submission is complete."
@ -69,6 +77,7 @@ models:
- relationships:
to: ref('stg_core__user')
field: id_user
where: not is_incident_missing_details
- name: host_account_name
data_type: text
@ -105,6 +114,7 @@ models:
- relationships:
to: ref('stg_core__user')
field: id_user
where: not is_incident_missing_details
- name: guest_email
data_type: text
@ -146,13 +156,14 @@ models:
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
strictly: false
where: not is_incident_missing_details
- name: deposit_retained_currency
data_type: text
description: "Currency of the deposit retained."
data_tests:
- not_null:
where: "deposit_retained_amount_in_txn_currency > 0"
where: "deposit_retained_amount_in_txn_currency > 0 and not is_incident_missing_details"
- name: has_guest_contributed_to_cost
data_type: boolean
@ -172,13 +183,14 @@ models:
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
strictly: false
where: not is_incident_missing_details
- name: guest_contribution_currency
data_type: text
description: "Currency of the guest contribution."
data_tests:
- not_null:
where: "guest_contribution_amount_in_txn_currency > 0"
where: "guest_contribution_amount_in_txn_currency > 0 and not is_incident_missing_details"
- name: is_guest_contacted_about_damage
data_type: boolean
@ -193,10 +205,12 @@ models:
data_type: numeric
description: "Superhog code that uniquely identifies a single accommodation."
data_tests:
- not_null
- not_null:
where: "not is_incident_missing_details"
- relationships:
to: ref('stg_core__accommodation')
field: id_accommodation
where: not is_incident_missing_details
- name: accommodation_name
data_type: text
@ -226,25 +240,29 @@ models:
data_type: timestamp without time zone
description: "Timestamp of the check-in date in UTC of the booking."
data_tests:
- not_null
- not_null:
where: "not is_incident_missing_details"
- name: check_in_date_utc
data_type: date
description: "Date of the check-in date in UTC of the booking."
data_tests:
- not_null
- not_null:
where: "not is_incident_missing_details"
- name: check_out_at_utc
data_type: timestamp without time zone
description: "Timestamp of the check-out date in UTC of the booking."
data_tests:
- not_null
- not_null:
where: "not is_incident_missing_details"
- name: check_out_date_utc
data_type: date
description: "Date of the check-out date in UTC of the booking."
data_tests:
- not_null
- not_null:
where: "not is_incident_missing_details"
- name: id_booking
data_type: numeric
@ -270,6 +288,7 @@ models:
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
strictly: false
where: "not is_incident_missing_details"
- name: booking_services
data_type: text
@ -301,7 +320,7 @@ models:
description: "Timestamp of the calculation."
data_tests:
- not_null:
where: "calculated_payout_amount_in_txn_currency > 0"
where: "calculated_payout_amount_in_txn_currency > 0 and not is_incident_missing_details"
- name: protection_name
data_type: text
@ -323,6 +342,7 @@ models:
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
strictly: false
where: "not is_incident_missing_details"
- name: upper_protection_limit_usd
data_type: numeric
@ -331,6 +351,7 @@ models:
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
strictly: false
where: "not is_incident_missing_details"
- name: calculated_payout_amount_in_txn_currency
data_type: numeric
@ -339,13 +360,14 @@ models:
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
strictly: false
where: "not is_incident_missing_details"
- name: calculated_payout_currency
data_type: text
description: "Currency of the calculated payout amount."
data_tests:
- not_null:
where: "calculated_payout_amount_in_txn_currency > 0"
where: "calculated_payout_amount_in_txn_currency > 0 and not is_incident_missing_details"
- name: calculated_payout_amount_in_usd
data_type: numeric
@ -354,6 +376,7 @@ models:
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
strictly: false
where: "not is_incident_missing_details"
- name: submitted_payout_amount_in_txn_currency
data_type: numeric
@ -362,13 +385,14 @@ models:
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
strictly: false
where: "not is_incident_missing_details"
- name: submitted_payout_currency
data_type: text
description: "Currency of the submitted payout amount."
data_tests:
- not_null:
where: "submitted_payout_amount_in_txn_currency > 0"
where: "submitted_payout_amount_in_txn_currency > 0 and not is_incident_missing_details"
- name: submitted_payout_amount_in_usd
data_type: numeric
@ -377,6 +401,7 @@ models:
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
strictly: false
where: "not is_incident_missing_details"
- name: calculated_guest_charge_amount_in_txn_currency
data_type: numeric
@ -385,13 +410,14 @@ models:
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
strictly: false
where: "not is_incident_missing_details"
- name: calculated_guest_charge_currency
data_type: text
description: "Currency of the calculated guest charge amount."
data_tests:
- not_null:
where: "calculated_guest_charge_amount_in_txn_currency > 0"
where: "calculated_guest_charge_amount_in_txn_currency > 0 and not is_incident_missing_details"
- name: calculated_guest_charge_amount_in_usd
data_type: numeric
@ -400,6 +426,7 @@ models:
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
strictly: false
where: "not is_incident_missing_details"
- name: submitted_guest_charge_amount_in_txn_currency
data_type: numeric
@ -408,13 +435,14 @@ models:
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
strictly: false
where: "not is_incident_missing_details"
- name: submitted_guest_charge_currency
data_type: text
description: "Currency of the submitted guest charge amount."
data_tests:
- not_null:
where: "submitted_guest_charge_amount_in_txn_currency > 0"
where: "submitted_guest_charge_amount_in_txn_currency > 0 and not is_incident_missing_details"
- name: submitted_guest_charge_amount_in_usd
data_type: numeric
@ -423,6 +451,7 @@ models:
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
strictly: false
where: "not is_incident_missing_details"
- name: third_party_claim_submitted
data_type: boolean
@ -440,6 +469,7 @@ models:
- dbt_expectations.expect_column_values_to_be_between:
min_value: 0
strictly: false
where: "not is_incident_missing_details"
- name: third_party_claim_currency
data_type: text
@ -447,7 +477,7 @@ models:
compensation from another platform."
data_tests:
- not_null:
where: "third_party_claim_amount_in_txn_currency > 0"
where: "third_party_claim_amount_in_txn_currency > 0 and not is_incident_missing_details"
- name: cosmos_db_timestamp_utc
data_type: timestamp

View file

@ -3,9 +3,13 @@
-- Many of them don't have the same necessary fields as the current model.
-- The same happens with the 'ManualFormWeb' document_version.
{% set old_or_manual_documents = ("OLDCLAIMTOINCIDENTMODEL", "MANUALFORMWEB") %}
-- Test or cancelled incidents are not considered in the reporting.
{% set tests_or_cancelled_incidents = "ARCHIVED" %}
-- Some incidents have insufficient details which might create data quality issues.
{% set insufficient_details_incidents = "INSUFFICIENT DETAILS" %}
with
raw_incident as (select * from {{ source("resolutions", "incident") }}),
deduped_verifications as (
@ -16,6 +20,8 @@ select
{{ adapter.quote("documents") }} ->> 'id' as id_incident,
{{ adapter.quote("documents") }} ->> 'VerificationId' as id_verification,
{{ adapter.quote("documents") }} ->> 'CurrentStatusName' as current_status_name,
upper({{ adapter.quote("documents") }} ->> 'CurrentStatusName')
= '{{ insufficient_details_incidents }}' as is_incident_missing_details,
({{ adapter.quote("documents") }} ->> 'IsSubmissionComplete')::boolean
as is_submission_complete,
{{ adapter.quote("documents") }} ->> 'CurrentAgentName' as current_agent_name,