Merged PR 5589: Limits data test coverage on resolutions data

# Description This relates to the data alerts on the resolutions models. We have one incident status that indicates the record is missing information. The record that is currently triggering this alert is indeed tagged as such. The idea is to: * Create a new boolean that flags whether the incident has missing information. This flag is propagated from staging to reporting. * Apply a where clause to any existing data test, in staging and reporting, that is susceptible to failing because of insufficient data quality. * Remove redundant test coverage in intermediate, since the same coverage already exists in reporting. However, we keep a few not-null tests and the PK test on critical information (i.e., id_booking). # Checklist - [X] The edited models and dependants run properly with production data. - [X] The edited models are sufficiently documented. - [X] The edited models contain PK tests, and I've run and passed them. - [NA] I have checked for DRY opportunities with other models and docs. - [NA] I've picked the right materialization for the affected models. # Other - [ ] Check if a full-refresh is required after this PR is merged. Related work items: #31668
2025-07-01 09:44:54 +00:00 · 2025-07-01 09:44:54 +00:00 · 8bc525e4c2
commit 8bc525e4c2
parent ca939e0c69
6 changed files with 99 additions and 141 deletions
--- a/models/staging/resolutions/schema.yml
+++ b/models/staging/resolutions/schema.yml
@ -24,6 +24,14 @@ models:
        data_tests:
          - not_null

+      - name: is_incident_missing_details
+        data_type: boolean
+        description: |
+          Flag to indicate if the incident is currently missing details.
+          This is used to limit the data tests coverage.
+        data_tests:
+          - not_null
+
      - name: is_submission_complete
        data_type: boolean
        description: "Flag to indicate if the submission is complete."
@ -69,6 +77,7 @@ models:
          - relationships:
              to: ref('stg_core__user')
              field: id_user
+              where: not is_incident_missing_details

      - name: host_account_name
        data_type: text
@ -105,6 +114,7 @@ models:
          - relationships:
              to: ref('stg_core__user')
              field: id_user
+              where: not is_incident_missing_details

      - name: guest_email
        data_type: text
@ -146,13 +156,14 @@ models:
          - dbt_expectations.expect_column_values_to_be_between:
              min_value: 0
              strictly: false
+              where: not is_incident_missing_details

      - name: deposit_retained_currency
        data_type: text
        description: "Currency of the deposit retained."
        data_tests:
          - not_null:
-              where: "deposit_retained_amount_in_txn_currency > 0"
+              where: "deposit_retained_amount_in_txn_currency > 0 and not is_incident_missing_details"

      - name: has_guest_contributed_to_cost
        data_type: boolean
@ -172,13 +183,14 @@ models:
          - dbt_expectations.expect_column_values_to_be_between:
              min_value: 0
              strictly: false
+              where: not is_incident_missing_details

      - name: guest_contribution_currency
        data_type: text
        description: "Currency of the guest contribution."
        data_tests:
          - not_null:
-              where: "guest_contribution_amount_in_txn_currency > 0"
+              where: "guest_contribution_amount_in_txn_currency > 0 and not is_incident_missing_details"

      - name: is_guest_contacted_about_damage
        data_type: boolean
@ -193,10 +205,12 @@ models:
        data_type: numeric
        description: "Superhog code that uniquely identifies a single accommodation."
        data_tests:
-          - not_null
+          - not_null:
+              where: "not is_incident_missing_details"
          - relationships:
              to: ref('stg_core__accommodation')
              field: id_accommodation
+              where: not is_incident_missing_details

      - name: accommodation_name
        data_type: text
@ -226,25 +240,29 @@ models:
        data_type: timestamp without time zone
        description: "Timestamp of the check-in date in UTC of the booking."
        data_tests:
-          - not_null
+          - not_null:
+              where: "not is_incident_missing_details"

      - name: check_in_date_utc
        data_type: date
        description: "Date of the check-in date in UTC of the booking."
        data_tests:
-          - not_null
+          - not_null:
+              where: "not is_incident_missing_details"

      - name: check_out_at_utc
        data_type: timestamp without time zone
        description: "Timestamp of the check-out date in UTC of the booking."
        data_tests:
-          - not_null
+          - not_null:
+              where: "not is_incident_missing_details"

      - name: check_out_date_utc
        data_type: date
        description: "Date of the check-out date in UTC of the booking."
        data_tests:
-          - not_null
+          - not_null:
+              where: "not is_incident_missing_details"

      - name: id_booking
        data_type: numeric
@ -270,6 +288,7 @@ models:
          - dbt_expectations.expect_column_values_to_be_between:
              min_value: 0
              strictly: false
+              where: "not is_incident_missing_details"

      - name: booking_services
        data_type: text
@ -301,7 +320,7 @@ models:
        description: "Timestamp of the calculation."
        data_tests:
          - not_null:
-              where: "calculated_payout_amount_in_txn_currency > 0"
+              where: "calculated_payout_amount_in_txn_currency > 0 and not is_incident_missing_details"

      - name: protection_name
        data_type: text
@ -323,6 +342,7 @@ models:
          - dbt_expectations.expect_column_values_to_be_between:
              min_value: 0
              strictly: false
+              where: "not is_incident_missing_details"

      - name: upper_protection_limit_usd
        data_type: numeric
@ -331,6 +351,7 @@ models:
          - dbt_expectations.expect_column_values_to_be_between:
              min_value: 0
              strictly: false
+              where: "not is_incident_missing_details"

      - name: calculated_payout_amount_in_txn_currency
        data_type: numeric
@ -339,13 +360,14 @@ models:
          - dbt_expectations.expect_column_values_to_be_between:
              min_value: 0
              strictly: false
+              where: "not is_incident_missing_details"

      - name: calculated_payout_currency
        data_type: text
        description: "Currency of the calculated payout amount."
        data_tests:
          - not_null:
-              where: "calculated_payout_amount_in_txn_currency > 0"
+              where: "calculated_payout_amount_in_txn_currency > 0 and not is_incident_missing_details"

      - name: calculated_payout_amount_in_usd
        data_type: numeric
@ -354,6 +376,7 @@ models:
          - dbt_expectations.expect_column_values_to_be_between:
              min_value: 0
              strictly: false
+              where: "not is_incident_missing_details"

      - name: submitted_payout_amount_in_txn_currency
        data_type: numeric
@ -362,13 +385,14 @@ models:
          - dbt_expectations.expect_column_values_to_be_between:
              min_value: 0
              strictly: false
+              where: "not is_incident_missing_details"

      - name: submitted_payout_currency
        data_type: text
        description: "Currency of the submitted payout amount."
        data_tests:
          - not_null:
-              where: "submitted_payout_amount_in_txn_currency > 0"
+              where: "submitted_payout_amount_in_txn_currency > 0 and not is_incident_missing_details"

      - name: submitted_payout_amount_in_usd
        data_type: numeric
@ -377,6 +401,7 @@ models:
          - dbt_expectations.expect_column_values_to_be_between:
              min_value: 0
              strictly: false
+              where: "not is_incident_missing_details"

      - name: calculated_guest_charge_amount_in_txn_currency
        data_type: numeric
@ -385,13 +410,14 @@ models:
          - dbt_expectations.expect_column_values_to_be_between:
              min_value: 0
              strictly: false
+              where: "not is_incident_missing_details"

      - name: calculated_guest_charge_currency
        data_type: text
        description: "Currency of the calculated guest charge amount."
        data_tests:
          - not_null:
-              where: "calculated_guest_charge_amount_in_txn_currency > 0"
+              where: "calculated_guest_charge_amount_in_txn_currency > 0 and not is_incident_missing_details"

      - name: calculated_guest_charge_amount_in_usd
        data_type: numeric
@ -400,6 +426,7 @@ models:
          - dbt_expectations.expect_column_values_to_be_between:
              min_value: 0
              strictly: false
+              where: "not is_incident_missing_details"

      - name: submitted_guest_charge_amount_in_txn_currency
        data_type: numeric
@ -408,13 +435,14 @@ models:
          - dbt_expectations.expect_column_values_to_be_between:
              min_value: 0
              strictly: false
+              where: "not is_incident_missing_details"

      - name: submitted_guest_charge_currency
        data_type: text
        description: "Currency of the submitted guest charge amount."
        data_tests:
          - not_null:
-              where: "submitted_guest_charge_amount_in_txn_currency > 0"
+              where: "submitted_guest_charge_amount_in_txn_currency > 0 and not is_incident_missing_details"

      - name: submitted_guest_charge_amount_in_usd
        data_type: numeric
@ -423,6 +451,7 @@ models:
          - dbt_expectations.expect_column_values_to_be_between:
              min_value: 0
              strictly: false
+              where: "not is_incident_missing_details"

      - name: third_party_claim_submitted
        data_type: boolean
@ -440,6 +469,7 @@ models:
          - dbt_expectations.expect_column_values_to_be_between:
              min_value: 0
              strictly: false
+              where: "not is_incident_missing_details"

      - name: third_party_claim_currency
        data_type: text
@ -447,7 +477,7 @@ models:
          compensation from another platform."
        data_tests:
          - not_null:
-              where: "third_party_claim_amount_in_txn_currency > 0"
+              where: "third_party_claim_amount_in_txn_currency > 0 and not is_incident_missing_details"

      - name: cosmos_db_timestamp_utc
        data_type: timestamp
--- a/models/staging/resolutions/stg_resolutions__incidents.sql
+++ b/models/staging/resolutions/stg_resolutions__incidents.sql
@ -3,9 +3,13 @@
 -- Many of them don't have the same necessary fields as the current model.
 -- The same happens with the 'ManualFormWeb' document_version.
 {% set old_or_manual_documents = ("OLDCLAIMTOINCIDENTMODEL", "MANUALFORMWEB") %}
+
 -- Test or cancelled incidents are not considered in the reporting.
 {% set tests_or_cancelled_incidents = "ARCHIVED" %}

+-- Some incidents have insufficient details which might create data quality issues.
+{% set insufficient_details_incidents = "INSUFFICIENT DETAILS" %}
+
 with
    raw_incident as (select * from {{ source("resolutions", "incident") }}),
    deduped_verifications as (
@ -16,6 +20,8 @@ select
    {{ adapter.quote("documents") }} ->> 'id' as id_incident,
    {{ adapter.quote("documents") }} ->> 'VerificationId' as id_verification,
    {{ adapter.quote("documents") }} ->> 'CurrentStatusName' as current_status_name,
+    upper({{ adapter.quote("documents") }} ->> 'CurrentStatusName')
+    = '{{ insufficient_details_incidents }}' as is_incident_missing_details,
    ({{ adapter.quote("documents") }} ->> 'IsSubmissionComplete')::boolean
    as is_submission_complete,
    {{ adapter.quote("documents") }} ->> 'CurrentAgentName' as current_agent_name,