Merged PR 5403: Removes test accounts

# Description

This PR removes test accounts outright, rather than just flagging them, from:
* `int_core__unified_user`, which means the removal also propagates to `int_core__user_host`.

It does so by creating a new, extremely simple model named `int_core__user_test_account`.

Then, `int_core__user_host` is inner joined, with a short explanatory comment, in the following models:
* `int_core__accommodation`
* `int_core__unique_accommodation_to_user`
* `int_core__bookings`
* `int_core__verification_requests`
* `int_core__payments`

This effectively removes test accounts from these models.

Lastly, every definition and usage of the column `is_test_account` has been removed, mostly in New Dash-related models.

# Checklist

**As discussed in the daily, I'll be playing around in prod with this PR**

- [ ] The edited models and dependants run properly with production data.
- [ ] The edited models are sufficiently documented.
- [ ] The edited models contain PK tests, and I've run and passed them.
- [ ] I have checked for DRY opportunities with other models and docs.
- [ ] I've picked the right materialization for the affected models.

# Other

- [ ] Check if a full-refresh is required after this PR is merged.

Related work items: #27319
This commit is contained in:
Oriol Roqué Paniagua 2025-06-06 12:17:49 +00:00
parent 08678427ad
commit dc0abe6098
15 changed files with 76 additions and 52 deletions

View file

@ -4,7 +4,8 @@ with
int_core__unique_accommodation_to_user as (
select * from {{ ref("int_core__unique_accommodation_to_user") }}
),
int_core__country as (select * from {{ ref("int_core__country") }})
int_core__country as (select * from {{ ref("int_core__country") }}),
int_core__user_host as (select * from {{ ref("int_core__user_host") }})
select
sca.id_accommodation,
@ -32,4 +33,9 @@ from stg_core__accommodation sca
left join
int_core__unique_accommodation_to_user scatu
on sca.id_accommodation = scatu.id_accommodation
-- The following join is to ensure we include only Users categorised as Hosts,
-- which in turn ensures the removal of test accounts.
inner join
int_core__user_host uh
on coalesce(scatu.id_user_owner, sca.id_creating_user) = uh.id_user_host
left join int_core__country icc on sca.id_country = icc.id_country

View file

@ -51,5 +51,5 @@ left join
booking_integrations bi
on bi.id_user_host = uh.id_user_host
and bi.display_name = it.display_name
where uh.is_test_account = false and uh.has_active_pms = true
where uh.has_active_pms = true
group by 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12

View file

@ -58,7 +58,7 @@ from stg_core__booking_view_to_service bvts
inner join stg_core__booking_view bv on bvts.id_booking_view = bv.id_booking_view
left join stg_core__booking b on bv.id_booking = b.id_booking
left join stg_core__booking_state bs on b.id_booking_state = bs.id_booking_state
left join int_core__user_host uh on b.id_user_host = uh.id_user_host
inner join int_core__user_host uh on b.id_user_host = uh.id_user_host
left join
stg_core__booking_to_product_bundle btpb
on b.id_booking = btpb.id_booking
@ -66,4 +66,3 @@ left join
left join
stg_core__user_product_bundle upb
on btpb.id_user_product_bundle = upb.id_user_product_bundle
where uh.is_test_account = false

View file

@ -15,7 +15,8 @@ with
),
int_core__verification_request_booking_source as (
select * from {{ ref("int_core__verification_request_booking_source") }}
)
),
int_core__user_host as (select * from {{ ref("int_core__user_host") }})
select
b.id_booking,
b.id_user_guest,
@ -49,6 +50,9 @@ select
b.updated_date_utc,
b.dwh_extracted_at_utc
from stg_core__booking b
-- The following join is to ensure we include only Users categorised as Hosts,
-- which in turn ensures the removal of test accounts.
inner join int_core__user_host uh on b.id_user_host = uh.id_user_host
left join stg_core__booking_state bs on b.id_booking_state = bs.id_booking_state
left join int_core__duplicate_bookings db on b.id_booking = db.id_booking
left join int_core__booking_charge_events bce on b.id_booking = bce.id_booking

View file

@ -149,10 +149,7 @@ with
left join accommodation_aggregation a on upb.id_user_host = a.id_user_host
left join atpb_aggregation atpb on upb.id_user_host = atpb.id_user_host
left join btpb_aggregation btpb on upb.id_user_host = btpb.id_user_host
left join
int_core__user_host uh
on upb.id_user_host = uh.id_user_host
and uh.is_test_account = false
inner join int_core__user_host uh on upb.id_user_host = uh.id_user_host
left join int_kpis__lifecycle_daily_deal kldd on uh.id_deal = kldd.id_deal
)
select

View file

@ -75,7 +75,9 @@ select
b.check_out_date_utc,
uh.account_currency_iso4217 as host_currency
from int_core__guest_journey_payments gjp
left join int_core__user_host uh on gjp.id_user_host = uh.id_user_host
-- The following join is to ensure we include only Users categorised as Hosts,
-- which in turn ensures the removal of test accounts.
inner join int_core__user_host uh on gjp.id_user_host = uh.id_user_host
left join
ranked_bookings rb
on gjp.id_verification_request = rb.id_verification_request

View file

@ -1,19 +1,11 @@
{% set test_account_claim_type = "('TESTACCOUNT')" %}
{% set test_account_claim_value = "('1')" %}
with
stg_core__user as (select * from {{ ref("stg_core__user") }}),
stg_core__superhog_user as (select * from {{ ref("stg_core__superhog_user") }}),
stg_core__currency as (select * from {{ ref("stg_core__currency") }}),
stg_core__country as (select * from {{ ref("stg_core__country") }}),
stg_core__account_type as (select * from {{ ref("stg_core__account_type") }}),
stg_core__claim as (select * from {{ ref("stg_core__claim") }}),
test_accounts as (
select distinct id_user
from stg_core__claim
where
upper(claim_type) in {{ test_account_claim_type }}
and claim_value in {{ test_account_claim_value }}
int_core__user_test_account as (
select * from {{ ref("int_core__user_test_account") }}
)
select
u.id_user,
@ -53,11 +45,13 @@ select
su.verified_date_utc,
su.number_of_properties,
su.id_superhog_verified_set,
su.id_user_verification_status,
case when ta.id_user is not null then true else false end as is_test_account
su.id_user_verification_status
from stg_core__user as u
inner join stg_core__superhog_user as su on u.id_user = su.id_superhoguser
left join stg_core__currency cu on u.id_account_currency = cu.id_currency
left join stg_core__country co on u.id_billing_country = co.id_country
left join stg_core__account_type act on u.id_account_type = act.id_account_type
left join test_accounts ta on u.id_user = ta.id_user
-- Exclude test accounts
left join
int_core__user_test_account ta on u.id_user = ta.id_user
where ta.id_user is null

View file

@ -1,4 +1,3 @@
/*
This model selects the last user host assigned to an accommodation
based on the maximum id_accommodation_to_user. This means, if we have
@ -19,6 +18,7 @@ with
stg_core__accommodation_to_user as (
select * from {{ ref("stg_core__accommodation_to_user") }}
),
int_core__user_host as (select * from {{ ref("int_core__user_host") }}),
duplicated_accommodation_to_user as (
select
id_user_owner,
@ -28,6 +28,9 @@ with
partition by id_accommodation order by id_accommodation_to_user desc
) as rn
from stg_core__accommodation_to_user scatu
-- The following join is to ensure we include only Users categorised as Hosts,
-- which in turn ensures the removal of test accounts.
inner join int_core__user_host uh on scatu.id_user_owner = uh.id_user_host
)
select id_user_owner, id_accommodation
from duplicated_accommodation_to_user

View file

@ -51,7 +51,6 @@ with
select
uu.id_user as id_user_host,
uu.account_type,
uu.is_test_account,
uu.id_billing_country,
uu.billing_country_name,
uu.billing_country_iso_2,

View file

@ -78,7 +78,6 @@ inner join
on upb.id_user = uh.id_user_host
and uh.is_user_in_new_dash = true
and uh.is_missing_id_deal = false
and uh.is_test_account = false
left join stg_core__protection_plan pp on upb.id_protection_plan = pp.id_protection_plan
left join
product_bundle_services_agg pbsa

View file

@ -12,10 +12,7 @@ with
int_core__user_host as (
select *
from {{ ref("int_core__user_host") }}
where
is_user_in_new_dash = true
and is_missing_id_deal = false
and is_test_account = false
where is_user_in_new_dash = true and is_missing_id_deal = false
)
select

View file

@ -0,0 +1,15 @@
{{ config(materialized="table") }}

{#
    Claim type / claim value pair that flags a user as a test account.
    Both are rendered verbatim as SQL `in (...)` lists in the filter below.
#}
{% set test_account_claim_type = "('TESTACCOUNT')" %}
{% set test_account_claim_value = "('1')" %}

with
    stg_core__claim as (select * from {{ ref("stg_core__claim") }}),

    -- Claims matching the test-account type (compared case-insensitively
    -- via upper()) and the test-account value.
    test_account_claims as (
        select id_user
        from stg_core__claim
        where
            upper(claim_type) in {{ test_account_claim_type }}
            and claim_value in {{ test_account_claim_value }}
    )

-- Collapse to a single row per user, whatever the number of matching claims.
select id_user
from test_account_claims
group by id_user

View file

@ -11,7 +11,8 @@ with
),
int_core__verification_request_booking_source as (
select * from {{ ref("int_core__verification_request_booking_source") }}
)
),
int_core__user_host as (select * from {{ ref("int_core__user_host") }})
select
vr.id_verification_request,
vr.uuid_verification_request,
@ -56,6 +57,9 @@ select
vr.updated_date_utc,
vr.dwh_extracted_at_utc
from stg_core__verification_request vr
-- The following join is to ensure we include only Users categorised as Hosts,
-- which in turn ensures the removal of test accounts.
inner join int_core__user_host uh on vr.id_user_host = uh.id_user_host
left join
int_core__verification_request_completeness completeness
on vr.id_verification_request = completeness.id_verification_request

View file

@ -171,6 +171,11 @@ models:
during check-in.
- name: int_core__unified_user
description: |
A model that unifies the user information from the backend.
It contains information on the user, such as name, email, phone number,
billing country, account type, and other relevant details.
Be aware that this model excludes any test account.
columns:
- name: id_user
data_type: character varying
@ -301,16 +306,9 @@ models:
- name: id_user_verification_status
data_type: bigint
description: The ID of the user verification status.
- name: is_test_account
data_type: boolean
description: |
True if the user is a test account. False otherwise.
Note that there might be other users that are test accounts
that are not controlled by this field. However it provides
a first level of filtering.
- name: int_core__vr_check_in_cover
description: |
description: |
This table holds information on verification requests with Check-in Hero
available for the guests.
@ -836,15 +834,15 @@ models:
This model is restricted to active users, so it doesn't include historical data
like users that had check-in cover but are currently inactive.
The new candidate version for this model, v2, changes the strategy to
The new candidate version for this model, v2, changes the strategy to
define which hosts are offering check in cover. v1 used to look at
the verification set configurations and added hosts which had the cover in
it. v2 instead simply assumes that hosts that have had Guest Journeys
which offer the cover in the last 30 natural days are relevant. This is
it. v2 instead simply assumes that hosts that have had Guest Journeys
which offer the cover in the last 30 natural days are relevant. This is
simpler, but considered good enough. This decision was made as part of the
migration to the Guest Products data model, which would have made sticking
to the old way extremely complex and error prone.
versions:
- v: 1
- v: 2
@ -1656,13 +1654,6 @@ models:
data_type: string
description: |
Name of the account type. Can be null and might not be up to date.
- name: is_test_account
data_type: boolean
description: |
Flag to identify if the user is a test account.
Can be null and might be not up-to-date. Be aware that
this might not include all test accounts, but it provides
a first level of filtering.
- name: id_billing_country
data_type: integer
description: |
@ -6498,3 +6489,19 @@ models:
be false as status only reflects the latest transition.
data_tests:
- not_null
- name: int_core__user_test_account
description: |
A simple model that holds all the test accounts in the system.
This is used to identify test accounts and filter them out from
production data.
columns:
- name: id_user
data_type: character varying
description: |
Unique identifier of the user that is a test account.
It cannot be null.
data_tests:
- not_null
- unique

View file

@ -38,7 +38,6 @@ with
-- Filters to replicate New Dash Overview
icuh.is_user_in_new_dash = true
and icuh.is_missing_id_deal = false
and icuh.is_test_account = false
-- Filter to only select new business (i.e., not migrated from Old Dash)
and icuh.has_user_moved_from_old_dash = false
group by 1, 2, 3, 4, 5, 6, 7, 8
@ -88,7 +87,6 @@ with
-- Filters to replicate New Dash Overview
icuh.is_user_in_new_dash = true
and icuh.is_missing_id_deal = false
and icuh.is_test_account = false
-- Filter to only select new business (i.e., not migrated from Old Dash)
and icuh.has_user_moved_from_old_dash = false
group by 1