Merged PR 5403: Removes test accounts

# Description

This PR removes test accounts outright, rather than just flagging them, from:
* `int_core__unified_user`, which means the removal also propagates to `int_core__user_host`.

It does so by creating a new, extremely simple model named: `int_core__user_test_account`

Then, `int_core__user_host` is inner joined, with a short explanatory comment, in the following models:
* `int_core__accommodation`
* `int_core__unique_accommodation_to_user`
* `int_core__bookings`
* `int_core__verification_requests`
* `int_core__payments`

This effectively removes test accounts from these models.

Lastly, every definition and usage of the `is_test_account` column has been removed, mostly in New Dash-related models.

# Checklist

**As discussed in the daily, I'll be playing around in prod with this PR**

- [ ] The edited models and dependants run properly with production data.
- [ ] The edited models are sufficiently documented.
- [ ] The edited models contain PK tests, and I've run and passed them.
- [ ] I have checked for DRY opportunities with other models and docs.
- [ ] I've picked the right materialization for the affected models.

# Other

- [ ] Check if a full-refresh is required after this PR is merged.

Related work items: #27319
This commit is contained in:
Oriol Roqué Paniagua 2025-06-06 12:17:49 +00:00
parent 08678427ad
commit dc0abe6098
15 changed files with 76 additions and 52 deletions

View file

@ -4,7 +4,8 @@ with
int_core__unique_accommodation_to_user as ( int_core__unique_accommodation_to_user as (
select * from {{ ref("int_core__unique_accommodation_to_user") }} select * from {{ ref("int_core__unique_accommodation_to_user") }}
), ),
int_core__country as (select * from {{ ref("int_core__country") }}) int_core__country as (select * from {{ ref("int_core__country") }}),
int_core__user_host as (select * from {{ ref("int_core__user_host") }})
select select
sca.id_accommodation, sca.id_accommodation,
@ -32,4 +33,9 @@ from stg_core__accommodation sca
left join left join
int_core__unique_accommodation_to_user scatu int_core__unique_accommodation_to_user scatu
on sca.id_accommodation = scatu.id_accommodation on sca.id_accommodation = scatu.id_accommodation
-- The following join is to ensure we include only Users categorised as Hosts,
-- which in turn ensures the removal of test accounts.
inner join
int_core__user_host uh
on coalesce(scatu.id_user_owner, sca.id_creating_user) = uh.id_user_host
left join int_core__country icc on sca.id_country = icc.id_country left join int_core__country icc on sca.id_country = icc.id_country

View file

@ -51,5 +51,5 @@ left join
booking_integrations bi booking_integrations bi
on bi.id_user_host = uh.id_user_host on bi.id_user_host = uh.id_user_host
and bi.display_name = it.display_name and bi.display_name = it.display_name
where uh.is_test_account = false and uh.has_active_pms = true where uh.has_active_pms = true
group by 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 group by 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12

View file

@ -58,7 +58,7 @@ from stg_core__booking_view_to_service bvts
inner join stg_core__booking_view bv on bvts.id_booking_view = bv.id_booking_view inner join stg_core__booking_view bv on bvts.id_booking_view = bv.id_booking_view
left join stg_core__booking b on bv.id_booking = b.id_booking left join stg_core__booking b on bv.id_booking = b.id_booking
left join stg_core__booking_state bs on b.id_booking_state = bs.id_booking_state left join stg_core__booking_state bs on b.id_booking_state = bs.id_booking_state
left join int_core__user_host uh on b.id_user_host = uh.id_user_host inner join int_core__user_host uh on b.id_user_host = uh.id_user_host
left join left join
stg_core__booking_to_product_bundle btpb stg_core__booking_to_product_bundle btpb
on b.id_booking = btpb.id_booking on b.id_booking = btpb.id_booking
@ -66,4 +66,3 @@ left join
left join left join
stg_core__user_product_bundle upb stg_core__user_product_bundle upb
on btpb.id_user_product_bundle = upb.id_user_product_bundle on btpb.id_user_product_bundle = upb.id_user_product_bundle
where uh.is_test_account = false

View file

@ -15,7 +15,8 @@ with
), ),
int_core__verification_request_booking_source as ( int_core__verification_request_booking_source as (
select * from {{ ref("int_core__verification_request_booking_source") }} select * from {{ ref("int_core__verification_request_booking_source") }}
) ),
int_core__user_host as (select * from {{ ref("int_core__user_host") }})
select select
b.id_booking, b.id_booking,
b.id_user_guest, b.id_user_guest,
@ -49,6 +50,9 @@ select
b.updated_date_utc, b.updated_date_utc,
b.dwh_extracted_at_utc b.dwh_extracted_at_utc
from stg_core__booking b from stg_core__booking b
-- The following join is to ensure we include only Users categorised as Hosts,
-- which in turn ensures the removal of test accounts.
inner join int_core__user_host uh on b.id_user_host = uh.id_user_host
left join stg_core__booking_state bs on b.id_booking_state = bs.id_booking_state left join stg_core__booking_state bs on b.id_booking_state = bs.id_booking_state
left join int_core__duplicate_bookings db on b.id_booking = db.id_booking left join int_core__duplicate_bookings db on b.id_booking = db.id_booking
left join int_core__booking_charge_events bce on b.id_booking = bce.id_booking left join int_core__booking_charge_events bce on b.id_booking = bce.id_booking

View file

@ -149,10 +149,7 @@ with
left join accommodation_aggregation a on upb.id_user_host = a.id_user_host left join accommodation_aggregation a on upb.id_user_host = a.id_user_host
left join atpb_aggregation atpb on upb.id_user_host = atpb.id_user_host left join atpb_aggregation atpb on upb.id_user_host = atpb.id_user_host
left join btpb_aggregation btpb on upb.id_user_host = btpb.id_user_host left join btpb_aggregation btpb on upb.id_user_host = btpb.id_user_host
left join inner join int_core__user_host uh on upb.id_user_host = uh.id_user_host
int_core__user_host uh
on upb.id_user_host = uh.id_user_host
and uh.is_test_account = false
left join int_kpis__lifecycle_daily_deal kldd on uh.id_deal = kldd.id_deal left join int_kpis__lifecycle_daily_deal kldd on uh.id_deal = kldd.id_deal
) )
select select

View file

@ -75,7 +75,9 @@ select
b.check_out_date_utc, b.check_out_date_utc,
uh.account_currency_iso4217 as host_currency uh.account_currency_iso4217 as host_currency
from int_core__guest_journey_payments gjp from int_core__guest_journey_payments gjp
left join int_core__user_host uh on gjp.id_user_host = uh.id_user_host -- The following join is to ensure we include only Users categorised as Hosts,
-- which in turn ensures the removal of test accounts.
inner join int_core__user_host uh on gjp.id_user_host = uh.id_user_host
left join left join
ranked_bookings rb ranked_bookings rb
on gjp.id_verification_request = rb.id_verification_request on gjp.id_verification_request = rb.id_verification_request

View file

@ -1,19 +1,11 @@
{% set test_account_claim_type = "('TESTACCOUNT')" %}
{% set test_account_claim_value = "('1')" %}
with with
stg_core__user as (select * from {{ ref("stg_core__user") }}), stg_core__user as (select * from {{ ref("stg_core__user") }}),
stg_core__superhog_user as (select * from {{ ref("stg_core__superhog_user") }}), stg_core__superhog_user as (select * from {{ ref("stg_core__superhog_user") }}),
stg_core__currency as (select * from {{ ref("stg_core__currency") }}), stg_core__currency as (select * from {{ ref("stg_core__currency") }}),
stg_core__country as (select * from {{ ref("stg_core__country") }}), stg_core__country as (select * from {{ ref("stg_core__country") }}),
stg_core__account_type as (select * from {{ ref("stg_core__account_type") }}), stg_core__account_type as (select * from {{ ref("stg_core__account_type") }}),
stg_core__claim as (select * from {{ ref("stg_core__claim") }}), int_core__user_test_account as (
test_accounts as ( select * from {{ ref("int_core__user_test_account") }}
select distinct id_user
from stg_core__claim
where
upper(claim_type) in {{ test_account_claim_type }}
and claim_value in {{ test_account_claim_value }}
) )
select select
u.id_user, u.id_user,
@ -53,11 +45,13 @@ select
su.verified_date_utc, su.verified_date_utc,
su.number_of_properties, su.number_of_properties,
su.id_superhog_verified_set, su.id_superhog_verified_set,
su.id_user_verification_status, su.id_user_verification_status
case when ta.id_user is not null then true else false end as is_test_account
from stg_core__user as u from stg_core__user as u
inner join stg_core__superhog_user as su on u.id_user = su.id_superhoguser inner join stg_core__superhog_user as su on u.id_user = su.id_superhoguser
left join stg_core__currency cu on u.id_account_currency = cu.id_currency left join stg_core__currency cu on u.id_account_currency = cu.id_currency
left join stg_core__country co on u.id_billing_country = co.id_country left join stg_core__country co on u.id_billing_country = co.id_country
left join stg_core__account_type act on u.id_account_type = act.id_account_type left join stg_core__account_type act on u.id_account_type = act.id_account_type
left join test_accounts ta on u.id_user = ta.id_user -- Exclude test accounts
left join
int_core__user_test_account ta on u.id_user = ta.id_user
where ta.id_user is null

View file

@ -1,4 +1,3 @@
/* /*
This model selects the last user host assigned to an accommodation This model selects the last user host assigned to an accommodation
based on the maximum id_accommodation_to_user. This means, if we have based on the maximum id_accommodation_to_user. This means, if we have
@ -19,6 +18,7 @@ with
stg_core__accommodation_to_user as ( stg_core__accommodation_to_user as (
select * from {{ ref("stg_core__accommodation_to_user") }} select * from {{ ref("stg_core__accommodation_to_user") }}
), ),
int_core__user_host as (select * from {{ ref("int_core__user_host") }}),
duplicated_accommodation_to_user as ( duplicated_accommodation_to_user as (
select select
id_user_owner, id_user_owner,
@ -28,6 +28,9 @@ with
partition by id_accommodation order by id_accommodation_to_user desc partition by id_accommodation order by id_accommodation_to_user desc
) as rn ) as rn
from stg_core__accommodation_to_user scatu from stg_core__accommodation_to_user scatu
-- The following join is to ensure we include only Users categorised as Hosts,
-- which in turn ensures the removal of test accounts.
inner join int_core__user_host uh on scatu.id_user_owner = uh.id_user_host
) )
select id_user_owner, id_accommodation select id_user_owner, id_accommodation
from duplicated_accommodation_to_user from duplicated_accommodation_to_user

View file

@ -51,7 +51,6 @@ with
select select
uu.id_user as id_user_host, uu.id_user as id_user_host,
uu.account_type, uu.account_type,
uu.is_test_account,
uu.id_billing_country, uu.id_billing_country,
uu.billing_country_name, uu.billing_country_name,
uu.billing_country_iso_2, uu.billing_country_iso_2,

View file

@ -78,7 +78,6 @@ inner join
on upb.id_user = uh.id_user_host on upb.id_user = uh.id_user_host
and uh.is_user_in_new_dash = true and uh.is_user_in_new_dash = true
and uh.is_missing_id_deal = false and uh.is_missing_id_deal = false
and uh.is_test_account = false
left join stg_core__protection_plan pp on upb.id_protection_plan = pp.id_protection_plan left join stg_core__protection_plan pp on upb.id_protection_plan = pp.id_protection_plan
left join left join
product_bundle_services_agg pbsa product_bundle_services_agg pbsa

View file

@ -12,10 +12,7 @@ with
int_core__user_host as ( int_core__user_host as (
select * select *
from {{ ref("int_core__user_host") }} from {{ ref("int_core__user_host") }}
where where is_user_in_new_dash = true and is_missing_id_deal = false
is_user_in_new_dash = true
and is_missing_id_deal = false
and is_test_account = false
) )
select select

View file

@ -0,0 +1,15 @@
/*
This model lists the users that are flagged as test accounts.
A user is considered a test account when it has a claim whose type,
compared in upper case, is TESTACCOUNT and whose value is 1.
The output contains one row per user, with id_user as its only column.
*/
{% set test_claim_types = "('TESTACCOUNT')" %}
{% set test_claim_values = "('1')" %}
{{ config(materialized="table") }}
with
    stg_core__claim as (select * from {{ ref("stg_core__claim") }}),
    -- Claims that mark their user as a test account. A user may hold
    -- several matching claims, hence the de-duplication in the final select.
    test_account_claims as (
        select id_user
        from stg_core__claim
        where
            upper(claim_type) in {{ test_claim_types }}
            and claim_value in {{ test_claim_values }}
    )
select distinct id_user
from test_account_claims

View file

@ -11,7 +11,8 @@ with
), ),
int_core__verification_request_booking_source as ( int_core__verification_request_booking_source as (
select * from {{ ref("int_core__verification_request_booking_source") }} select * from {{ ref("int_core__verification_request_booking_source") }}
) ),
int_core__user_host as (select * from {{ ref("int_core__user_host") }})
select select
vr.id_verification_request, vr.id_verification_request,
vr.uuid_verification_request, vr.uuid_verification_request,
@ -56,6 +57,9 @@ select
vr.updated_date_utc, vr.updated_date_utc,
vr.dwh_extracted_at_utc vr.dwh_extracted_at_utc
from stg_core__verification_request vr from stg_core__verification_request vr
-- The following join is to ensure we include only Users categorised as Hosts,
-- which in turn ensures the removal of test accounts.
inner join int_core__user_host uh on vr.id_user_host = uh.id_user_host
left join left join
int_core__verification_request_completeness completeness int_core__verification_request_completeness completeness
on vr.id_verification_request = completeness.id_verification_request on vr.id_verification_request = completeness.id_verification_request

View file

@ -171,6 +171,11 @@ models:
during check-in. during check-in.
- name: int_core__unified_user - name: int_core__unified_user
description: |
A model that unifies the user information from the backend.
It contains information on the user, such as name, email, phone number,
billing country, account type, and other relevant details.
Be aware that this model excludes the test accounts listed in `int_core__user_test_account`.
columns: columns:
- name: id_user - name: id_user
data_type: character varying data_type: character varying
@ -301,16 +306,9 @@ models:
- name: id_user_verification_status - name: id_user_verification_status
data_type: bigint data_type: bigint
description: The ID of the user verification status. description: The ID of the user verification status.
- name: is_test_account
data_type: boolean
description: |
True if the user is a test account. False otherwise.
Note that there might be other users that are test accounts
that are not controlled by this field. However it provides
a first level of filtering.
- name: int_core__vr_check_in_cover - name: int_core__vr_check_in_cover
description: | description: |
This table holds information on verification requests with Check-in Hero This table holds information on verification requests with Check-in Hero
available for the guests. available for the guests.
@ -836,15 +834,15 @@ models:
This model is restricted to active user so it doesn't include historical data This model is restricted to active user so it doesn't include historical data
like users that had check-in cover but are currently inactive. like users that had check-in cover but are currently inactive.
The new candidate version for this model, v2, changes the strategy to The new candidate version for this model, v2, changes the strategy to
define which hosts are offering check in cover. v1 used to look at define which hosts are offering check in cover. v1 used to look at
the verification set configurations and added hosts which had the cover in the verification set configurations and added hosts which had the cover in
it. v2 instead simply assumes that hosts that have had Guest Journeys it. v2 instead simply assumes that hosts that have had Guest Journeys
which offer the cover in the last 30 natural days are relevant. This is which offer the cover in the last 30 natural days are relevant. This is
simpler, but considered good enough. This decision was made as part of the simpler, but considered good enough. This decision was made as part of the
migration to the Guest Products data model, which would have made sticking migration to the Guest Products data model, which would have made sticking
to the old way extremely complex and error prone. to the old way extremely complex and error prone.
versions: versions:
- v: 1 - v: 1
- v: 2 - v: 2
@ -1656,13 +1654,6 @@ models:
data_type: string data_type: string
description: | description: |
Name of the account type. Can be null and might be not up-to-date. Name of the account type. Can be null and might be not up-to-date.
- name: is_test_account
data_type: boolean
description: |
Flag to identify if the user is a test account.
Can be null and might be not up-to-date. Be aware that
this might not include all test accounts, but it provides
a first level of filtering.
- name: id_billing_country - name: id_billing_country
data_type: integer data_type: integer
description: | description: |
@ -6498,3 +6489,19 @@ models:
be false as status only reflects the latest transition. be false as status only reflects the latest transition.
data_tests: data_tests:
- not_null - not_null
- name: int_core__user_test_account
description: |
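A simple model that holds the users flagged as test accounts, i.e.
users with a `TESTACCOUNT` claim whose value is `1`. Note that there
might be other test accounts that do not carry this claim.
This is used to identify test accounts and filter them out from
production data.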
columns:
- name: id_user
data_type: character varying
description: |
Unique identifier of the user that is a test account.
It cannot be null.
data_tests:
- not_null
- unique

View file

@ -38,7 +38,6 @@ with
-- Filters to replicate New Dash Overview -- Filters to replicate New Dash Overview
icuh.is_user_in_new_dash = true icuh.is_user_in_new_dash = true
and icuh.is_missing_id_deal = false and icuh.is_missing_id_deal = false
and icuh.is_test_account = false
-- Filter to only select new business (i.e., not migrated from Old Dash) -- Filter to only select new business (i.e., not migrated from Old Dash)
and icuh.has_user_moved_from_old_dash = false and icuh.has_user_moved_from_old_dash = false
group by 1, 2, 3, 4, 5, 6, 7, 8 group by 1, 2, 3, 4, 5, 6, 7, 8
@ -88,7 +87,6 @@ with
-- Filters to replicate New Dash Overview -- Filters to replicate New Dash Overview
icuh.is_user_in_new_dash = true icuh.is_user_in_new_dash = true
and icuh.is_missing_id_deal = false and icuh.is_missing_id_deal = false
and icuh.is_test_account = false
-- Filter to only select new business (i.e., not migrated from Old Dash) -- Filter to only select new business (i.e., not migrated from Old Dash)
and icuh.has_user_moved_from_old_dash = false and icuh.has_user_moved_from_old_dash = false
group by 1 group by 1