Merged PR 2125: Fixing accommodation host
Fixing accommodation host by using accommodation to user, after discussion with Ben R. This improves data quality, even though there's some duplicates removal. I checked and it effectively removes accommodations that mostly were considered as 'Never Booked', thus not a massive impact is expected for the business kpis. But in any case, let's do things properly :) Related work items: #17538
This commit is contained in:
parent
6446dfdb9e
commit
5c12dd3b13
3 changed files with 49 additions and 2 deletions
|
|
@ -1,10 +1,16 @@
|
||||||
{{ config(materialized="table", unique_key="id_accommodation") }}
|
{{ config(materialized="table", unique_key="id_accommodation") }}
|
||||||
with
|
with
|
||||||
stg_core__accommodation as (select * from {{ ref("stg_core__accommodation") }}),
|
stg_core__accommodation as (select * from {{ ref("stg_core__accommodation") }}),
|
||||||
|
int_core__unique_accommodation_to_user as (
|
||||||
|
select * from {{ ref("int_core__unique_accommodation_to_user") }}
|
||||||
|
),
|
||||||
int_core__country as (select * from {{ ref("int_core__country") }})
|
int_core__country as (select * from {{ ref("int_core__country") }})
|
||||||
|
|
||||||
select
|
select
|
||||||
sca.id_accommodation,
|
sca.id_accommodation,
|
||||||
sca.id_creating_user as id_user_host,
|
-- Prioritise user coming from AccommodationToUser. If not found, use
|
||||||
|
-- the one from Accommodation.
|
||||||
|
coalesce(scatu.id_user_owner, sca.id_creating_user) as id_user_host,
|
||||||
sca.id_payment_validation_set,
|
sca.id_payment_validation_set,
|
||||||
sca.friendly_name,
|
sca.friendly_name,
|
||||||
icc.iso_2 as country_iso_2,
|
icc.iso_2 as country_iso_2,
|
||||||
|
|
@ -28,4 +34,7 @@ select
|
||||||
date(sca.updated_at_utc) as updated_date_utc,
|
date(sca.updated_at_utc) as updated_date_utc,
|
||||||
sca.dwh_extracted_at_utc
|
sca.dwh_extracted_at_utc
|
||||||
from stg_core__accommodation sca
|
from stg_core__accommodation sca
|
||||||
|
left join
|
||||||
|
int_core__unique_accommodation_to_user scatu
|
||||||
|
on sca.id_accommodation = scatu.id_accommodation
|
||||||
left join int_core__country icc on sca.id_country = icc.id_country
|
left join int_core__country icc on sca.id_country = icc.id_country
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ This model provides Month-To-Date (MTD) accommodation lifecycle based on booking
|
||||||
with
|
with
|
||||||
int_core__bookings as (select * from {{ ref("int_core__bookings") }}),
|
int_core__bookings as (select * from {{ ref("int_core__bookings") }}),
|
||||||
stg_core__accommodation as (select * from {{ ref("stg_core__accommodation") }}),
|
stg_core__accommodation as (select * from {{ ref("stg_core__accommodation") }}),
|
||||||
|
int_core__unique_accommodation_to_user as (select * from {{ ref("int_core__unique_accommodation_to_user")}}),
|
||||||
int_dates_mtd as (select * from {{ ref("int_dates_mtd") }}),
|
int_dates_mtd as (select * from {{ ref("int_dates_mtd") }}),
|
||||||
|
|
||||||
booked_days_per_accommodation as (
|
booked_days_per_accommodation as (
|
||||||
|
|
@ -29,9 +30,12 @@ with
|
||||||
max(b.previous_booked_date) as second_to_last_time_booked_date_utc
|
max(b.previous_booked_date) as second_to_last_time_booked_date_utc
|
||||||
from int_dates_mtd d
|
from int_dates_mtd d
|
||||||
inner join stg_core__accommodation a on d.date >= date(a.created_at_utc)
|
inner join stg_core__accommodation a on d.date >= date(a.created_at_utc)
|
||||||
|
-- Inclusion of unique accommodation to user to force that the
|
||||||
|
-- accommodation needs to have a host assigned into it.
|
||||||
|
inner join int_core__unique_accommodation_to_user uatu on uatu.id_accommodation = a.id_accommodation
|
||||||
left join
|
left join
|
||||||
booked_days_per_accommodation b
|
booked_days_per_accommodation b
|
||||||
on a.id_accommodation = b.id_accommodation
|
on uatu.id_accommodation = b.id_accommodation
|
||||||
and d.date >= b.created_date_utc
|
and d.date >= b.created_date_utc
|
||||||
group by d.date, a.id_accommodation, date(a.created_at_utc)
|
group by d.date, a.id_accommodation, date(a.created_at_utc)
|
||||||
),
|
),
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,34 @@
|
||||||
|
|
||||||
|
/*
|
||||||
|
This model selects the last user host assigned to an accommodation
|
||||||
|
based on the maximum id_accommodation_to_user. This means, if we have
|
||||||
|
an Accommodation A, the original table AccommodationToUser could have
|
||||||
|
2 hosts: H1 and H2. If the id_accommodation_to_user for H1 is 123 while
|
||||||
|
for H2 is 321, this model will attribute the accommodation A to H2,
|
||||||
|
because 321 > 123.
|
||||||
|
|
||||||
|
At the moment of creating this model 26th June 2024, there were only
|
||||||
|
93 accommodations that had at maximum 2 hosts, over a total of ~140k
|
||||||
|
accommodations. Thus, even if this method is not perfect, it ensures
|
||||||
|
that there's no accommodation nasty duplications in following stages.
|
||||||
|
|
||||||
|
*/
|
||||||
|
{{ config(materialized="table", unique_key="id_accommodation") }}
|
||||||
|
|
||||||
|
with
|
||||||
|
stg_core__accommodation_to_user as (
|
||||||
|
select * from {{ ref("stg_core__accommodation_to_user") }}
|
||||||
|
),
|
||||||
|
duplicated_accommodation_to_user as (
|
||||||
|
select
|
||||||
|
id_user_owner,
|
||||||
|
id_accommodation,
|
||||||
|
id_accommodation_to_user,
|
||||||
|
row_number() over (
|
||||||
|
partition by id_accommodation order by id_accommodation_to_user desc
|
||||||
|
) as rn
|
||||||
|
from stg_core__accommodation_to_user scatu
|
||||||
|
)
|
||||||
|
select id_user_owner, id_accommodation
|
||||||
|
from duplicated_accommodation_to_user
|
||||||
|
where rn = 1
|
||||||
Loading…
Add table
Add a link
Reference in a new issue