Merged PR 2084: Adding int_core__accommodation
Adding int_core__accommodation Includes both: - Main information of the accommodation, mostly coming from stg_core__accommodation and int_core__country. - Listing lifecycle computation, based on the created bookings from stg_core__bookings. It's just the current state, no history. Some considerations: - I opted to use stg_core__bookings and not int_core__bookings. Main reason is in case at some point we want to add listing-based information to the booking table, it would avoid cyclic references. - I opted to keep all the logic of 1) accommodation info and 2) lifecycle in the same model. This could be easily split into: lifecycle first that reads uniquely from staging and then the int_core__accommodation that could read from the staging version to retrieve accommodation attributes + the lifecycle one. Up to you I'd suggest to review first the documentation in schema since it explains the logic applied. Notion page linked to this task: https://www.notion.so/knowyourguest-superhog/Listing-lifecycle-4dc0311b21ca44f8859969e419872ebd Related work items: #17312
This commit is contained in:
parent
80120e68a2
commit
fe93f594f5
2 changed files with 359 additions and 1 deletions
168
models/intermediate/core/int_core__accommodation.sql
Normal file
168
models/intermediate/core/int_core__accommodation.sql
Normal file
|
|
@ -0,0 +1,168 @@
|
||||||
|
{{ config(materialized="table", unique_key="id_accommodation") }}
|
||||||
|
with
|
||||||
|
stg_core__booking as (select * from {{ ref("stg_core__booking") }}),
|
||||||
|
stg_core__accommodation as (select * from {{ ref("stg_core__accommodation") }}),
|
||||||
|
int_core__country as (select * from {{ ref("int_core__country") }}),
|
||||||
|
ranked_bookings as (
|
||||||
|
select
|
||||||
|
scb.id_accommodation,
|
||||||
|
scb.created_at_utc,
|
||||||
|
row_number() over (
|
||||||
|
partition by scb.id_accommodation order by scb.created_at_utc desc
|
||||||
|
) as reverse_booking_rank
|
||||||
|
from stg_core__booking scb
|
||||||
|
),
|
||||||
|
first_and_last_bookings as (
|
||||||
|
select
|
||||||
|
id_accommodation,
|
||||||
|
min(created_at_utc) as first_time_booked_at_utc,
|
||||||
|
max(created_at_utc) as last_time_booked_at_utc
|
||||||
|
from ranked_bookings
|
||||||
|
group by id_accommodation
|
||||||
|
),
|
||||||
|
accommodation_booking_main_dates as (
|
||||||
|
select
|
||||||
|
fb.id_accommodation,
|
||||||
|
fb.first_time_booked_at_utc,
|
||||||
|
fb.last_time_booked_at_utc,
|
||||||
|
rb.created_at_utc as second_to_last_time_booked_at_utc
|
||||||
|
from first_and_last_bookings fb
|
||||||
|
left join
|
||||||
|
ranked_bookings rb
|
||||||
|
on fb.id_accommodation = rb.id_accommodation
|
||||||
|
-- to select second to last booking, it's the 2nd apperance in descending creation time order
|
||||||
|
and rb.reverse_booking_rank = 2
|
||||||
|
),
|
||||||
|
accommodation_with_lifecycle_dates as (
|
||||||
|
select
|
||||||
|
sca.id_accommodation,
|
||||||
|
sca.id_creating_user as id_user_host,
|
||||||
|
sca.id_payment_validation_set,
|
||||||
|
sca.friendly_name,
|
||||||
|
icc.iso_2 as country_iso_2,
|
||||||
|
icc.country_name,
|
||||||
|
icc.preferred_iso4217_code as country_preferred_currency_code,
|
||||||
|
sca.is_active,
|
||||||
|
sca.town,
|
||||||
|
sca.postcode,
|
||||||
|
sca.address_line_1,
|
||||||
|
sca.address_line_2,
|
||||||
|
sca.verification_level,
|
||||||
|
sca.floor_area,
|
||||||
|
sca.number_of_floors,
|
||||||
|
sca.number_of_bedrooms,
|
||||||
|
sca.number_of_bathrooms,
|
||||||
|
sca.number_of_other_rooms,
|
||||||
|
sca.construction_details,
|
||||||
|
sca.created_at_utc,
|
||||||
|
date(sca.created_at_utc) as created_date_utc,
|
||||||
|
sca.updated_at_utc,
|
||||||
|
date(sca.updated_at_utc) as updated_date_utc,
|
||||||
|
abmd.first_time_booked_at_utc,
|
||||||
|
date(abmd.first_time_booked_at_utc) as first_time_booked_date_utc,
|
||||||
|
abmd.last_time_booked_at_utc,
|
||||||
|
date(abmd.last_time_booked_at_utc) as last_time_booked_date_utc,
|
||||||
|
abmd.second_to_last_time_booked_at_utc,
|
||||||
|
date(abmd.second_to_last_time_booked_at_utc) as second_to_last_time_booked_date_utc,
|
||||||
|
sca.dwh_extracted_at_utc,
|
||||||
|
-- adding updated_at to account for temporal variability of the status and booking-related dates
|
||||||
|
date_trunc('month', now()::date) as current_month,
|
||||||
|
case when abmd.first_time_booked_at_utc is not null then true else false end as accommodation_has_at_least_one_booking,
|
||||||
|
case when date_trunc('month', created_at_utc) = date_trunc('month', now()::date) then true else false end as accommodation_was_created_this_month,
|
||||||
|
case when date_trunc('month', first_time_booked_at_utc) = date_trunc('month', last_time_booked_at_utc) then true else false end as accommodation_has_first_booking,
|
||||||
|
case when date_trunc('month', last_time_booked_at_utc) = date_trunc('month', now()::date) then true else false end as has_been_booked_within_current_month,
|
||||||
|
case when date_trunc('month', last_time_booked_at_utc) + interval '6 months' > date_trunc('month', now()::date) then true else false end as has_been_booked_within_last_6_months,
|
||||||
|
case when date_trunc('month', last_time_booked_at_utc) + interval '12 months' > date_trunc('month', now()::date) then true else false end as has_been_booked_within_last_12_months,
|
||||||
|
case when date_trunc('month', last_time_booked_at_utc) + interval '12 months' = date_trunc('month', now()::date) then true else false end as last_booking_was_12_months_ago,
|
||||||
|
case when date_trunc('month', second_to_last_time_booked_at_utc) + interval '12 months' < date_trunc('month', last_time_booked_at_utc) then true else false end as had_previous_booking_more_than_12_months_before_the_last
|
||||||
|
from stg_core__accommodation sca
|
||||||
|
left join int_core__country icc on sca.id_country = icc.id_country
|
||||||
|
left join
|
||||||
|
accommodation_booking_main_dates abmd
|
||||||
|
on sca.id_accommodation = abmd.id_accommodation
|
||||||
|
)
|
||||||
|
select
|
||||||
|
id_accommodation,
|
||||||
|
id_user_host,
|
||||||
|
id_payment_validation_set,
|
||||||
|
friendly_name,
|
||||||
|
country_iso_2,
|
||||||
|
country_name,
|
||||||
|
country_preferred_currency_code,
|
||||||
|
is_active,
|
||||||
|
town,
|
||||||
|
postcode,
|
||||||
|
address_line_1,
|
||||||
|
address_line_2,
|
||||||
|
verification_level,
|
||||||
|
floor_area,
|
||||||
|
number_of_floors,
|
||||||
|
number_of_bedrooms,
|
||||||
|
number_of_bathrooms,
|
||||||
|
number_of_other_rooms,
|
||||||
|
construction_details,
|
||||||
|
case
|
||||||
|
-- 01-New: The listing has been created this month and has not had any booking
|
||||||
|
when
|
||||||
|
not accommodation_has_at_least_one_booking
|
||||||
|
and accommodation_was_created_this_month
|
||||||
|
then '01-New'
|
||||||
|
-- 02-Never Booked: The listing has been created before this month and has not had any booking
|
||||||
|
when
|
||||||
|
not accommodation_has_at_least_one_booking
|
||||||
|
and not accommodation_was_created_this_month
|
||||||
|
then '02-Never Booked'
|
||||||
|
-- 03-First Time Booked: The listing has been booked for the first time and it has been created this month
|
||||||
|
when
|
||||||
|
accommodation_has_first_booking
|
||||||
|
and has_been_booked_within_current_month
|
||||||
|
then '03-First Time Booked'
|
||||||
|
-- 04-Active: The listing has had at least 1 booking in its history and it's been less than 12 months since the last booking
|
||||||
|
-- and is not reactivated and is not FTB
|
||||||
|
when
|
||||||
|
accommodation_has_at_least_one_booking
|
||||||
|
and has_been_booked_within_last_12_months
|
||||||
|
-- not reactivated
|
||||||
|
and not (
|
||||||
|
had_previous_booking_more_than_12_months_before_the_last
|
||||||
|
and has_been_booked_within_current_month
|
||||||
|
)
|
||||||
|
-- not FTB
|
||||||
|
and not (
|
||||||
|
accommodation_has_first_booking
|
||||||
|
and has_been_booked_within_current_month
|
||||||
|
)
|
||||||
|
then '04-Active'
|
||||||
|
-- 05-Churning: The listing has been booked at least once and it's been 12 months since the last booking
|
||||||
|
when
|
||||||
|
accommodation_has_at_least_one_booking
|
||||||
|
and last_booking_was_12_months_ago
|
||||||
|
then '05-Churning'
|
||||||
|
-- 06-Inactive: The listing has been booked at least once and it's been more than 12 months since the last booking
|
||||||
|
when
|
||||||
|
accommodation_has_at_least_one_booking
|
||||||
|
and not has_been_booked_within_last_12_months
|
||||||
|
and not last_booking_was_12_months_ago
|
||||||
|
then '06-Inactive'
|
||||||
|
-- 07-Reactivated: The listing was churned/inactive but has had a new booking this month
|
||||||
|
when
|
||||||
|
had_previous_booking_more_than_12_months_before_the_last
|
||||||
|
and has_been_booked_within_current_month
|
||||||
|
then '07-Reactivated'
|
||||||
|
else Null
|
||||||
|
end as accommodation_lifecycle_state,
|
||||||
|
has_been_booked_within_current_month,
|
||||||
|
has_been_booked_within_last_6_months,
|
||||||
|
has_been_booked_within_last_12_months,
|
||||||
|
created_at_utc,
|
||||||
|
created_date_utc,
|
||||||
|
updated_at_utc,
|
||||||
|
updated_date_utc,
|
||||||
|
first_time_booked_at_utc,
|
||||||
|
first_time_booked_date_utc,
|
||||||
|
last_time_booked_at_utc,
|
||||||
|
last_time_booked_date_utc,
|
||||||
|
second_to_last_time_booked_at_utc,
|
||||||
|
second_to_last_time_booked_date_utc,
|
||||||
|
dwh_extracted_at_utc
|
||||||
|
from accommodation_with_lifecycle_dates
|
||||||
|
|
@ -495,7 +495,197 @@ models:
|
||||||
These codes are part of the ISO 4217 standard.
|
These codes are part of the ISO 4217 standard.
|
||||||
tests:
|
tests:
|
||||||
- not_null
|
- not_null
|
||||||
|
|
||||||
|
|
||||||
|
- name: int_core__accommodation
|
||||||
|
description: |
|
||||||
|
This model contains information regarding accommodations, also known as listings.
|
||||||
|
It contains information regarding the host this accommodation is linked to,
|
||||||
|
the geographic details, the preferred currency according to the country, details about
|
||||||
|
the listing itself (floors, bedrooms, etc) and time-related information of when the
|
||||||
|
listing was created, booked for the first time, last time, and second-to-last time.
|
||||||
|
|
||||||
|
The information regarding the booking-related time allows for the current status of
|
||||||
|
any listing regarding its activity. There's no history, it's just the most up-to-date
|
||||||
|
status of the listing activity. This information is encapsulated in the following columns:
|
||||||
|
|
||||||
|
accommodation_lifecycle_state: contains one of the following states
|
||||||
|
- 01-New: Listings that have been created in the current month, without bookings
|
||||||
|
- 02-Never Booked: Listings that have been created before the current month, without bookings.
|
||||||
|
- 03-First Time Booked: Listings that have been booked for the first time in the current month.
|
||||||
|
- 04-Active: Listings that have booking activity in the past 12 months (that are not FTB nor reactivated)
|
||||||
|
- 05-Churning: Listings that are becoming inactive because of lack of bookings in the past 12 months
|
||||||
|
- 06-Inactive: Listings that have not had a booking for more than 12 months.
|
||||||
|
- 07-Reactivated: Listings that have had a booking in the current month that were inactive or churning before.
|
||||||
|
- Finally, if none of the logic applies, which should not happen, null will be set and a dbt alert will raise.
|
||||||
|
|
||||||
|
Since the states of Active, First Time Booked and Reactivated indicate certain booking activity and are
|
||||||
|
mutually exclusive, the model also provides information of the recency of the bookings by the following
|
||||||
|
booleans:
|
||||||
|
- has_been_booked_in_1_month: If a listing has had a booking created in the current month
|
||||||
|
- has_been_booked_in_6_months: If a listing has had a booking created in the past 6 months
|
||||||
|
- has_been_booked_within_last_12_months: If a listing has had a booking created in the past 12 months
|
||||||
|
Note that if a listing has had a booking created this month, all 3 columns will be true. Similarly,
|
||||||
|
if the last booking created to a listing was 5 months ago, only the column has_been_booked_in_1_month
|
||||||
|
will be false; while the other 2 will be true.
|
||||||
|
|
||||||
|
|
||||||
|
columns:
|
||||||
|
- name: id_accommodation
|
||||||
|
data_type: bigint
|
||||||
|
description: Id of the accommodation or listing. It's the unique key for this model.
|
||||||
|
tests:
|
||||||
|
- not_null
|
||||||
|
- unique
|
||||||
|
|
||||||
|
- name: id_user_host
|
||||||
|
data_type: character varying
|
||||||
|
description: The unique ID for the host. Can be null.
|
||||||
|
|
||||||
|
- name: id_payment_validation_set
|
||||||
|
data_type: bigint
|
||||||
|
description: Id of the payment validation set linked to a listing. Can be null.
|
||||||
|
|
||||||
|
- name: friendly_name
|
||||||
|
data_type: character varying
|
||||||
|
|
||||||
|
- name: country_iso_2
|
||||||
|
data_type: char(2)
|
||||||
|
description: ISO 3166-1 alpha-2 country code where the listing is located.
|
||||||
|
|
||||||
|
- name: country_name
|
||||||
|
data_type: character varying
|
||||||
|
description: Name of the country where the listing is located.
|
||||||
|
|
||||||
|
- name: country_preferred_currency_code
|
||||||
|
data_type: char(3)
|
||||||
|
description: |
|
||||||
|
Three-letter code assigned to the preferred currency for a given country by the ISO.
|
||||||
|
These codes are part of the ISO 4217 standard. Keep in mind this are preferred, not
|
||||||
|
necessarily the actual currency.
|
||||||
|
|
||||||
|
- name: is_active
|
||||||
|
data_type: boolean
|
||||||
|
description: |
|
||||||
|
Boolean to indicate if the listing is active or not. If false, this is considered as a
|
||||||
|
hard deactivation - meaning no more bookings can be assigned to this listing. However,
|
||||||
|
even if a listing is active, that does not necessarily mean that it's receiving bookings.
|
||||||
|
Do not confuse this column with the lifecycle activity of a listing.
|
||||||
|
|
||||||
|
- name: town
|
||||||
|
data_type: character varying
|
||||||
|
|
||||||
|
- name: postcode
|
||||||
|
data_type: character varying
|
||||||
|
|
||||||
|
- name: address_line_1
|
||||||
|
data_type: character varying
|
||||||
|
|
||||||
|
- name: address_line_2
|
||||||
|
data_type: character varying
|
||||||
|
|
||||||
|
- name: verification_level
|
||||||
|
data_type: integer
|
||||||
|
|
||||||
|
- name: floor_area
|
||||||
|
data_type: integer
|
||||||
|
|
||||||
|
- name: number_of_floors
|
||||||
|
data_type: integer
|
||||||
|
|
||||||
|
- name: number_of_bedrooms
|
||||||
|
data_type: integer
|
||||||
|
|
||||||
|
- name: number_of_bathrooms
|
||||||
|
data_type: integer
|
||||||
|
|
||||||
|
- name: number_of_other_rooms
|
||||||
|
data_type: integer
|
||||||
|
|
||||||
|
- name: construction_details
|
||||||
|
data_type: character varying
|
||||||
|
|
||||||
|
- name: accommodation_lifecycle_state
|
||||||
|
data_type: character varying
|
||||||
|
description: |
|
||||||
|
Contains the lifecycle state of a Listing. The accepted values are:
|
||||||
|
01-New, 02-Never Booked, 03-First Time Booked, 04-Active, 05-Churning, 06-Inactive,
|
||||||
|
07-Reactivated. Failing to implement the logic will result in alert.
|
||||||
|
tests:
|
||||||
|
- not_null
|
||||||
|
|
||||||
|
- name: has_been_booked_within_current_month
|
||||||
|
data_type: boolean
|
||||||
|
description: If the listing has had a booking created in the current month.
|
||||||
|
|
||||||
|
- name: has_been_booked_within_last_6_months
|
||||||
|
data_type: boolean
|
||||||
|
description: If the listing has had a booking created in the past 6 months.
|
||||||
|
|
||||||
|
- name: has_been_booked_within_last_12_months
|
||||||
|
data_type: boolean
|
||||||
|
description: If the listing has had a booking created in the past 12 months.
|
||||||
|
|
||||||
|
- name: created_at_utc
|
||||||
|
data_type: timestamp
|
||||||
|
description: Timestamp of when the listing was created. Cannot be null.
|
||||||
|
tests:
|
||||||
|
- not_null
|
||||||
|
|
||||||
|
- name: created_date_utc
|
||||||
|
data_type: date
|
||||||
|
description: Date of when the listing was created
|
||||||
|
|
||||||
|
- name: updated_at_utc
|
||||||
|
data_type: timestamp
|
||||||
|
description: Timestamp of when the listing was last updated according to the backend.
|
||||||
|
|
||||||
|
- name: updated_date_utc
|
||||||
|
data_type: date
|
||||||
|
description: Date of when the listing was last updated according to the backend.
|
||||||
|
|
||||||
|
- name: first_time_booked_at_utc
|
||||||
|
data_type: timestamp
|
||||||
|
description: |
|
||||||
|
Timestamp of the first booking created for a given listing. Can be null if the listing
|
||||||
|
has never had a booking associated with it.
|
||||||
|
|
||||||
|
- name: first_time_booked_date_utc
|
||||||
|
data_type: date
|
||||||
|
description: |
|
||||||
|
Date of the first booking created for a given listing. Can be null if the listing
|
||||||
|
has never had a booking associated with it.
|
||||||
|
|
||||||
|
- name: last_time_booked_at_utc
|
||||||
|
data_type: timestamp
|
||||||
|
description: |
|
||||||
|
Timestamp of the last booking created for a given listing. Can be null if the listing
|
||||||
|
has never had a booking associated with it. Can be the same as first_time_booked_at_utc
|
||||||
|
if the listing only had 1 booking in its history.
|
||||||
|
|
||||||
|
- name: last_time_booked_date_utc
|
||||||
|
data_type: date
|
||||||
|
description: |
|
||||||
|
Date of the last booking created for a given listing. Can be null if the listing
|
||||||
|
has never had a booking associated with it. Can be the same as first_time_booked_date_utc
|
||||||
|
if the listing only had 1 booking in its history.
|
||||||
|
|
||||||
|
- name: second_to_last_time_booked_at_utc
|
||||||
|
data_type: timestamp
|
||||||
|
description: |
|
||||||
|
Timestamp of the second-to-last booking created for a given listing, meaning the creation
|
||||||
|
time of the booking that precedes the last one. It's relevant for the reactivation computation
|
||||||
|
on the lifecycle. Can be null if the listing has never had a booking associated with it or if
|
||||||
|
the listing only had 1 booking in its history.
|
||||||
|
|
||||||
|
- name: second_to_last_time_booked_date_utc
|
||||||
|
data_type: date
|
||||||
|
description: |
|
||||||
|
Date of the second-to-last booking created for a given listing, meaning the creation
|
||||||
|
date of the booking that precedes the last one. It's relevant for the reactivation computation
|
||||||
|
on the lifecycle. Can be null if the listing has never had a booking associated with it or if
|
||||||
|
the listing only had 1 booking in its history.
|
||||||
|
|
||||||
|
- name: dwh_extracted_at_utc
|
||||||
|
data_type: timestamp
|
||||||
|
description: Timestamp of when the accommodation record was extracted from the backend into the DWH.
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue