addressed Pablo's comments, removed the repetitive casts, added some not_null tests and fixed some of the name and description discrepancies

This commit is contained in:
Joaquin Ossa 2024-08-29 14:25:00 +02:00
parent ad2eb2544c
commit 6adc424963
6 changed files with 96 additions and 48 deletions

View file

@ -11,15 +11,15 @@ with
select
id_booking,
is_cancelled,
id_user_partner,
channel,
cast(checkin_at_utc as date) as checkin_date_utc,
cast(checkout_at_utc as date) as checkout_date_utc,
checkin_date_utc,
checkout_date_utc,
creation_at_utc,
verification_status,
id_user_host,
case
when verification_status in {{ ok_status }}
then cast(nightly_fee_local as float) * number_nights
then nightly_fee_local * number_nights
else 0
end as ok_status_fee,
case
@ -30,33 +30,50 @@ with
to_char(creation_at_utc, 'YYYY-MM') as year_month_created,
to_char(checkout_at_utc, 'YYYY-MM') as year_month_checkout
from int_edeposit__verifications
where version = 'V2' -- This version V2 indicates records for e-deposit
where version = 'V2'
),
calculated_metrics as (
select
id_user_partner,
year_month_created,
year_month_checkout,
count(distinct id_booking) as bookings_per_month,
sum(cast(is_cancelled as integer)) as cancelled_per_month,
sum(cast(is_cancelled as integer))::decimal
/ count(distinct id_booking) as cancelled_ratio,
sum(ok_status_fee) as sum_ok_status_fee,
sum(rejected_fee) as sum_rejected_fee,
case
when
sum(cast(is_cancelled as integer))::decimal
/ count(distinct id_booking)
>= {{ cancellation_threshold }}
then true
else false
end as is_cancellation_threshold_surpassed
from edeposit_records
group by year_month_created, year_month_checkout, id_user_partner
)
select
id_user_partner,
year_month_created,
year_month_checkout,
id_user_host,
count(distinct id_booking) as bookings_per_month,
sum(cast(is_cancelled as integer)) as cancelled_per_month,
sum(cast(is_cancelled as integer))::decimal
/ count(distinct id_booking) as cancelled_ratio,
bookings_per_month,
cancelled_per_month,
cancelled_ratio,
is_cancellation_threshold_surpassed,
case
when
sum(cast(is_cancelled as integer))::decimal / count(distinct id_booking)
>= {{ cancellation_threshold }}
then sum(cast(is_cancelled as integer)) * {{ cancellation_fee }}
when is_cancellation_threshold_surpassed
then cancelled_per_month * {{ cancellation_fee }}
else 0
end as sum_cancelled_fee,
sum(ok_status_fee) as sum_ok_status_fee,
sum(rejected_fee) as sum_rejected_fee,
case
when
sum(cast(is_cancelled as integer))::decimal / count(distinct id_booking)
>= {{ cancellation_threshold }}
then sum(cast(is_cancelled as integer)) * {{ cancellation_fee }}
sum_ok_status_fee,
sum_rejected_fee,
sum_ok_status_fee
+ sum_rejected_fee
+ case
when cancelled_ratio >= {{ cancellation_threshold }}
then cancelled_per_month * {{ cancellation_fee }}
else 0
end
+ sum(ok_status_fee)
+ sum(rejected_fee) as total_revenue
from edeposit_records
group by year_month_created, year_month_checkout, id_user_host
end as total_revenue
from calculated_metrics

View file

@ -5,9 +5,9 @@ with
select
-- note that these ids are not the same as the ones found in Core DWH
-- they are completely unrelated
id as id_verification,
id_verification,
id_booking,
id_user as id_user_host,
id_user as id_user_partner,
id_listing as id_accommodation,
version, -- V1 for Guesty and V2 for E-deposit
case
@ -15,14 +15,16 @@ select
end as verification_source,
verification_status,
verification_status_reason,
nightly_fee_local,
cast(nightly_fee_local as float),
cast(checkout_at_utc as date) - cast(checkin_at_utc as date) as number_nights,
email_flag,
phone_flag,
watch_list,
channel,
checkin_at_utc,
cast(checkin_at_utc as date) as checkin_date_utc,
checkout_at_utc,
cast(checkout_at_utc as date) as checkout_date_utc,
is_cancelled,
cancelled_at_utc,
user_email,

View file

@ -19,9 +19,11 @@ models:
data_type: text
description: "unique Superhog generated id for a booking"
- name: id_user_host
- name: id_user_partner
data_type: text
description: "unique Superhog generated id for host"
description: "unique Superhog generated id for partner"
tests:
- not_null
- name: id_accommodation
data_type: text
@ -56,7 +58,7 @@ models:
description: "short explanation for status"
- name: nightly_fee_local
data_type: text
data_type: double precision
description: "fee charged per night"
- name: number_nights
@ -83,10 +85,18 @@ models:
data_type: timestamp without time zone
description: "Timestamp of checkin for the booking"
- name: checkin_date_utc
data_type: date
description: "Date of checkin for the booking"
- name: checkout_at_utc
data_type: timestamp without time zone
description: "Timestamp of checkout for the booking"
- name: checkout_date_utc
data_type: date
description: "Date of checkout for the booking"
- name: is_cancelled
data_type: boolean
description: ""
@ -184,19 +194,19 @@ models:
combination_of_columns:
- year_month_created
- year_month_checkout
- id_user_host
- id_user_partner
columns:
- name: id_user_partner
data_type: text
description: "unique id value for user partner"
- name: year_month_created
data_type: text
description: "first day of month of created date"
description: "year and month of created date"
- name: year_month_checkout
data_type: text
description: "first day of month of check-out date"
- name: id_user_host
data_type: text
description: "unique id value for user host"
description: "year and month of check-out date"
- name: bookings_per_month
data_type: bigint
@ -210,6 +220,12 @@ models:
data_type: numeric
description: "ratio of cancelled bookings over total bookings"
- name: is_cancellation_threshold_surpassed
data_type: boolean
description: "true if the cancellation ratio is greater than or equal to the set threshold"
tests:
- not_null
- name: sum_cancelled_fee
data_type: numeric
description: "sum of fees charged for cancelled bookings