Addressed Pablo's comments, removed the repetitive casts, added some not_null tests and fixed some of the name and description discrepancies

This commit is contained in:
Joaquin Ossa 2024-08-29 14:25:00 +02:00
parent ad2eb2544c
commit 6adc424963
6 changed files with 96 additions and 48 deletions

View file

@ -11,15 +11,15 @@ with
select select
id_booking, id_booking,
is_cancelled, is_cancelled,
id_user_partner,
channel, channel,
cast(checkin_at_utc as date) as checkin_date_utc, checkin_date_utc,
cast(checkout_at_utc as date) as checkout_date_utc, checkout_date_utc,
creation_at_utc, creation_at_utc,
verification_status, verification_status,
id_user_host,
case case
when verification_status in {{ ok_status }} when verification_status in {{ ok_status }}
then cast(nightly_fee_local as float) * number_nights then nightly_fee_local * number_nights
else 0 else 0
end as ok_status_fee, end as ok_status_fee,
case case
@ -30,33 +30,50 @@ with
to_char(creation_at_utc, 'YYYY-MM') as year_month_created, to_char(creation_at_utc, 'YYYY-MM') as year_month_created,
to_char(checkout_at_utc, 'YYYY-MM') as year_month_checkout to_char(checkout_at_utc, 'YYYY-MM') as year_month_checkout
from int_edeposit__verifications from int_edeposit__verifications
where version = 'V2' -- This version V2 indicates records for e-deposit where version = 'V2'
),
calculated_metrics as (
select
id_user_partner,
year_month_created,
year_month_checkout,
count(distinct id_booking) as bookings_per_month,
sum(cast(is_cancelled as integer)) as cancelled_per_month,
sum(cast(is_cancelled as integer))::decimal
/ count(distinct id_booking) as cancelled_ratio,
sum(ok_status_fee) as sum_ok_status_fee,
sum(rejected_fee) as sum_rejected_fee,
case
when
sum(cast(is_cancelled as integer))::decimal
/ count(distinct id_booking)
>= {{ cancellation_threshold }}
then true
else false
end as is_cancellation_threshold_surpassed
from edeposit_records
group by year_month_created, year_month_checkout, id_user_partner
) )
select select
id_user_partner,
year_month_created, year_month_created,
year_month_checkout, year_month_checkout,
id_user_host, bookings_per_month,
count(distinct id_booking) as bookings_per_month, cancelled_per_month,
sum(cast(is_cancelled as integer)) as cancelled_per_month, cancelled_ratio,
sum(cast(is_cancelled as integer))::decimal is_cancellation_threshold_surpassed,
/ count(distinct id_booking) as cancelled_ratio,
case case
when when is_cancellation_threshold_surpassed
sum(cast(is_cancelled as integer))::decimal / count(distinct id_booking) then cancelled_per_month * {{ cancellation_fee }}
>= {{ cancellation_threshold }}
then sum(cast(is_cancelled as integer)) * {{ cancellation_fee }}
else 0 else 0
end as sum_cancelled_fee, end as sum_cancelled_fee,
sum(ok_status_fee) as sum_ok_status_fee, sum_ok_status_fee,
sum(rejected_fee) as sum_rejected_fee, sum_rejected_fee,
case sum_ok_status_fee
when + sum_rejected_fee
sum(cast(is_cancelled as integer))::decimal / count(distinct id_booking) + case
>= {{ cancellation_threshold }} when cancelled_ratio >= {{ cancellation_threshold }}
then sum(cast(is_cancelled as integer)) * {{ cancellation_fee }} then cancelled_per_month * {{ cancellation_fee }}
else 0 else 0
end end as total_revenue
+ sum(ok_status_fee) from calculated_metrics
+ sum(rejected_fee) as total_revenue
from edeposit_records
group by year_month_created, year_month_checkout, id_user_host

View file

@ -5,9 +5,9 @@ with
select select
-- note that these ids are not the same as the ones found in Core DWH -- note that these ids are not the same as the ones found in Core DWH
-- they are completely unrelated -- they are completely unrelated
id as id_verification, id_verification,
id_booking, id_booking,
id_user as id_user_host, id_user as id_user_partner,
id_listing as id_accommodation, id_listing as id_accommodation,
version, -- V1 for Guesty and V2 for E-deposit version, -- V1 for Guesty and V2 for E-deposit
case case
@ -15,14 +15,16 @@ select
end as verification_source, end as verification_source,
verification_status, verification_status,
verification_status_reason, verification_status_reason,
nightly_fee_local, cast(nightly_fee_local as float),
cast(checkout_at_utc as date) - cast(checkin_at_utc as date) as number_nights, cast(checkout_at_utc as date) - cast(checkin_at_utc as date) as number_nights,
email_flag, email_flag,
phone_flag, phone_flag,
watch_list, watch_list,
channel, channel,
checkin_at_utc, checkin_at_utc,
cast(checkin_at_utc as date) as checkin_date_utc,
checkout_at_utc, checkout_at_utc,
cast(checkout_at_utc as date) as checkout_date_utc,
is_cancelled, is_cancelled,
cancelled_at_utc, cancelled_at_utc,
user_email, user_email,

View file

@ -19,9 +19,11 @@ models:
data_type: text data_type: text
description: "unique Superhog generated id for a booking" description: "unique Superhog generated id for a booking"
- name: id_user_host - name: id_user_partner
data_type: text data_type: text
description: "unique Superhog generated id for host" description: "unique Superhog generated id for partner"
tests:
- not_null
- name: id_accommodation - name: id_accommodation
data_type: text data_type: text
@ -56,7 +58,7 @@ models:
description: "short explanation for status" description: "short explanation for status"
- name: nightly_fee_local - name: nightly_fee_local
data_type: text data_type: double precision
description: "fee charged per night" description: "fee charged per night"
- name: number_nights - name: number_nights
@ -83,10 +85,18 @@ models:
data_type: timestamp without time zone data_type: timestamp without time zone
description: "Timestamp of checkin for the booking" description: "Timestamp of checkin for the booking"
- name: checkin_date_utc
data_type: date
description: "Date of checkin for the booking"
- name: checkout_at_utc - name: checkout_at_utc
data_type: timestamp without time zone data_type: timestamp without time zone
description: "Timestamp of checkout for the booking" description: "Timestamp of checkout for the booking"
- name: checkout_date_utc
data_type: date
description: "Date of checkout for the booking"
- name: is_cancelled - name: is_cancelled
data_type: boolean data_type: boolean
description: "" description: ""
@ -184,19 +194,19 @@ models:
combination_of_columns: combination_of_columns:
- year_month_created - year_month_created
- year_month_checkout - year_month_checkout
- id_user_host - id_user_partner
columns: columns:
- name: id_user_partner
data_type: text
description: "unique id value for user partner"
- name: year_month_created - name: year_month_created
data_type: text data_type: text
description: "first day of month of created date" description: "year and month of created date"
- name: year_month_checkout - name: year_month_checkout
data_type: text data_type: text
description: "first day of month of check-out date" description: "year and month of check-out date"
- name: id_user_host
data_type: text
description: "unique id value for user host"
- name: bookings_per_month - name: bookings_per_month
data_type: bigint data_type: bigint
@ -210,6 +220,12 @@ models:
data_type: numeric data_type: numeric
description: "ratio of cancelled bookings over total bookings" description: "ratio of cancelled bookings over total bookings"
- name: is_cancellation_threshold_surpassed
data_type: boolean
description: "true if the cancellation ratio is higher than or equal to the set threshold"
tests:
- not_null
- name: sum_cancelled_fee - name: sum_cancelled_fee
data_type: numeric data_type: numeric
description: "sum of fees charged for cancelled bookings description: "sum of fees charged for cancelled bookings

View file

@ -5,10 +5,11 @@ with
select select
year_month_created as year_month_created, year_month_created as year_month_created,
year_month_checkout as year_month_checkout, year_month_checkout as year_month_checkout,
id_user_host as id_user_host, id_user_partner as id_user_partner,
bookings_per_month as bookings_per_month, bookings_per_month as bookings_per_month,
cancelled_per_month as cancelled_per_month, cancelled_per_month as cancelled_per_month,
cancelled_ratio as cancelled_ratio, cancelled_ratio as cancelled_ratio,
is_cancellation_threshold_surpassed as is_cancellation_threshold_surpassed,
sum_cancelled_fee as sum_cancelled_fee, sum_cancelled_fee as sum_cancelled_fee,
sum_ok_status_fee as sum_ok_status_fee, sum_ok_status_fee as sum_ok_status_fee,
sum_rejected_fee as sum_rejected_fee, sum_rejected_fee as sum_rejected_fee,

View file

@ -14,19 +14,25 @@ models:
combination_of_columns: combination_of_columns:
- year_month_created - year_month_created
- year_month_checkout - year_month_checkout
- id_user_host - id_user_partner
columns: columns:
- name: id_user_partner
data_type: text
description: "unique id value for user partner"
test:
- not_null
- name: year_month_created - name: year_month_created
data_type: text data_type: text
description: "first day of month of created date" description: "year and month of created date"
test:
- not_null
- name: year_month_checkout - name: year_month_checkout
data_type: text data_type: text
description: "first day of month of check-out date" description: "year and month of check-out date"
test:
- name: id_user_host - not_null
data_type: text
description: "unique id value for user host"
- name: bookings_per_month - name: bookings_per_month
data_type: bigint data_type: bigint
@ -40,6 +46,12 @@ models:
data_type: numeric data_type: numeric
description: "ratio of cancelled bookings over total bookings" description: "ratio of cancelled bookings over total bookings"
- name: is_cancellation_threshold_surpassed
data_type: boolean
description: "true if the cancellation ratio is higher than or equal to the set threshold"
tests:
- not_null
- name: sum_cancelled_fee - name: sum_cancelled_fee
data_type: numeric data_type: numeric
description: "sum of fees charged for cancelled bookings description: "sum of fees charged for cancelled bookings

View file

@ -27,7 +27,7 @@ with
), ),
stg_edeposit__verifications as ( stg_edeposit__verifications as (
select select
{{ adapter.quote("documents") }} ->> 'id' as id, {{ adapter.quote("documents") }} ->> 'id' as id_verification,
{{ adapter.quote("documents") }} ->> 'BookingId' as id_booking, {{ adapter.quote("documents") }} ->> 'BookingId' as id_booking,
{{ adapter.quote("documents") }} ->> 'userId' as id_user, {{ adapter.quote("documents") }} ->> 'userId' as id_user,
{{ adapter.quote("documents") }} ->> 'ListingId' as id_listing, {{ adapter.quote("documents") }} ->> 'ListingId' as id_listing,