diff --git a/models/intermediate/athena/int_athena__verifications.sql b/models/intermediate/athena/int_athena__verifications.sql new file mode 100644 index 0000000..b54e6a4 --- /dev/null +++ b/models/intermediate/athena/int_athena__verifications.sql @@ -0,0 +1,45 @@ +with stg_athena__verifications as (select * from {{ ref("stg_athena__verifications") }}) +select + -- note that these ids are not the same as the ones found in Core DWH + -- they are completely unrelated + id_verification, + id_booking, + id_user_partner, + id_accommodation, + version, + case when version = 'V1' then 'Guesty' else null end as verification_source, + verification_status, + nightly_fee_local, + cast(checkout_at_utc as date) - cast(checkin_at_utc as date) as number_nights, + email_flag, + phone_flag, + watch_list, + channel, + checkin_at_utc, + cast(checkin_at_utc as date) as checkin_date_utc, + checkout_at_utc, + cast(checkout_at_utc as date) as checkout_date_utc, + is_cancelled, + cancelled_at_utc, + cast(cancelled_at_utc as date) as cancelled_date_utc, + user_email, + guest_email, + guest_last_name, + guest_first_name, + guest_telephone, + company_name, + property_manager_name, + property_manager_email, + listing_name, + listing_town, + listing_country, + listing_postcode, + pets_allowed, + status_updated_at_utc, + status_updated_date_utc, + updated_at_utc, + athena_creation_at_utc, + athena_creation_date_utc, + created_at_utc, + created_date_utc +from stg_athena__verifications diff --git a/models/intermediate/edeposit/int_edeposit__guesty_verifications.sql b/models/intermediate/athena/int_athena__verifications_with_fees.sql similarity index 87% rename from models/intermediate/edeposit/int_edeposit__guesty_verifications.sql rename to models/intermediate/athena/int_athena__verifications_with_fees.sql index 7aa8594..0f63f91 100644 --- a/models/intermediate/edeposit/int_edeposit__guesty_verifications.sql +++ b/models/intermediate/athena/int_athena__verifications_with_fees.sql @@ -2,9 +2,7 
@@ -- 2GBP/booked night if booking is approved, to be charged on checkout {% set cost_per_night = 2 %} with - int_edeposit__verifications as ( - select * from {{ ref("int_edeposit__verifications") }} - ), + int_athena__verifications as (select * from {{ ref("int_athena__verifications") }}), -- CTE to rank verifications by updated_at_utc per id_booking ranked_verifications as ( select @@ -12,7 +10,7 @@ with row_number() over ( partition by v.id_booking order by v.updated_at_utc asc ) as rn - from int_edeposit__verifications v + from int_athena__verifications v where v.version = 'V1' and v.id_booking is not null ) select diff --git a/models/intermediate/athena/schema.yml b/models/intermediate/athena/schema.yml new file mode 100644 index 0000000..d081148 --- /dev/null +++ b/models/intermediate/athena/schema.yml @@ -0,0 +1,259 @@ +version: 2 + +models: + - name: int_athena__verifications + description: "This table holds records on verifications for Athena bookings. + It contains details on validations checked on the guests, guest information + and some booking details like checkin-checkout date or the status of the verification. + The id values found here are completely unrelated to the ones found in Core DWH. + + Note that id_verification and id_booking should normally be 1 to 1. + There are exceptions, though: the API will accept a duplicate booking and the users + will be charged for it. A duplicate would return a unique id_verification." + columns: + - name: id_verification + data_type: text + description: "unique Superhog generated id for this verification" + tests: + - unique + - not_null + + - name: id_booking + data_type: text + description: "unique Superhog generated id for a booking. 
+ note that this could be duplicated and both will be charged, + it's up to the user to not generate duplicate verifications" + + - name: id_user_partner + data_type: text + description: "unique Superhog generated id for partner" + tests: + - not_null + + - name: id_accommodation + data_type: text + description: "unique Superhog generated id for a listing" + + - name: version + data_type: text + description: "value to identify if it is Guesty (V1) or E-deposit (V2)" + tests: + - accepted_values: + values: + - V1 + + - name: verification_source + data_type: text + description: "source of the verification for the booking" + tests: + - accepted_values: + values: + - Guesty + - Edeposit + + - name: verification_status + data_type: text + description: "status of the verification" + + - name: nightly_fee_local + data_type: double precision + description: "fee charged per night" + + - name: number_nights + data_type: integer + description: "number of nights for the booking" + + - name: email_flag + data_type: text + description: "screening result for email" + + - name: phone_flag + data_type: text + description: "screening result for phone" + + - name: watch_list + data_type: text + description: "screening result of the guest" + + - name: channel + data_type: text + description: "" + + - name: checkin_at_utc + data_type: timestamp without time zone + description: "Timestamp of checkin for the booking" + + - name: checkin_date_utc + data_type: date + description: "Date of checkin for the booking" + + - name: checkout_at_utc + data_type: timestamp without time zone + description: "Timestamp of checkout for the booking" + + - name: checkout_date_utc + data_type: date + description: "Date of checkout for the booking" + + - name: is_cancelled + data_type: boolean + description: "" + + - name: cancelled_at_utc + data_type: timestamp without time zone + description: "Timestamp of cancellation of the booking" + + - name: cancelled_date_utc + data_type: date + description: "Date of 
cancellation for the booking" + + - name: user_email + data_type: text + description: "" + + - name: guest_email + data_type: text + description: "" + + - name: guest_last_name + data_type: text + description: "" + + - name: guest_first_name + data_type: text + description: "" + + - name: guest_telephone + data_type: text + description: "" + + - name: company_name + data_type: text + description: "" + + - name: property_manager_name + data_type: text + description: "" + + - name: property_manager_email + data_type: text + description: "" + + - name: listing_name + data_type: text + description: "" + + - name: listing_town + data_type: text + description: "" + + - name: listing_country + data_type: text + description: "" + + - name: listing_postcode + data_type: text + description: "" + + - name: pets_allowed + data_type: boolean + description: "" + + - name: level_of_protection_amount + data_type: integer + description: "" + + - name: level_of_protection_currency + data_type: text + description: "" + + - name: status_updated_at_utc + data_type: timestamp without time zone + description: "Timestamp when status was last updated" + + - name: status_updated_date_utc + data_type: date + description: "Date of last status update of the verification" + + - name: updated_at_utc + data_type: timestamp without time zone + description: "Timestamp of last updated of the verification" + + - name: updated_date_utc + data_type: date + description: "Date of last update of the verification" + + - name: athena_creation_at_utc + data_type: timestamp without time zone + description: + "Athena timestamp referring to when the booking was created. + It's provided by Guesty, but is not mandatory. + In case of doubt use created_at_utc or created_date_utc fields" + + - name: athena_creation_date_utc + data_type: date + description: "Athena date referring to when the booking was created. + It's provided by Guesty, but is not mandatory. 
+ In case of doubt use created_at_utc or created_date_utc fields" + + - name: created_at_utc + data_type: timestamp without time zone + description: "Timestamp of creation of the verification in the system" + + - name: created_date_utc + data_type: date + description: "Date of creation of the verification in the system" + + - name: int_athena__verifications_with_fees + description: "This table shows all verification for Guesty. + The charged fee is 2GBP per booked night if booking is approved + (considered 1 night when the checkin and checkout are on the same day), + to be charged on checkout." + columns: + - name: id_verification + data_type: text + description: "unique Superhog generated id for this verification" + tests: + - unique + - not_null + + - name: id_booking + data_type: text + description: "unique Superhog generated id for a booking. + note that there might be duplicate bookings on the original data + but we remove them keeping only the verification with the most recent update." + tests: + - not_null + - unique + + - name: verification_status + data_type: text + description: "status of the verification" + + - name: is_cancelled + data_type: boolean + description: "indicates if the booking has been cancelled or not." 
+ tests: + - not_null + + - name: ok_status_fee_in_gbp + data_type: integer + description: "total fee charged on checkout, this is only charged for approved verifications" + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_between: + min_value: 0 + strictly: true + where: is_cancelled = false and verification_status = 'Approved' + + - name: created_date_utc + data_type: date + description: "Date of creation of the verification in the system" + tests: + - not_null + + - name: checkout_date_utc + data_type: date + description: "Date of checkout for the booking" + tests: + - not_null diff --git a/models/intermediate/edeposit/int_edeposit__verification_fees.sql b/models/intermediate/edeposit/int_edeposit__verification_fees.sql index ad11fe4..8250d6a 100644 --- a/models/intermediate/edeposit/int_edeposit__verification_fees.sql +++ b/models/intermediate/edeposit/int_edeposit__verification_fees.sql @@ -63,7 +63,6 @@ with on cer.from_currency = eu.currency and cer.rate_date_utc = v.checkout_date_utc and cer.to_currency = 'GBP' - where version = 'V2' ), monthly_cancellation_threshold as ( select diff --git a/models/intermediate/edeposit/int_edeposit__verifications.sql b/models/intermediate/edeposit/int_edeposit__verifications.sql index 87c2e4c..a7c8df0 100644 --- a/models/intermediate/edeposit/int_edeposit__verifications.sql +++ b/models/intermediate/edeposit/int_edeposit__verifications.sql @@ -10,9 +10,7 @@ select id_user_partner, id_accommodation, version, - case - when version = 'V1' then 'Guesty' when version = 'V2' then 'Edeposit' else null - end as verification_source, + case when version = 'V2' then 'Edeposit' else null end as verification_source, verification_status, nightly_fee_local, cast(checkout_at_utc as date) - cast(checkin_at_utc as date) as number_nights, diff --git a/models/intermediate/edeposit/schema.yml b/models/intermediate/edeposit/schema.yml index d0dcd33..9356e25 100644 --- a/models/intermediate/edeposit/schema.yml +++ 
b/models/intermediate/edeposit/schema.yml @@ -41,7 +41,6 @@ models: tests: - accepted_values: values: - - V1 - V2 - name: verification_source @@ -219,29 +218,29 @@ models: description: "Unique Superhog generated id for this verification. Note that there are some users that have a different id in Cosmos. For those users we created a mapping to relate this ids." - # tests: - # - unique - # - not_null + tests: + - unique + - not_null - name: id_booking data_type: text description: "unique Superhog generated id for a booking. note that this could be duplicated and both will be charged, it's up to the user to no generate duplicate verifications" - # tests: - # - not_null + tests: + - not_null - name: id_user_partner data_type: text description: "unique Superhog generated id for partner" - # tests: - # - not_null + tests: + - not_null - name: id_accommodation data_type: text description: "unique Superhog generated id for a listing" - # tests: - # - not_null + tests: + - not_null - name: listing_town data_type: text @@ -270,8 +269,8 @@ models: - name: currency data_type: text description: "currency in which the transaction actually happened" - # tests: - # - not_null + tests: + - not_null - name: nightly_fee_local data_type: double precision @@ -284,62 +283,62 @@ models: - name: ok_status_fee_in_txn_currency data_type: numeric description: "fee charged in used currency for approved or flagged verifications and not cancelled" - # tests: - # - not_null - # - dbt_expectations.expect_column_values_to_be_between: - # min_value: 0 - # strictly: false + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_between: + min_value: 0 + strictly: false - name: ok_status_fee_in_gbp data_type: numeric description: "fee charged in gbp for approved or flagged verifications and not cancelled" - # tests: - # - not_null - # - dbt_expectations.expect_column_values_to_be_between: - # min_value: 0 - # strictly: false + tests: + - not_null + - 
dbt_expectations.expect_column_values_to_be_between: + min_value: 0 + strictly: false - name: rejected_fee_in_txn_currency data_type: numeric description: "fee charged in used currency for rejected verifications" - # tests: - # - not_null - # - dbt_expectations.expect_column_values_to_be_between: - # min_value: 0 - # strictly: false + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_between: + min_value: 0 + strictly: false - name: rejected_fee_in_gbp data_type: numeric description: "fee charged in gbp for rejected verifications" - # tests: - # - not_null - # - dbt_expectations.expect_column_values_to_be_between: - # min_value: 0 - # strictly: false + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_between: + min_value: 0 + strictly: false - name: cancelled_fee_in_txn_currency data_type: numeric description: "fee charged in used currency for cancelled verifications" - # tests: - # - not_null - # - dbt_expectations.expect_column_values_to_be_between: - # min_value: 0 - # strictly: false + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_between: + min_value: 0 + strictly: false - name: cancelled_fee_in_gbp data_type: numeric description: "fee charged in gbp for cancelled verifications" - # tests: - # - not_null - # - dbt_expectations.expect_column_values_to_be_between: - # min_value: 0 - # strictly: false + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_between: + min_value: 0 + strictly: false - name: created_date_utc data_type: date description: "Date of creation of the verification in the system" - # tests: - # - not_null + tests: + - not_null - name: cancelled_date_utc data_type: date @@ -348,60 +347,6 @@ models: - name: checkin_date_utc data_type: date description: "Date of checkin for the booking" - # tests: - # - not_null - - - name: checkout_date_utc - data_type: date - description: "Date of checkout for the booking" - # tests: - # - not_null - - - name: 
int_edeposit__guesty_verifications - description: "This table shows all verification for Guesty. - The charged fee is 2GBP per booked night if booking is approved - (considered 1 night when the checkin and checkout are on the same day), - to be charged on checkout." - columns: - - name: id_verification - data_type: text - description: "unique Superhog generated id for this verification" - tests: - - unique - - not_null - - - name: id_booking - data_type: text - description: "unique Superhog generated id for a booking. - note that there might be duplicate bookings on the original data - but we remove them keeping only the verification with the most recent update." - tests: - - not_null - - unique - - - name: verification_status - data_type: text - description: "status of the verification" - - - name: is_cancelled - data_type: boolean - description: "indicates if the booking has been cancelled or not." - tests: - - not_null - - - name: ok_status_fee_in_gbp - data_type: integer - description: "total fee charged on checkout, this is only charged for approved verifications" - tests: - - not_null - - dbt_expectations.expect_column_values_to_be_between: - min_value: 0 - strictly: true - where: is_cancelled = false and verification_status = 'Approved' - - - name: created_date_utc - data_type: date - description: "Date of creation of the verification in the system" tests: - not_null diff --git a/models/reporting/athena/athena__verification_fees.sql b/models/reporting/athena/athena__verification_fees.sql new file mode 100644 index 0000000..1c9e9e1 --- /dev/null +++ b/models/reporting/athena/athena__verification_fees.sql @@ -0,0 +1,14 @@ +with + int_athena__verifications_with_fees as ( + select * from {{ ref("int_athena__verifications_with_fees") }} + ) + +select + avf.id_verification as id_verification, + avf.id_booking as id_booking, + avf.verification_status as verification_status, + avf.is_cancelled as is_cancelled, + avf.ok_status_fee_in_gbp as ok_status_fee_in_gbp, + 
avf.created_date_utc as created_date_utc, + avf.checkout_date_utc as checkout_date_utc +from int_athena__verifications_with_fees avf diff --git a/models/reporting/athena/schema.yml b/models/reporting/athena/schema.yml new file mode 100644 index 0000000..c7ab78c --- /dev/null +++ b/models/reporting/athena/schema.yml @@ -0,0 +1,55 @@ +version: 2 + +models: + - name: athena__verification_fees + description: "This table shows all verification for Guesty. + The charged fee is 2GBP per booked night if booking is approved + (considered 1 night when the checkin and checkout are on the same day), + to be charged on checkout." + columns: + - name: id_verification + data_type: text + description: "unique Superhog generated id for this verification" + tests: + - unique + - not_null + + - name: id_booking + data_type: text + description: "unique Superhog generated id for a booking. + note that this could be duplicated and both will be charged, + it's up to the user to not generate or cancel duplicate verifications" + tests: + - not_null + + - name: verification_status + data_type: text + description: "status of the verification" + + - name: is_cancelled + data_type: boolean + description: "indicates if the booking has been cancelled or not." 
+ tests: + - not_null + + - name: ok_status_fee_in_gbp + data_type: integer + description: "total fee charged on checkout, this is only charged for approved verifications" + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_between: + min_value: 0 + strictly: true + where: is_cancelled = false and verification_status = 'Approved' + + - name: created_date_utc + data_type: date + description: "Date of creation of the verification in the system" + tests: + - not_null + + - name: checkout_date_utc + data_type: date + description: "Date of checkout for the booking" + tests: + - not_null diff --git a/models/reporting/edeposit/edeposit__guesty_verifications.sql b/models/reporting/edeposit/edeposit__guesty_verifications.sql index d2e88df..ddcdf08 100644 --- a/models/reporting/edeposit/edeposit__guesty_verifications.sql +++ b/models/reporting/edeposit/edeposit__guesty_verifications.sql @@ -1,14 +1,8 @@ -with - int_edeposit__guesty_verifications as ( - select * from {{ ref("int_edeposit__guesty_verifications") }} - ) - -select - gv.id_verification as id_verification, - gv.id_booking as id_booking, - gv.verification_status as verification_status, - gv.is_cancelled as is_cancelled, - gv.ok_status_fee_in_gbp as ok_status_fee_in_gbp, - gv.created_date_utc as created_date_utc, - gv.checkout_date_utc as checkout_date_utc -from int_edeposit__guesty_verifications gv +/* +This model here should not exist and will be deprecated. Downstream dependencies +should switch to reading from the new `athena__verification_fees`. The model +remains here as pointer to the new one to give downstream dependencies time to switch. 
+*/ +with athena__verification_fees as (select * from {{ ref("athena__verification_fees") }}) +select * +from athena__verification_fees diff --git a/models/reporting/edeposit/schema.yml b/models/reporting/edeposit/schema.yml index c730ce8..81c7206 100644 --- a/models/reporting/edeposit/schema.yml +++ b/models/reporting/edeposit/schema.yml @@ -12,30 +12,30 @@ models: - name: id_verification data_type: text description: "unique Superhog generated id for this verification" - # tests: - # - unique - # - not_null + tests: + - unique + - not_null - name: id_booking data_type: text description: "unique Superhog generated id for a booking. note that there might be duplicate bookings on the original data but we remove them keeping only the verification with the most recent update." - # tests: - # - not_null - # - unique + tests: + - not_null + - unique - name: id_user_partner data_type: text description: "unique Superhog generated id for partner" - # tests: - # - not_null + tests: + - not_null - name: id_accommodation data_type: text description: "unique Superhog generated id for a listing" - # tests: - # - not_null + tests: + - not_null - name: listing_town data_type: text @@ -64,8 +64,8 @@ models: - name: currency data_type: text description: "currency in which the transaction actually happened" - # tests: - # - not_null + tests: + - not_null - name: nightly_fee_local data_type: double precision @@ -78,62 +78,62 @@ models: - name: ok_status_fee_in_txn_currency data_type: numeric description: "fee charged in used currency for approved or flagged verifications and not cancelled" - # tests: - # - not_null - # - dbt_expectations.expect_column_values_to_be_between: - # min_value: 0 - # strictly: false + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_between: + min_value: 0 + strictly: false - name: ok_status_fee_in_gbp data_type: numeric description: "fee charged in gbp for approved or flagged verifications and not cancelled" - # tests: - # - not_null - # 
- dbt_expectations.expect_column_values_to_be_between: - # min_value: 0 - # strictly: false + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_between: + min_value: 0 + strictly: false - name: rejected_fee_in_txn_currency data_type: numeric description: "fee charged in used currency for rejected verifications" - # tests: - # - not_null - # - dbt_expectations.expect_column_values_to_be_between: - # min_value: 0 - # strictly: false + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_between: + min_value: 0 + strictly: false - name: rejected_fee_in_gbp data_type: numeric description: "fee charged in gbp for rejected verifications" - # tests: - # - not_null - # - dbt_expectations.expect_column_values_to_be_between: - # min_value: 0 - # strictly: false + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_between: + min_value: 0 + strictly: false - name: cancelled_fee_in_txn_currency data_type: numeric description: "fee charged in used currency for cancelled verifications" - # tests: - # - not_null - # - dbt_expectations.expect_column_values_to_be_between: - # min_value: 0 - # strictly: false + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_between: + min_value: 0 + strictly: false - name: cancelled_fee_in_gbp data_type: numeric description: "fee charged in gbp for cancelled verifications" - # tests: - # - not_null - # - dbt_expectations.expect_column_values_to_be_between: - # min_value: 0 - # strictly: false + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_between: + min_value: 0 + strictly: false - name: created_date_utc data_type: date description: "Date of creation of the verification in the system" - # tests: - # - not_null + tests: + - not_null - name: cancelled_date_utc data_type: date @@ -142,20 +142,26 @@ models: - name: checkin_date_utc data_type: date description: "Date of checkin for the booking" - # tests: - # - not_null + tests: + - not_null - name: 
checkout_date_utc data_type: date description: "Date of checkout for the booking" - # tests: - # - not_null + tests: + - not_null - name: edeposit__guesty_verifications description: "This table shows all verification for Guesty. The charged fee is 2GBP per booked night if booking is approved (considered 1 night when the checkin and checkout are on the same day), to be charged on checkout." + latest_version: 1 + versions: + - v: 1 + deprecation_date: "2024-10-15T00:00:00Z" + config: + alias: edeposit__guesty_verifications columns: - name: id_verification data_type: text diff --git a/models/staging/athena/_athena_sources.yml b/models/staging/athena/_athena_sources.yml new file mode 100644 index 0000000..9f42604 --- /dev/null +++ b/models/staging/athena/_athena_sources.yml @@ -0,0 +1,10 @@ +version: 2 + +sources: + - name: athena + # This will have to change to the Athena sync schema once the + # Athena/Edeposit database split happens. + schema: sync_cdb_edeposit + tables: + - name: verifications + identifier: verifications diff --git a/models/staging/athena/schema.yml b/models/staging/athena/schema.yml new file mode 100644 index 0000000..4d7bb9a --- /dev/null +++ b/models/staging/athena/schema.yml @@ -0,0 +1,269 @@ +version: 2 + +models: + - name: stg_athena__verifications + description: | + Records of each transaction that happens in the Athena API. Records are + mutable and can get updated. + + The table originally receives both records from Athena and edeposit APIs, + but we force only keeping Athena (V1) records here. + columns: + - name: id_verification + data_type: character varying + description: "Unique id for the specific transaction." + tests: + - unique + - not_null + + - name: id_booking + data_type: text + description: "" + tests: + - not_null: + where: created_at_utc > '2024-04-01T00:00:00Z' and verification_status != 'InsufficientInformation' + + - name: id_user_partner + data_type: text + description: The unique ID of the partner calling the API. 
+ tests: + - not_null + + - name: id_accommodation + data_type: text + description: An ID for the listing related to the booking. + + - name: version + data_type: text + description: | + Indicates whether the verification is for V1 (Athena) or V2 + (e-deposit). + tests: + - not_null + - accepted_values: + values: + - "V1" + + - name: nightly_fee_local + data_type: numeric + description: | + The fee per night to be charged for this verification, in the currency + of the user. Note that the Athena/e-deposit user also has a configured + nightly fee. It's unclear at this point which one has priority for + billing. + tests: + - dbt_expectations.expect_column_values_to_be_between: + min_value: 0 + max_value: 100 + strictly: true + + - name: verification_status + data_type: text + description: | + This field shows the outcome of the Verification itself. + tests: + - not_null + - accepted_values: + values: + - "Approved" + - "Flagged" + - "Rejected" + - "InsufficientInformation" + + - name: verification_status_reason + data_type: text + description: Not used, ignore. + + - name: email_flag + data_type: text + description: | + Null if the email shows no issues, otherwise it details the problems + attached to the given email. + + - name: phone_flag + data_type: text + description: | + Null if the phone number shows no issues, otherwise it details the + problems attached to the given phone number. 
+ tests: + - accepted_values: + values: + - "Phone number not reachable" + - "Not a real phone number" + - "Phone number is disposable" + + - name: watch_list + data_type: text + description: "" + tests: + - accepted_values: + values: + - "Match" + - "No Match" + + - name: channel + data_type: text + description: "" + + - name: checkin_at_utc + data_type: timestamp without time zone + description: "" + + - name: checkout_at_utc + data_type: timestamp without time zone + description: "" + + - name: is_cancelled + data_type: boolean + description: | + Indicates if the booking has been cancelled or not. At the source, + null and false values have the same meaning, so here we turn nulls + into false to keep things simple. + tests: + - not_null + - accepted_values: + values: + - true + - false + + - name: cancelled_at_utc + data_type: timestamp without time zone + description: If the booking was cancelled, when did we learn about it. + tests: + - not_null: + where: is_cancelled = true + + - name: user_email + data_type: text + description: | + The email of the Athena/e-deposit partner user. 
+ + - name: guest_email + data_type: text + description: "" + + - name: guest_last_name + data_type: text + description: "" + + - name: guest_first_name + data_type: text + description: "" + + - name: guest_telephone + data_type: text + description: "" + + - name: company_name + data_type: text + description: "" + + - name: property_manager_name + data_type: text + description: "" + + - name: property_manager_email + data_type: text + description: "" + + - name: listing_name + data_type: text + description: "" + + - name: listing_town + data_type: text + description: "" + + - name: listing_address + data_type: text + description: "" + + - name: listing_country + data_type: text + description: "" + + - name: listing_postcode + data_type: text + description: "" + + - name: pets_allowed + data_type: boolean + description: "" + + - name: level_of_protection_amount + data_type: integer + description: Ignore. + + - name: level_of_protection_currency + data_type: text + description: Ignore. + + - name: attachments + data_type: text + description: "" + + - name: status_updated_at_utc + data_type: timestamp without time zone + description: | + Timestamp of the last time the record was modified before screening + happened. Only relevant for V1 records. + tests: + - not_null: + where: version = 'V1' + + - name: status_updated_date_utc + data_type: date + description: | + Date of the last time the record was modified before screening + happened. Only relevant for V1 records. + tests: + - not_null: + where: version = 'V1' + + - name: updated_at_utc + data_type: timestamp without time zone + description: | + Timestamp of the last edit of the record, as set by the + Athena/e-deposit application. + tests: + - not_null + + - name: updated_date_utc + data_type: date + description: | + Date of the last edit of the record, as set by the + Athena/e-deposit application. 
+ tests: + - not_null + + - name: athena_creation_at_utc + data_type: timestamp without time zone + description: | + A client-provided timestamp of when the booking was created. It's an + optional field, only relevant for 'V1' records. + + - name: athena_creation_date_utc + data_type: date + description: | + A client-provided date of when the booking was created. It's an + optional field, only relevant for 'V1' records. + + - name: created_at_utc + data_type: timestamp without time zone + description: | + The internal application timestamp of when this record was created. + tests: + - not_null + + - name: created_date_utc + data_type: date + description: | + The internal application date of when this record was created. + tests: + - not_null + + - name: cosmos_db_timestamp_utc + data_type: timestamp with time zone + description: The internal Cosmos DB timestamp of the last record update. + tests: + - not_null diff --git a/models/staging/athena/stg_athena__verifications.sql b/models/staging/athena/stg_athena__verifications.sql new file mode 100644 index 0000000..3ffdabd --- /dev/null +++ b/models/staging/athena/stg_athena__verifications.sql @@ -0,0 +1,89 @@ +with + raw_verifications as (select * from {{ source("athena", "verifications") }}), + deduped_verifications as ( + {{ cosmos_db_record_deduplication("raw_verifications", "id") }} + ), + stg_athena__verifications as ( + select + {{ adapter.quote("documents") }} ->> 'id' as id_verification, + {{ adapter.quote("documents") }} ->> 'BookingId' as id_booking, + {{ adapter.quote("documents") }} ->> 'userId' as id_user_partner, + {{ adapter.quote("documents") }} ->> 'ListingId' as id_accommodation, + + {{ adapter.quote("documents") }} ->> 'Version' as "version", + + cast( + {{ adapter.quote("documents") }} ->> 'NightlyFee' as decimal(19, 4) + ) as "nightly_fee_local", + + {{ adapter.quote("documents") }} ->> 'Status' as verification_status, + {{ adapter.quote("documents") 
}} + ->> 'StatusReason' as verification_status_reason, + {{ adapter.quote("documents") }} ->> 'EmailFlag' as email_flag, + {{ adapter.quote("documents") }} ->> 'PhoneFlag' as phone_flag, + {{ adapter.quote("documents") }} ->> 'WatchList' as watch_list, + + {{ adapter.quote("documents") }} ->> 'Channel' as channel, + + ({{ adapter.quote("documents") }} ->> 'CheckIn')::timestamp + as checkin_at_utc, + ({{ adapter.quote("documents") }} ->> 'CheckOut')::timestamp + as checkout_at_utc, + coalesce( + ({{ adapter.quote("documents") }} ->> 'Cancelled')::boolean, false + ) as is_cancelled, + ({{ adapter.quote("documents") }} ->> 'CancellationDate')::timestamp + as cancelled_at_utc, + + {{ adapter.quote("documents") }} ->> 'UserEmail' as user_email, + {{ adapter.quote("documents") }} ->> 'GuestEmail' as guest_email, + {{ adapter.quote("documents") }} ->> 'GuestLastName' as guest_last_name, + {{ adapter.quote("documents") }} ->> 'GuestFirstName' as guest_first_name, + {{ adapter.quote("documents") }} ->> 'GuestTelephone' as guest_telephone, + + {{ adapter.quote("documents") }} ->> 'CompanyName' as company_name, + {{ adapter.quote("documents") }} + ->> 'PropertyManagerName' as property_manager_name, + {{ adapter.quote("documents") }} + ->> 'PropertyManagerEmail' as property_manager_email, + {{ adapter.quote("documents") }} ->> 'ListingName' as listing_name, + {{ adapter.quote("documents") }} ->> 'ListingTown' as listing_town, + + {{ adapter.quote("documents") }} ->> 'ListingAddress' as listing_address, + {{ adapter.quote("documents") }} ->> 'ListingCountry' as listing_country, + {{ adapter.quote("documents") }} ->> 'ListingPostcode' as listing_postcode, + ({{ adapter.quote("documents") }} ->> 'PetsAllowed')::boolean + as pets_allowed, + + ({{ adapter.quote("documents") }} ->> 'LevelOfProtectionAmount')::float + ::integer as level_of_protection_amount, + ( + {{ adapter.quote("documents") }} ->> 'LevelOfProtectionCurrency' + ) as level_of_protection_currency, + + {{ 
adapter.quote("documents") }} ->> '_attachments' as attachments, + + ({{ adapter.quote("documents") }} ->> 'StatusUpdatedDate')::timestamp + as status_updated_at_utc, + ({{ adapter.quote("documents") }} ->> 'StatusUpdatedDate')::date + as status_updated_date_utc, + ({{ adapter.quote("documents") }} ->> 'UpdatedDate')::timestamp + as updated_at_utc, + ({{ adapter.quote("documents") }} ->> 'UpdatedDate')::date + as updated_date_utc, + ({{ adapter.quote("documents") }} ->> 'CreationDate')::timestamp + as athena_creation_at_utc, + ({{ adapter.quote("documents") }} ->> 'CreationDate')::date + as athena_creation_date_utc, + ({{ adapter.quote("documents") }} ->> 'CreatedDate')::timestamp + as created_at_utc, + ({{ adapter.quote("documents") }} ->> 'CreatedDate')::date + as created_date_utc, + to_timestamp( + (({{ adapter.quote("documents") }} ->> '_ts'))::integer + ) as cosmos_db_timestamp_utc + from deduped_verifications + ) +select * +from stg_athena__verifications +where version = 'V1' diff --git a/models/staging/edeposit/schema.yml b/models/staging/edeposit/schema.yml index 4d7167c..1d56ff4 100644 --- a/models/staging/edeposit/schema.yml +++ b/models/staging/edeposit/schema.yml @@ -2,9 +2,12 @@ version: 2 models: - name: stg_edeposit__verifications - description: - "Records of each transaction that happens in the edeposit API. Records are - mutable and can get updated." + description: | + Records of each transaction that happens in the edeposit API. Records are + mutable and can get updated. + + The table originally receives both records from Athena and edeposit APIs, + but we force only keeping edeposit (V2) records here. 
columns: - name: id_verification data_type: character varying @@ -39,7 +42,6 @@ models: - not_null - accepted_values: values: - - "V1" - "V2" - name: nightly_fee_local diff --git a/models/staging/edeposit/stg_edeposit__verifications.sql b/models/staging/edeposit/stg_edeposit__verifications.sql index 4be0d06..72fbc12 100644 --- a/models/staging/edeposit/stg_edeposit__verifications.sql +++ b/models/staging/edeposit/stg_edeposit__verifications.sql @@ -86,3 +86,4 @@ with ) select * from stg_edeposit__verifications +where version = 'V2'