diff --git a/models/staging/edeposit/schema.yml b/models/staging/edeposit/schema.yml
new file mode 100644
index 0000000..a90dec6
--- /dev/null
+++ b/models/staging/edeposit/schema.yml
@@ -0,0 +1,15 @@
+version: 2
+
+models:
+  - name: stg_edeposit__verifications
+    description:
+      "Records of each transaction that happens in the edeposit API. Records are
+      mutable and can get updated."
+    columns:
+      - name: id
+        data_type: character varying
+        description: "Unique id for the specific transaction."
+        tests:
+          - unique
+          - not_null
+      # TODO: document the remaining columns of this model.
diff --git a/models/staging/edeposit/stg_edeposit__verifications.sql b/models/staging/edeposit/stg_edeposit__verifications.sql
index 50c4fe2..5a57434 100644
--- a/models/staging/edeposit/stg_edeposit__verifications.sql
+++ b/models/staging/edeposit/stg_edeposit__verifications.sql
@@ -1,5 +1,30 @@
 with
     raw_verifications as (select * from {{ source("edeposit", "verifications") }}),
+    deduped_verifications as (
+        -- Keep only the most recent version of each Cosmos DB document:
+        -- rows are ranked per document id by `_ts` descending and only the
+        -- top-ranked row is kept. Ties on `_ts` are broken arbitrarily by
+        -- row_number().
+        --
+        -- Thoughts for the future:
+        -- - Ranking the whole table on every run performs poorly, but data
+        --   size is tiny today. Tackle the problem as it comes.
+        -- - The same dedup logic applies to every Cosmos DB entity brought
+        --   into the DWH; only the source table and the PK change. It is
+        --   probably worth a macro once a second container hits the DWH.
+        select *
+        from (
+            select
+                *,
+                row_number() over (
+                    partition by {{ adapter.quote("documents") }} ->> 'id'
+                    order by
+                        ({{ adapter.quote("documents") }} ->> '_ts')::integer desc
+                ) as rank
+            from raw_verifications
+        ) as ranked_verifications
+        where rank = 1
+    ),
     stg_edeposit__verifications as (
         select
             {{ adapter.quote("documents") }} ->> 'id' as id,
@@ -44,8 +69,9 @@ with
             ({{ adapter.quote("documents") }} ->> 'LevelOfProtectionAmount')::float
             ::integer as level_of_protection_amount,
 
-            ({{ adapter.quote("documents") }} ->> 'LevelOfProtectionCurrency')::integer
-            as level_of_protection_currency,
+            (
+                {{ adapter.quote("documents") }} ->> 'LevelOfProtectionCurrency'
+            ) as level_of_protection_currency,
 
             {{ adapter.quote("documents") }} ->> '_attachments' as attachments,
 
@@ -60,7 +86,7 @@ with
             to_timestamp(
                 (({{ adapter.quote("documents") }} ->> '_ts'))::integer
             ) as cosmos_db_timestamp_utc
-        from raw_verifications
+        from deduped_verifications
     )
 select *
 from stg_edeposit__verifications