Merged PR 2036: Create int rates table
- Creates an intermediate rates table that both uses real rates from xe.com and also make some assumptions about future and past dates. - Also took the chance to place all orphan int models into a `cross` folder. Related work items: #17212
This commit is contained in:
commit
670cf9f30f
6 changed files with 237 additions and 2 deletions
|
|
@ -47,4 +47,6 @@ seeds:
|
|||
|
||||
vars:
|
||||
"dbt_date:time_zone": "Europe/London"
|
||||
"start_date": "'2020-01-01'"
|
||||
# A general cutoff date for relevancy. Many models assume this to be the point
|
||||
# in time after which they should work.
|
||||
"start_date": "'2020-01-01'"
|
||||
147
models/intermediate/cross/int_daily_currency_exchange_rates.sql
Normal file
147
models/intermediate/cross/int_daily_currency_exchange_rates.sql
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
/*
|
||||
How deep into the future since the last available real rate should rates be
|
||||
projected. Must follow be a valid postgres interval string, such as '1 years'
|
||||
See examples here:
|
||||
https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT-EXAMPLES
|
||||
*/
|
||||
{% set interval_into_the_future = "'3 years'" %}
|
||||
|
||||
with
|
||||
stg_xedotcom__exchange_rates as (
|
||||
select * from {{ ref("stg_xedotcom__exchange_rates") }}
|
||||
),
|
||||
-- CTEs for projecting forward
|
||||
most_recent_xe_date_per_curr_pair as (
|
||||
select from_currency, to_currency, max(rate_date_utc) as most_recent_date_utc
|
||||
from stg_xedotcom__exchange_rates
|
||||
group by 1, 2
|
||||
),
|
||||
last_xe_rate_per_curr_pair as (
|
||||
select xe.from_currency, xe.to_currency, xe.rate
|
||||
from stg_xedotcom__exchange_rates xe
|
||||
inner join
|
||||
most_recent_xe_date_per_curr_pair recent
|
||||
on xe.from_currency = recent.from_currency
|
||||
and xe.to_currency = recent.to_currency
|
||||
and xe.rate_date_utc = recent.most_recent_date_utc
|
||||
),
|
||||
future_dates as (
|
||||
select date_trunc('day', day_series)::date as future_date
|
||||
from
|
||||
generate_series
|
||||
(
|
||||
(
|
||||
select max(most_recent_date_utc)
|
||||
from most_recent_xe_date_per_curr_pair
|
||||
)::timestamp
|
||||
+ interval '1 days', -- we don't want to pick the last real day
|
||||
(
|
||||
select max(most_recent_date_utc)
|
||||
from most_recent_xe_date_per_curr_pair
|
||||
)::timestamp
|
||||
+ interval {{ interval_into_the_future }},
|
||||
'1 day'::interval
|
||||
) day_series
|
||||
),
|
||||
projected_future_rates as (
|
||||
select
|
||||
concat(
|
||||
last_rate.from_currency,
|
||||
last_rate.to_currency,
|
||||
future_dates.future_date,
|
||||
'xe_inferred',
|
||||
'forecast'
|
||||
) as id_exchange_rate,
|
||||
last_rate.from_currency,
|
||||
last_rate.to_currency,
|
||||
last_rate.rate,
|
||||
future_dates.future_date as rate_date_utc,
|
||||
'xe_inferred' as source,
|
||||
'forecast' as rate_version,
|
||||
now() as updated_at_utc
|
||||
from future_dates future_dates
|
||||
cross join last_xe_rate_per_curr_pair last_rate
|
||||
),
|
||||
-- CTEs for projecting backward
|
||||
oldest_xe_date_per_curr_pair as (
|
||||
select from_currency, to_currency, max(rate_date_utc) as oldest_date_utc
|
||||
from stg_xedotcom__exchange_rates
|
||||
group by 1, 2
|
||||
),
|
||||
first_xe_rate_per_curr_pair as (
|
||||
select xe.from_currency, xe.to_currency, xe.rate
|
||||
from stg_xedotcom__exchange_rates xe
|
||||
inner join
|
||||
oldest_xe_date_per_curr_pair recent
|
||||
on xe.from_currency = recent.from_currency
|
||||
and xe.to_currency = recent.to_currency
|
||||
and xe.rate_date_utc = recent.oldest_date_utc
|
||||
),
|
||||
past_dates as (
|
||||
select date_trunc('day', day_series)::date as past_date
|
||||
from
|
||||
generate_series
|
||||
(
|
||||
{{ var("start_date") }}::timestamp,
|
||||
(
|
||||
select max(most_recent_date_utc)
|
||||
from most_recent_xe_date_per_curr_pair
|
||||
)::timestamp
|
||||
- interval '2 days', -- we don't want to pick the last real day,
|
||||
'1 day'::interval
|
||||
) day_series
|
||||
),
|
||||
projected_past_rates as (
|
||||
select
|
||||
concat(
|
||||
first_rate.from_currency,
|
||||
first_rate.to_currency,
|
||||
past_dates.past_date,
|
||||
'xe_inferred',
|
||||
'guess'
|
||||
) as id_exchange_rate,
|
||||
first_rate.from_currency,
|
||||
first_rate.to_currency,
|
||||
first_rate.rate,
|
||||
past_dates.past_date as rate_date_utc,
|
||||
'xe_inferred' as source,
|
||||
'guess' as rate_version,
|
||||
now() as updated_at_utc
|
||||
from past_dates
|
||||
cross join first_xe_rate_per_curr_pair first_rate
|
||||
)
|
||||
-- here we union actual stuff, future projected stuff and past assumed stuff
|
||||
select
|
||||
concat(
|
||||
xe.from_currency, xe.to_currency, xe.rate_date_utc, 'xedotcom', 'actual'
|
||||
) as id_exchange_rate, -- the id from staging is not useful here because dimensionality is different: here we have rate_source and rate_version
|
||||
xe.from_currency,
|
||||
xe.to_currency,
|
||||
xe.rate,
|
||||
xe.rate_date_utc,
|
||||
'xedotcom' as source,
|
||||
'actual' as rate_version,
|
||||
xe.dwh_extracted_at_utc as updated_at_utc
|
||||
from stg_xedotcom__exchange_rates xe
|
||||
union all
|
||||
select
|
||||
fu.id_exchange_rate,
|
||||
fu.from_currency,
|
||||
fu.to_currency,
|
||||
fu.rate,
|
||||
fu.rate_date_utc,
|
||||
fu.source,
|
||||
fu.rate_version,
|
||||
fu.updated_at_utc
|
||||
from projected_future_rates fu
|
||||
union all
|
||||
select
|
||||
pa.id_exchange_rate,
|
||||
pa.from_currency,
|
||||
pa.to_currency,
|
||||
pa.rate,
|
||||
pa.rate_date_utc,
|
||||
pa.source,
|
||||
pa.rate_version,
|
||||
pa.updated_at_utc
|
||||
from projected_past_rates pa
|
||||
|
|
@ -4,4 +4,4 @@ with
|
|||
select d.date_day as rate_date, r.*
|
||||
from dates d
|
||||
cross join hardcoded_rates r
|
||||
where d.date_day > cast('2020-01-01' as date)
|
||||
where d.date_day > cast({{ var("start_date") }} as date)
|
||||
86
models/intermediate/cross/schema.yml
Normal file
86
models/intermediate/cross/schema.yml
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
models:
|
||||
- name: int_daily_currency_exchange_rates
|
||||
description: >-
|
||||
This model holds a lot of data on currency exchange rates. The time
|
||||
granularity is daily. Each record holds a currency pair for a specific
|
||||
day, source and version.
|
||||
|
||||
Actual rates are sourced from xe.com data. The `guessed` and `forecast`
|
||||
versions are built by simply 'pushing' the first/last exchange rate on
|
||||
record. Basically, wherever we dont' have data for a date, we pick the
|
||||
closest actual data point that comes from xe.com. Bear in mind this means
|
||||
that `forecast` version records will change on a daily basis as actual
|
||||
data moves forwards, meaning you shouldn't assume your money amounts
|
||||
converted in the future should always stay put.
|
||||
|
||||
Note that, given the dimensionality, getting a simple time series for a
|
||||
currency pair will require a bit of filtering.
|
||||
|
||||
Reverse rates are explicit. This means that, for any given day and any
|
||||
given currency pair, you will find two records with opposite from/to
|
||||
positions. So, for 2024-01-01, you will find both a EUR->USD record and a
|
||||
USD->EUR record with the opposite rate (1/rate).
|
||||
columns:
|
||||
- name: id_exchange_rate
|
||||
data_type: text
|
||||
description: A unique ID for the record, derived from concatenating the
|
||||
currencies, date, source and version. Currency order is relevant
|
||||
(EURUSD != USDEUR).
|
||||
tests:
|
||||
- not_null
|
||||
- unique
|
||||
- name: from_currency
|
||||
data_type: character
|
||||
description: The source currency, represented as an ISO 4217 code.
|
||||
tests:
|
||||
- not_null
|
||||
- name: to_currency
|
||||
data_type: character
|
||||
description: The target currency, represented as an ISO 4217 code.
|
||||
tests:
|
||||
- not_null
|
||||
- name: rate
|
||||
data_type: numeric
|
||||
description: >-
|
||||
The exchange rate, represented as the units of the target currency
|
||||
that one unit of source currency gets you. So, from_currency=USD to
|
||||
to_currency=PLN with rate=4.2 should be read as '1 US Dollar buys me
|
||||
4.2 Polish Zlotys'.
|
||||
|
||||
For same currency pairs (EUR to EUR, USD to USD, etc). The rate will
|
||||
always be one.
|
||||
|
||||
The rate can be smaller than one, but can't be negative.
|
||||
tests:
|
||||
- not_negative_or_zero
|
||||
- not_null
|
||||
- name: rate_date_utc
|
||||
data_type: date
|
||||
description: The date in which the rate record is relevant.
|
||||
tests:
|
||||
- not_null
|
||||
- name: source
|
||||
data_type: text
|
||||
description: Where is the data coming from. Records that are composed from
|
||||
making assumptions on real data will contain `_inferred`.
|
||||
- name: rate_version
|
||||
data_type: text
|
||||
description: The version of the rate. This can be one of `actual` (the rate is a
|
||||
reality fact), `forecast` (the rate sits in the future and is a guess
|
||||
in nature) or `guess` (the rate sits in the past and is a guess in
|
||||
nature). Note that one currency pair can have multiple rate versions
|
||||
on the same date.
|
||||
tests:
|
||||
- accepted_values:
|
||||
values:
|
||||
- guess
|
||||
- actual
|
||||
- forecast
|
||||
- not_null
|
||||
- name: updated_at_utc
|
||||
data_type: timestamp with time zone
|
||||
description: For external sources, this will be the point in time when the
|
||||
information was obtained from them. For stuff we make up here in the
|
||||
DWH, this will be the point in time when we made the assumption.
|
||||
tests:
|
||||
- not_null
|
||||
Loading…
Add table
Add a link
Reference in a new issue