Merged PR 2036: Create int rates table

- Creates an intermediate rates table that both uses real rates from xe.com and also make some assumptions about future and past dates.
- Also took the chance to place all orphan int models into a `cross` folder.

Related work items: #17212
This commit is contained in:
Pablo Martín 2024-06-14 14:30:15 +00:00
commit 670cf9f30f
6 changed files with 237 additions and 2 deletions

View file

@ -47,4 +47,6 @@ seeds:
vars:
"dbt_date:time_zone": "Europe/London"
"start_date": "'2020-01-01'"
# A general cutoff date for relevancy. Many models assume this to be the point
# in time after which they should work.
"start_date": "'2020-01-01'"

View file

@ -0,0 +1,147 @@
/*
How deep into the future since the last available real rate should rates be
projected. Must follow be a valid postgres interval string, such as '1 years'
See examples here:
https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT-EXAMPLES
*/
{% set interval_into_the_future = "'3 years'" %}
with
stg_xedotcom__exchange_rates as (
select * from {{ ref("stg_xedotcom__exchange_rates") }}
),
-- CTEs for projecting forward
most_recent_xe_date_per_curr_pair as (
select from_currency, to_currency, max(rate_date_utc) as most_recent_date_utc
from stg_xedotcom__exchange_rates
group by 1, 2
),
last_xe_rate_per_curr_pair as (
select xe.from_currency, xe.to_currency, xe.rate
from stg_xedotcom__exchange_rates xe
inner join
most_recent_xe_date_per_curr_pair recent
on xe.from_currency = recent.from_currency
and xe.to_currency = recent.to_currency
and xe.rate_date_utc = recent.most_recent_date_utc
),
future_dates as (
select date_trunc('day', day_series)::date as future_date
from
generate_series
(
(
select max(most_recent_date_utc)
from most_recent_xe_date_per_curr_pair
)::timestamp
+ interval '1 days', -- we don't want to pick the last real day
(
select max(most_recent_date_utc)
from most_recent_xe_date_per_curr_pair
)::timestamp
+ interval {{ interval_into_the_future }},
'1 day'::interval
) day_series
),
projected_future_rates as (
select
concat(
last_rate.from_currency,
last_rate.to_currency,
future_dates.future_date,
'xe_inferred',
'forecast'
) as id_exchange_rate,
last_rate.from_currency,
last_rate.to_currency,
last_rate.rate,
future_dates.future_date as rate_date_utc,
'xe_inferred' as source,
'forecast' as rate_version,
now() as updated_at_utc
from future_dates future_dates
cross join last_xe_rate_per_curr_pair last_rate
),
-- CTEs for projecting backward
oldest_xe_date_per_curr_pair as (
select from_currency, to_currency, max(rate_date_utc) as oldest_date_utc
from stg_xedotcom__exchange_rates
group by 1, 2
),
first_xe_rate_per_curr_pair as (
select xe.from_currency, xe.to_currency, xe.rate
from stg_xedotcom__exchange_rates xe
inner join
oldest_xe_date_per_curr_pair recent
on xe.from_currency = recent.from_currency
and xe.to_currency = recent.to_currency
and xe.rate_date_utc = recent.oldest_date_utc
),
past_dates as (
select date_trunc('day', day_series)::date as past_date
from
generate_series
(
{{ var("start_date") }}::timestamp,
(
select max(most_recent_date_utc)
from most_recent_xe_date_per_curr_pair
)::timestamp
- interval '2 days', -- we don't want to pick the last real day,
'1 day'::interval
) day_series
),
projected_past_rates as (
select
concat(
first_rate.from_currency,
first_rate.to_currency,
past_dates.past_date,
'xe_inferred',
'guess'
) as id_exchange_rate,
first_rate.from_currency,
first_rate.to_currency,
first_rate.rate,
past_dates.past_date as rate_date_utc,
'xe_inferred' as source,
'guess' as rate_version,
now() as updated_at_utc
from past_dates
cross join first_xe_rate_per_curr_pair first_rate
)
-- here we union actual stuff, future projected stuff and past assumed stuff
select
concat(
xe.from_currency, xe.to_currency, xe.rate_date_utc, 'xedotcom', 'actual'
) as id_exchange_rate, -- the id from staging is not useful here because dimensionality is different: here we have rate_source and rate_version
xe.from_currency,
xe.to_currency,
xe.rate,
xe.rate_date_utc,
'xedotcom' as source,
'actual' as rate_version,
xe.dwh_extracted_at_utc as updated_at_utc
from stg_xedotcom__exchange_rates xe
union all
select
fu.id_exchange_rate,
fu.from_currency,
fu.to_currency,
fu.rate,
fu.rate_date_utc,
fu.source,
fu.rate_version,
fu.updated_at_utc
from projected_future_rates fu
union all
select
pa.id_exchange_rate,
pa.from_currency,
pa.to_currency,
pa.rate,
pa.rate_date_utc,
pa.source,
pa.rate_version,
pa.updated_at_utc
from projected_past_rates pa

View file

@ -4,4 +4,4 @@ with
select d.date_day as rate_date, r.*
from dates d
cross join hardcoded_rates r
where d.date_day > cast('2020-01-01' as date)
where d.date_day > cast({{ var("start_date") }} as date)

View file

@ -0,0 +1,86 @@
models:
- name: int_daily_currency_exchange_rates
description: >-
This model holds a lot of data on currency exchange rates. The time
granularity is daily. Each record holds a currency pair for a specific
day, source and version.
Actual rates are sourced from xe.com data. The `guessed` and `forecast`
versions are built by simply 'pushing' the first/last exchange rate on
record. Basically, wherever we dont' have data for a date, we pick the
closest actual data point that comes from xe.com. Bear in mind this means
that `forecast` version records will change on a daily basis as actual
data moves forwards, meaning you shouldn't assume your money amounts
converted in the future should always stay put.
Note that, given the dimensionality, getting a simple time series for a
currency pair will require a bit of filtering.
Reverse rates are explicit. This means that, for any given day and any
given currency pair, you will find two records with opposite from/to
positions. So, for 2024-01-01, you will find both a EUR->USD record and a
USD->EUR record with the opposite rate (1/rate).
columns:
- name: id_exchange_rate
data_type: text
description: A unique ID for the record, derived from concatenating the
currencies, date, source and version. Currency order is relevant
(EURUSD != USDEUR).
tests:
- not_null
- unique
- name: from_currency
data_type: character
description: The source currency, represented as an ISO 4217 code.
tests:
- not_null
- name: to_currency
data_type: character
description: The target currency, represented as an ISO 4217 code.
tests:
- not_null
- name: rate
data_type: numeric
description: >-
The exchange rate, represented as the units of the target currency
that one unit of source currency gets you. So, from_currency=USD to
to_currency=PLN with rate=4.2 should be read as '1 US Dollar buys me
4.2 Polish Zlotys'.
For same currency pairs (EUR to EUR, USD to USD, etc). The rate will
always be one.
The rate can be smaller than one, but can't be negative.
tests:
- not_negative_or_zero
- not_null
- name: rate_date_utc
data_type: date
description: The date in which the rate record is relevant.
tests:
- not_null
- name: source
data_type: text
description: Where is the data coming from. Records that are composed from
making assumptions on real data will contain `_inferred`.
- name: rate_version
data_type: text
description: The version of the rate. This can be one of `actual` (the rate is a
reality fact), `forecast` (the rate sits in the future and is a guess
in nature) or `guess` (the rate sits in the past and is a guess in
nature). Note that one currency pair can have multiple rate versions
on the same date.
tests:
- accepted_values:
values:
- guess
- actual
- forecast
- not_null
- name: updated_at_utc
data_type: timestamp with time zone
description: For external sources, this will be the point in time when the
information was obtained from them. For stuff we make up here in the
DWH, this will be the point in time when we made the assumption.
tests:
- not_null