Merged PR 2036: Create int rates table
- Creates an intermediate rates table that both uses real rates from xe.com and also make some assumptions about future and past dates. - Also took the chance to place all orphan int models into a `cross` folder. Related work items: #17212
This commit is contained in:
commit
670cf9f30f
6 changed files with 237 additions and 2 deletions
|
|
@ -47,4 +47,6 @@ seeds:
|
||||||
|
|
||||||
vars:
|
vars:
|
||||||
"dbt_date:time_zone": "Europe/London"
|
"dbt_date:time_zone": "Europe/London"
|
||||||
"start_date": "'2020-01-01'"
|
# A general cutoff date for relevancy. Many models assume this to be the point
|
||||||
|
# in time after which they should work.
|
||||||
|
"start_date": "'2020-01-01'"
|
||||||
147
models/intermediate/cross/int_daily_currency_exchange_rates.sql
Normal file
147
models/intermediate/cross/int_daily_currency_exchange_rates.sql
Normal file
|
|
@ -0,0 +1,147 @@
|
||||||
|
/*
|
||||||
|
How deep into the future since the last available real rate should rates be
|
||||||
|
projected. Must follow be a valid postgres interval string, such as '1 years'
|
||||||
|
See examples here:
|
||||||
|
https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT-EXAMPLES
|
||||||
|
*/
|
||||||
|
{% set interval_into_the_future = "'3 years'" %}
|
||||||
|
|
||||||
|
with
|
||||||
|
stg_xedotcom__exchange_rates as (
|
||||||
|
select * from {{ ref("stg_xedotcom__exchange_rates") }}
|
||||||
|
),
|
||||||
|
-- CTEs for projecting forward
|
||||||
|
most_recent_xe_date_per_curr_pair as (
|
||||||
|
select from_currency, to_currency, max(rate_date_utc) as most_recent_date_utc
|
||||||
|
from stg_xedotcom__exchange_rates
|
||||||
|
group by 1, 2
|
||||||
|
),
|
||||||
|
last_xe_rate_per_curr_pair as (
|
||||||
|
select xe.from_currency, xe.to_currency, xe.rate
|
||||||
|
from stg_xedotcom__exchange_rates xe
|
||||||
|
inner join
|
||||||
|
most_recent_xe_date_per_curr_pair recent
|
||||||
|
on xe.from_currency = recent.from_currency
|
||||||
|
and xe.to_currency = recent.to_currency
|
||||||
|
and xe.rate_date_utc = recent.most_recent_date_utc
|
||||||
|
),
|
||||||
|
future_dates as (
|
||||||
|
select date_trunc('day', day_series)::date as future_date
|
||||||
|
from
|
||||||
|
generate_series
|
||||||
|
(
|
||||||
|
(
|
||||||
|
select max(most_recent_date_utc)
|
||||||
|
from most_recent_xe_date_per_curr_pair
|
||||||
|
)::timestamp
|
||||||
|
+ interval '1 days', -- we don't want to pick the last real day
|
||||||
|
(
|
||||||
|
select max(most_recent_date_utc)
|
||||||
|
from most_recent_xe_date_per_curr_pair
|
||||||
|
)::timestamp
|
||||||
|
+ interval {{ interval_into_the_future }},
|
||||||
|
'1 day'::interval
|
||||||
|
) day_series
|
||||||
|
),
|
||||||
|
projected_future_rates as (
|
||||||
|
select
|
||||||
|
concat(
|
||||||
|
last_rate.from_currency,
|
||||||
|
last_rate.to_currency,
|
||||||
|
future_dates.future_date,
|
||||||
|
'xe_inferred',
|
||||||
|
'forecast'
|
||||||
|
) as id_exchange_rate,
|
||||||
|
last_rate.from_currency,
|
||||||
|
last_rate.to_currency,
|
||||||
|
last_rate.rate,
|
||||||
|
future_dates.future_date as rate_date_utc,
|
||||||
|
'xe_inferred' as source,
|
||||||
|
'forecast' as rate_version,
|
||||||
|
now() as updated_at_utc
|
||||||
|
from future_dates future_dates
|
||||||
|
cross join last_xe_rate_per_curr_pair last_rate
|
||||||
|
),
|
||||||
|
-- CTEs for projecting backward
|
||||||
|
oldest_xe_date_per_curr_pair as (
|
||||||
|
select from_currency, to_currency, max(rate_date_utc) as oldest_date_utc
|
||||||
|
from stg_xedotcom__exchange_rates
|
||||||
|
group by 1, 2
|
||||||
|
),
|
||||||
|
first_xe_rate_per_curr_pair as (
|
||||||
|
select xe.from_currency, xe.to_currency, xe.rate
|
||||||
|
from stg_xedotcom__exchange_rates xe
|
||||||
|
inner join
|
||||||
|
oldest_xe_date_per_curr_pair recent
|
||||||
|
on xe.from_currency = recent.from_currency
|
||||||
|
and xe.to_currency = recent.to_currency
|
||||||
|
and xe.rate_date_utc = recent.oldest_date_utc
|
||||||
|
),
|
||||||
|
past_dates as (
|
||||||
|
select date_trunc('day', day_series)::date as past_date
|
||||||
|
from
|
||||||
|
generate_series
|
||||||
|
(
|
||||||
|
{{ var("start_date") }}::timestamp,
|
||||||
|
(
|
||||||
|
select max(most_recent_date_utc)
|
||||||
|
from most_recent_xe_date_per_curr_pair
|
||||||
|
)::timestamp
|
||||||
|
- interval '2 days', -- we don't want to pick the last real day,
|
||||||
|
'1 day'::interval
|
||||||
|
) day_series
|
||||||
|
),
|
||||||
|
projected_past_rates as (
|
||||||
|
select
|
||||||
|
concat(
|
||||||
|
first_rate.from_currency,
|
||||||
|
first_rate.to_currency,
|
||||||
|
past_dates.past_date,
|
||||||
|
'xe_inferred',
|
||||||
|
'guess'
|
||||||
|
) as id_exchange_rate,
|
||||||
|
first_rate.from_currency,
|
||||||
|
first_rate.to_currency,
|
||||||
|
first_rate.rate,
|
||||||
|
past_dates.past_date as rate_date_utc,
|
||||||
|
'xe_inferred' as source,
|
||||||
|
'guess' as rate_version,
|
||||||
|
now() as updated_at_utc
|
||||||
|
from past_dates
|
||||||
|
cross join first_xe_rate_per_curr_pair first_rate
|
||||||
|
)
|
||||||
|
-- here we union actual stuff, future projected stuff and past assumed stuff
|
||||||
|
select
|
||||||
|
concat(
|
||||||
|
xe.from_currency, xe.to_currency, xe.rate_date_utc, 'xedotcom', 'actual'
|
||||||
|
) as id_exchange_rate, -- the id from staging is not useful here because dimensionality is different: here we have rate_source and rate_version
|
||||||
|
xe.from_currency,
|
||||||
|
xe.to_currency,
|
||||||
|
xe.rate,
|
||||||
|
xe.rate_date_utc,
|
||||||
|
'xedotcom' as source,
|
||||||
|
'actual' as rate_version,
|
||||||
|
xe.dwh_extracted_at_utc as updated_at_utc
|
||||||
|
from stg_xedotcom__exchange_rates xe
|
||||||
|
union all
|
||||||
|
select
|
||||||
|
fu.id_exchange_rate,
|
||||||
|
fu.from_currency,
|
||||||
|
fu.to_currency,
|
||||||
|
fu.rate,
|
||||||
|
fu.rate_date_utc,
|
||||||
|
fu.source,
|
||||||
|
fu.rate_version,
|
||||||
|
fu.updated_at_utc
|
||||||
|
from projected_future_rates fu
|
||||||
|
union all
|
||||||
|
select
|
||||||
|
pa.id_exchange_rate,
|
||||||
|
pa.from_currency,
|
||||||
|
pa.to_currency,
|
||||||
|
pa.rate,
|
||||||
|
pa.rate_date_utc,
|
||||||
|
pa.source,
|
||||||
|
pa.rate_version,
|
||||||
|
pa.updated_at_utc
|
||||||
|
from projected_past_rates pa
|
||||||
|
|
@ -4,4 +4,4 @@ with
|
||||||
select d.date_day as rate_date, r.*
|
select d.date_day as rate_date, r.*
|
||||||
from dates d
|
from dates d
|
||||||
cross join hardcoded_rates r
|
cross join hardcoded_rates r
|
||||||
where d.date_day > cast('2020-01-01' as date)
|
where d.date_day > cast({{ var("start_date") }} as date)
|
||||||
86
models/intermediate/cross/schema.yml
Normal file
86
models/intermediate/cross/schema.yml
Normal file
|
|
@ -0,0 +1,86 @@
|
||||||
|
models:
|
||||||
|
- name: int_daily_currency_exchange_rates
|
||||||
|
description: >-
|
||||||
|
This model holds a lot of data on currency exchange rates. The time
|
||||||
|
granularity is daily. Each record holds a currency pair for a specific
|
||||||
|
day, source and version.
|
||||||
|
|
||||||
|
Actual rates are sourced from xe.com data. The `guessed` and `forecast`
|
||||||
|
versions are built by simply 'pushing' the first/last exchange rate on
|
||||||
|
record. Basically, wherever we dont' have data for a date, we pick the
|
||||||
|
closest actual data point that comes from xe.com. Bear in mind this means
|
||||||
|
that `forecast` version records will change on a daily basis as actual
|
||||||
|
data moves forwards, meaning you shouldn't assume your money amounts
|
||||||
|
converted in the future should always stay put.
|
||||||
|
|
||||||
|
Note that, given the dimensionality, getting a simple time series for a
|
||||||
|
currency pair will require a bit of filtering.
|
||||||
|
|
||||||
|
Reverse rates are explicit. This means that, for any given day and any
|
||||||
|
given currency pair, you will find two records with opposite from/to
|
||||||
|
positions. So, for 2024-01-01, you will find both a EUR->USD record and a
|
||||||
|
USD->EUR record with the opposite rate (1/rate).
|
||||||
|
columns:
|
||||||
|
- name: id_exchange_rate
|
||||||
|
data_type: text
|
||||||
|
description: A unique ID for the record, derived from concatenating the
|
||||||
|
currencies, date, source and version. Currency order is relevant
|
||||||
|
(EURUSD != USDEUR).
|
||||||
|
tests:
|
||||||
|
- not_null
|
||||||
|
- unique
|
||||||
|
- name: from_currency
|
||||||
|
data_type: character
|
||||||
|
description: The source currency, represented as an ISO 4217 code.
|
||||||
|
tests:
|
||||||
|
- not_null
|
||||||
|
- name: to_currency
|
||||||
|
data_type: character
|
||||||
|
description: The target currency, represented as an ISO 4217 code.
|
||||||
|
tests:
|
||||||
|
- not_null
|
||||||
|
- name: rate
|
||||||
|
data_type: numeric
|
||||||
|
description: >-
|
||||||
|
The exchange rate, represented as the units of the target currency
|
||||||
|
that one unit of source currency gets you. So, from_currency=USD to
|
||||||
|
to_currency=PLN with rate=4.2 should be read as '1 US Dollar buys me
|
||||||
|
4.2 Polish Zlotys'.
|
||||||
|
|
||||||
|
For same currency pairs (EUR to EUR, USD to USD, etc). The rate will
|
||||||
|
always be one.
|
||||||
|
|
||||||
|
The rate can be smaller than one, but can't be negative.
|
||||||
|
tests:
|
||||||
|
- not_negative_or_zero
|
||||||
|
- not_null
|
||||||
|
- name: rate_date_utc
|
||||||
|
data_type: date
|
||||||
|
description: The date in which the rate record is relevant.
|
||||||
|
tests:
|
||||||
|
- not_null
|
||||||
|
- name: source
|
||||||
|
data_type: text
|
||||||
|
description: Where is the data coming from. Records that are composed from
|
||||||
|
making assumptions on real data will contain `_inferred`.
|
||||||
|
- name: rate_version
|
||||||
|
data_type: text
|
||||||
|
description: The version of the rate. This can be one of `actual` (the rate is a
|
||||||
|
reality fact), `forecast` (the rate sits in the future and is a guess
|
||||||
|
in nature) or `guess` (the rate sits in the past and is a guess in
|
||||||
|
nature). Note that one currency pair can have multiple rate versions
|
||||||
|
on the same date.
|
||||||
|
tests:
|
||||||
|
- accepted_values:
|
||||||
|
values:
|
||||||
|
- guess
|
||||||
|
- actual
|
||||||
|
- forecast
|
||||||
|
- not_null
|
||||||
|
- name: updated_at_utc
|
||||||
|
data_type: timestamp with time zone
|
||||||
|
description: For external sources, this will be the point in time when the
|
||||||
|
information was obtained from them. For stuff we make up here in the
|
||||||
|
DWH, this will be the point in time when we made the assumption.
|
||||||
|
tests:
|
||||||
|
- not_null
|
||||||
Loading…
Add table
Add a link
Reference in a new issue