From ab581b0fb5076056f9b7066d9e1c0ec5ac9c1d8b Mon Sep 17 00:00:00 2001 From: Pablo Martin Date: Mon, 30 Oct 2023 18:04:19 +0100 Subject: [PATCH] More models and knowledge --- code_thingies/dbtlearn/dbt_project.yml | 6 ++--- .../models/dim/dim_hosts_cleansed.sql | 16 ++++++++++++ .../models/dim/dim_listings_cleansed.sql | 20 +++++++++++++++ .../dbtlearn/models/fact/fact_reviews.sql | 19 ++++++++++++++ notes/8.md | 25 +++++++++++++++++++ 5 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 code_thingies/dbtlearn/models/dim/dim_hosts_cleansed.sql create mode 100644 code_thingies/dbtlearn/models/dim/dim_listings_cleansed.sql create mode 100644 code_thingies/dbtlearn/models/fact/fact_reviews.sql diff --git a/code_thingies/dbtlearn/dbt_project.yml b/code_thingies/dbtlearn/dbt_project.yml index 6752aa4..3c2a10f 100644 --- a/code_thingies/dbtlearn/dbt_project.yml +++ b/code_thingies/dbtlearn/dbt_project.yml @@ -27,8 +27,8 @@ clean-targets: # directories to be removed by `dbt clean` # Configuring models # Full documentation: https://docs.getdbt.com/docs/configuring-models -# In this example config, we tell dbt to build all models in the example/ -# directory as views. These settings can be overridden in the individual model -# files using the `{{ config(...) }}` macro. 
models: dbtlearn: + +materialized: view # Default way to materialize is view + dim: + +materialized: table diff --git a/code_thingies/dbtlearn/models/dim/dim_hosts_cleansed.sql b/code_thingies/dbtlearn/models/dim/dim_hosts_cleansed.sql new file mode 100644 index 0000000..2a1f43d --- /dev/null +++ b/code_thingies/dbtlearn/models/dim/dim_hosts_cleansed.sql @@ -0,0 +1,16 @@ +/* Cleansed hosts: a NULL host_name becomes 'Anonymous'; 't'/'f' superhost flags become booleans and any other value yields NULL. */ WITH src_hosts AS( + SELECT * + FROM {{ ref('src_hosts') }} +) +SELECT + host_id, + COALESCE( + host_name, + 'Anonymous' + ) AS host_name, + CASE + WHEN flag_is_superhost = 't' THEN true + WHEN flag_is_superhost = 'f' THEN false + END::bool AS flag_is_superhost +FROM + src_hosts \ No newline at end of file diff --git a/code_thingies/dbtlearn/models/dim/dim_listings_cleansed.sql b/code_thingies/dbtlearn/models/dim/dim_listings_cleansed.sql new file mode 100644 index 0000000..6e5de55 --- /dev/null +++ b/code_thingies/dbtlearn/models/dim/dim_listings_cleansed.sql @@ -0,0 +1,20 @@ +/* Cleansed listings: a minimum_nights of 0 is reported as 1, and price_str is cast to money after removing '$'. */ WITH src_listings AS ( + SELECT * + FROM + {{ ref('src_listings') }} +) +SELECT + listing_id, + listing_name, + room_type, + CASE + WHEN minimum_nights = 0 THEN 1 + ELSE minimum_nights + END AS minimum_nights, /* a minimum of 0 nights is reported as 1 */ + host_id, + REPLACE(price_str,'$','')::money AS price, /* strips the '$' before casting */ + created_at, + updated_at +FROM + src_listings + diff --git a/code_thingies/dbtlearn/models/fact/fact_reviews.sql b/code_thingies/dbtlearn/models/fact/fact_reviews.sql new file mode 100644 index 0000000..8a707fa --- /dev/null +++ b/code_thingies/dbtlearn/models/fact/fact_reviews.sql @@ -0,0 +1,19 @@ +{{ + config( + materialized = 'incremental', + on_schema_change = 'fail' + ) +}} +/* Reviews that have a non-NULL review_text; on incremental runs only rows with a review_date later than the latest one already in the target are selected. */ WITH src_reviews AS ( + SELECT * + FROM + {{ ref('src_reviews') }} +) +SELECT * +FROM + src_reviews +WHERE + review_text IS NOT NULL +{% if is_incremental() %} + AND review_date > (SELECT COALESCE(MAX(review_date), '1900-01-01') FROM {{ this }}) /* COALESCE: MAX is NULL when the existing table is empty, which would otherwise filter out every row */ +{% endif %} \ No newline at end of file diff --git a/notes/8.md b/notes/8.md index ea53e09..0d1c83f 100644 --- a/notes/8.md +++ b/notes/8.md @@ -16,5 +16,30 @@ 
Models can be related between themselves to map dependencies. - Incremental: also a table, but can only create new records, not update - Ephemeral: it's actually NOT materializing. The model can be used by dependents, but it won't be materialized in the DB. It will truly only be a CTE that gets used by other models. Mostly for intermediate states in transformations. +Materializations can be defined at the model level, folder level and project level. This can be modified in the `dbt_project.yml` file, under the `models` key. + +To set materialization config at the model level, one must make a jinja tag at the start of the file and call the `config` dbt function. See an example below: + +```python +{{ + config( + materialized = 'incremental', + on_schema_change = 'fail' + ) +}} +``` + +Incremental materializations need a block that defines the logic to apply in incremental loads (as opposed to the 'normal' logic, which gets applied on first runs). See an example below: + +```SQL +[... rest of query ...] +WHERE + review_text IS NOT NULL +{% if is_incremental() %} + AND review_date > (SELECT MAX(review_date) FROM {{ this }}) +{% endif %} +``` + +Bear in mind that the strategy for determining what should be loaded is up to the engineer. Any SQL can be placed within the `if is_incremental()` block. In the example above, we have a date field that easily signals what's the most recent date the table has currently seen. ## \ No newline at end of file