From 7480222cc765e815212ca3f8eb0dd1d7a6d310ee Mon Sep 17 00:00:00 2001 From: Pablo Martin Date: Tue, 31 Oct 2023 17:22:51 +0100 Subject: [PATCH] Thingies --- .../models/mart/mart_fullmoon_reviews.sql | 27 +++++++++++++++++++ code_thingies/dbtlearn/models/sources.yml | 12 +++++++++ .../dbtlearn/models/src/src_hosts.sql | 2 +- .../dbtlearn/models/src/src_listings.sql | 2 +- .../dbtlearn/models/src/src_reviews.sql | 2 +- notes/8.md | 7 ++++- notes/sections1-7.md | 10 +------ 7 files changed, 49 insertions(+), 13 deletions(-) create mode 100644 code_thingies/dbtlearn/models/mart/mart_fullmoon_reviews.sql create mode 100644 code_thingies/dbtlearn/models/sources.yml diff --git a/code_thingies/dbtlearn/models/mart/mart_fullmoon_reviews.sql b/code_thingies/dbtlearn/models/mart/mart_fullmoon_reviews.sql new file mode 100644 index 0000000..199b808 --- /dev/null +++ b/code_thingies/dbtlearn/models/mart/mart_fullmoon_reviews.sql @@ -0,0 +1,27 @@ +{{ + config( + materialized = 'table' + ) +}} + +WITH fact_reviews AS ( + SELECT * + FROM + {{ ref('fact_reviews') }} +), +full_moon_dates AS ( + SELECT * + FROM + {{ ref('seed_full_moon_dates')}} +) + +SELECT + fr.*, + CASE + WHEN fm.full_moon_date IS NULL THEN 'not full moon' + ELSE 'full moon' + END AS is_full_moon +FROM + fact_reviews fr + LEFT JOIN full_moon_dates fm + ON (fr.review_date::date) = (fm.full_moon_date + interval '1' day) \ No newline at end of file diff --git a/code_thingies/dbtlearn/models/sources.yml b/code_thingies/dbtlearn/models/sources.yml new file mode 100644 index 0000000..0e2db1c --- /dev/null +++ b/code_thingies/dbtlearn/models/sources.yml @@ -0,0 +1,12 @@ +version: 2 + +sources: + - name: airbnb + schema: raw + tables: + - name: listings + identifier: raw_listings + - name: hosts + identifier: raw_hosts + - name: reviews + identifier: raw_reviews \ No newline at end of file diff --git a/code_thingies/dbtlearn/models/src/src_hosts.sql b/code_thingies/dbtlearn/models/src/src_hosts.sql index b33b25d..9d9d5b3 100644 --- a/code_thingies/dbtlearn/models/src/src_hosts.sql +++ b/code_thingies/dbtlearn/models/src/src_hosts.sql @@ -1,6 +1,6 @@ WITH raw_hosts AS ( SELECT * - FROM raw.raw_hosts + FROM {{ source ('airbnb', 'hosts')}} ) SELECT id as host_id, diff --git a/code_thingies/dbtlearn/models/src/src_listings.sql b/code_thingies/dbtlearn/models/src/src_listings.sql index c68838c..4c09b3e 100644 --- a/code_thingies/dbtlearn/models/src/src_listings.sql +++ b/code_thingies/dbtlearn/models/src/src_listings.sql @@ -1,6 +1,6 @@ WITH raw_listings AS ( SELECT * - FROM raw.raw_listings + FROM {{ source ('airbnb', 'listings')}} ) SELECT id AS listing_id, diff --git a/code_thingies/dbtlearn/models/src/src_reviews.sql b/code_thingies/dbtlearn/models/src/src_reviews.sql index 59d8167..987faeb 100644 --- a/code_thingies/dbtlearn/models/src/src_reviews.sql +++ b/code_thingies/dbtlearn/models/src/src_reviews.sql @@ -1,6 +1,6 @@ WITH raw_reviews AS ( SELECT * - FROM raw.raw_reviews + FROM {{ source ('airbnb', 'reviews')}} ) SELECT listing_id, diff --git a/notes/8.md b/notes/8.md index 0d1c83f..22ed54d 100644 --- a/notes/8.md +++ b/notes/8.md @@ -42,4 +42,9 @@ WHERE Bear in mind that how to define the strategy to determine what should be loaded is up to the engineer. Any SQL can be placed within the `if is_incremental()` block. In the example above, we have a date field that easily signals what's the most recent date the table has currently seen. -## \ No newline at end of file +## Sources and seeds + +Seeds are local files that you upload to a DWH from dbt. You place them as CSVs in the `seeds` folder. + + +Sources are an abstraction layer on top of the input tables. They are not strictly necessary, but can help make the project more structured. To create sources, you create a `sources.yml` file and place it in the `models` dir. \ No newline at end of file diff --git a/notes/sections1-7.md b/notes/sections1-7.md index 22bc677..4e25c8a 100644 --- a/notes/sections1-7.md +++ b/notes/sections1-7.md @@ -105,12 +105,4 @@ dbt makes sense nowadays because the modern data stack makes transformations wit - `dbt_project.yml`: header of the project, with stuff like versioning, the default profile for the project, the paths to different folders, etc. -This is a pic of the data flow we are going to build: ![img.png](../images/dataflow_overview.png) - -## Sources and seeds - -Seeds are local files that you upload to a DWH from dbt. You place them as CSVs in the `seeds` folder. - - -Sources are an abstraction layer on top of the input tables. - +This is a pic of the data flow we are going to build: ![img.png](../images/dataflow_overview.png) \ No newline at end of file