diff --git a/code_thingies/database/README.md b/code_thingies/database/README.md index aee293e..a700d18 100644 --- a/code_thingies/database/README.md +++ b/code_thingies/database/README.md @@ -10,14 +10,14 @@ This dir contains some useful bits to raise a local PostgreSQL instance with Doc - Run the following commands to get the database ready in it's starting state ```SQL -CREATE USER transformation_user WITH ENCRYPTED PASSWORD 'transformation_user_password'; - CREATE DATABASE airbnb; -- Connect to your newly created `airbnb` database for the next commands. CREATE SCHEMA raw; +-- The following tables should be created in the `raw` schema + CREATE TABLE raw_listings ( id INTEGER, listing_url VARCHAR(1000), @@ -46,6 +46,22 @@ CREATE TABLE raw_hosts ( updated_at TIMESTAMP ); + +CREATE SCHEMA dev; + + +-- Create a user for dbt activity +CREATE USER transformation_user WITH ENCRYPTED PASSWORD 'transformation_user_password'; + +-- Allow dbt user to read from raw schema +GRANT CONNECT ON DATABASE airbnb TO transformation_user; +GRANT USAGE ON SCHEMA raw TO transformation_user; +GRANT SELECT ON ALL TABLES IN SCHEMA raw TO transformation_user; +ALTER DEFAULT PRIVILEGES IN SCHEMA raw GRANT SELECT ON TABLES TO transformation_user; + +GRANT ALL ON SCHEMA dev TO transformation_user; +ALTER SCHEMA dev owner to transformation_user; + ``` After, you will have to download some CSV files with the data to populate the database. The AWS CLI commands below will download them for you: diff --git a/code_thingies/dbtlearn/.gitignore b/code_thingies/dbtlearn/.gitignore new file mode 100644 index 0000000..49f147c --- /dev/null +++ b/code_thingies/dbtlearn/.gitignore @@ -0,0 +1,4 @@ + +target/ +dbt_packages/ +logs/ diff --git a/code_thingies/dbtlearn/README.md b/code_thingies/dbtlearn/README.md new file mode 100644 index 0000000..1c5c6d7 --- /dev/null +++ b/code_thingies/dbtlearn/README.md @@ -0,0 +1,44 @@ +# dbt project + +This is the dbt project for the course. 
+ +## Set up + +You need to place a profile for the local postgres instance in `~/.dbt/profiles.yml`. See below a sample config that should be a good starting point if you follow the instructions in the `database` dir of this project. + +```yaml +dbtlearn: + outputs: + + dev: + type: postgres + threads: 4 + host: localhost + port: 5432 + user: the user + pass: the password + dbname: airbnb + schema: dev + + target: dev +``` + +Once you have set this up and the database as well, you can run `dbt debug` to ensure everything is set up correctly and dbt can reach the database. + + + +# DBT noisy things below + +### Using the starter project + +Try running the following commands: +- dbt run +- dbt test + + +### Resources: +- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) +- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers +- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support +- Find [dbt events](https://events.getdbt.com) near you +- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices diff --git a/code_thingies/dbtlearn/analyses/.gitkeep b/code_thingies/dbtlearn/analyses/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/code_thingies/dbtlearn/dbt_project.yml b/code_thingies/dbtlearn/dbt_project.yml new file mode 100644 index 0000000..568066b --- /dev/null +++ b/code_thingies/dbtlearn/dbt_project.yml @@ -0,0 +1,37 @@ + +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'dbtlearn' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: 'dbtlearn' + +# These configurations specify where dbt should look for different types of files. 
+# The `model-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["seeds"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ +# directory as views. These settings can be overridden in the individual model +# files using the `{{ config(...) }}` macro. +models: + dbtlearn: + # Config indicated by + and applies to all files under models/example/ + example: + +materialized: view diff --git a/code_thingies/dbtlearn/macros/.gitkeep b/code_thingies/dbtlearn/macros/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/code_thingies/dbtlearn/models/example/my_first_dbt_model.sql b/code_thingies/dbtlearn/models/example/my_first_dbt_model.sql new file mode 100644 index 0000000..f31a12d --- /dev/null +++ b/code_thingies/dbtlearn/models/example/my_first_dbt_model.sql @@ -0,0 +1,27 @@ + +/* + Welcome to your first dbt model! + Did you know that you can also configure models directly within SQL files? 
+ This will override configurations stated in dbt_project.yml + + Try changing "table" to "view" below +*/ + +{{ config(materialized='table') }} + +with source_data as ( + + select 1 as id + union all + select null as id + +) + +select * +from source_data + +/* + Uncomment the line below to remove records with null `id` values +*/ + +-- where id is not null diff --git a/code_thingies/dbtlearn/models/example/my_second_dbt_model.sql b/code_thingies/dbtlearn/models/example/my_second_dbt_model.sql new file mode 100644 index 0000000..c91f879 --- /dev/null +++ b/code_thingies/dbtlearn/models/example/my_second_dbt_model.sql @@ -0,0 +1,6 @@ + +-- Use the `ref` function to select from other models + +select * +from {{ ref('my_first_dbt_model') }} +where id = 1 diff --git a/code_thingies/dbtlearn/models/example/schema.yml b/code_thingies/dbtlearn/models/example/schema.yml new file mode 100644 index 0000000..2a53081 --- /dev/null +++ b/code_thingies/dbtlearn/models/example/schema.yml @@ -0,0 +1,21 @@ + +version: 2 + +models: + - name: my_first_dbt_model + description: "A starter dbt model" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + - not_null + + - name: my_second_dbt_model + description: "A starter dbt model" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + - not_null diff --git a/code_thingies/dbtlearn/seeds/.gitkeep b/code_thingies/dbtlearn/seeds/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/code_thingies/dbtlearn/snapshots/.gitkeep b/code_thingies/dbtlearn/snapshots/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/code_thingies/dbtlearn/tests/.gitkeep b/code_thingies/dbtlearn/tests/.gitkeep new file mode 100644 index 0000000..e69de29