dbt stuff
This commit is contained in:
parent
f9e58369ca
commit
cfaa7cfecc
12 changed files with 157 additions and 2 deletions
|
|
@ -10,14 +10,14 @@ This dir contains some useful bits to raise a local PostgreSQL instance with Doc
|
|||
- Run the following commands to get the database ready in its starting state
|
||||
|
||||
```SQL
|
||||
CREATE USER transformation_user WITH ENCRYPTED PASSWORD 'transformation_user_password';
|
||||
|
||||
CREATE DATABASE airbnb;
|
||||
|
||||
-- Connect to your newly created `airbnb` database for the next commands.
|
||||
|
||||
CREATE SCHEMA raw;
|
||||
|
||||
-- The following tables should be created in the `raw` schema
|
||||
|
||||
CREATE TABLE raw_listings (
|
||||
id INTEGER,
|
||||
listing_url VARCHAR(1000),
|
||||
|
|
@ -46,6 +46,22 @@ CREATE TABLE raw_hosts (
|
|||
updated_at TIMESTAMP
|
||||
);
|
||||
|
||||
|
||||
CREATE SCHEMA dev;
|
||||
|
||||
|
||||
-- Create a user for dbt activity
|
||||
CREATE USER transformation_user WITH ENCRYPTED PASSWORD 'transformation_user_password';
|
||||
|
||||
-- Allow dbt user to read from raw schema
|
||||
GRANT CONNECT ON DATABASE airbnb TO transformation_user;
|
||||
GRANT USAGE ON SCHEMA raw TO transformation_user;
|
||||
GRANT SELECT ON ALL TABLES IN SCHEMA raw TO transformation_user;
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA raw GRANT SELECT ON TABLES TO transformation_user;
|
||||
|
||||
GRANT ALL ON SCHEMA dev TO transformation_user;
|
||||
ALTER SCHEMA dev owner to transformation_user;
|
||||
|
||||
```
|
||||
|
||||
Afterwards, you will have to download some CSV files with the data to populate the database. The AWS CLI commands below will download them for you:
|
||||
|
|
|
|||
4
code_thingies/dbtlearn/.gitignore
vendored
Normal file
4
code_thingies/dbtlearn/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
|
||||
target/
|
||||
dbt_packages/
|
||||
logs/
|
||||
44
code_thingies/dbtlearn/README.md
Normal file
44
code_thingies/dbtlearn/README.md
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
# dbt project
|
||||
|
||||
This is the dbt project for the course.
|
||||
|
||||
## Set up
|
||||
|
||||
You need to place a profile for the local postgres instance in `~/.dbt/profiles.yml`. Below is a sample config that should be a good starting point if you followed the instructions in the `database` dir of this project.
|
||||
|
||||
```yaml
|
||||
dbtlearn:
|
||||
outputs:
|
||||
|
||||
dev:
|
||||
type: postgres
|
||||
threads: 4
|
||||
host: localhost
|
||||
port: 5432
|
||||
user: the user
|
||||
pass: the password
|
||||
dbname: airbnb
|
||||
schema: dev
|
||||
|
||||
target: dev
|
||||
```
|
||||
|
||||
Once both the profile and the database are set up, you can run `dbt debug` to check that everything is configured correctly and that dbt can reach the database.
|
||||
|
||||
|
||||
|
||||
# DBT noisy things below
|
||||
|
||||
### Using the starter project
|
||||
|
||||
Try running the following commands:
|
||||
- dbt run
|
||||
- dbt test
|
||||
|
||||
|
||||
### Resources:
|
||||
- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
|
||||
- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
|
||||
- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
|
||||
- Find [dbt events](https://events.getdbt.com) near you
|
||||
- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices
|
||||
0
code_thingies/dbtlearn/analyses/.gitkeep
Normal file
0
code_thingies/dbtlearn/analyses/.gitkeep
Normal file
37
code_thingies/dbtlearn/dbt_project.yml
Normal file
37
code_thingies/dbtlearn/dbt_project.yml
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
|
||||
# Name your project! Project names should contain only lowercase characters
|
||||
# and underscores. A good package name should reflect your organization's
|
||||
# name or the intended use of these models
|
||||
name: 'dbtlearn'
|
||||
version: '1.0.0'
|
||||
config-version: 2
|
||||
|
||||
# This setting configures which "profile" dbt uses for this project.
|
||||
profile: 'dbtlearn'
|
||||
|
||||
# These configurations specify where dbt should look for different types of files.
|
||||
# The `model-paths` config, for example, states that models in this project can be
|
||||
# found in the "models/" directory. You probably won't need to change these!
|
||||
model-paths: ["models"]
|
||||
analysis-paths: ["analyses"]
|
||||
test-paths: ["tests"]
|
||||
seed-paths: ["seeds"]
|
||||
macro-paths: ["macros"]
|
||||
snapshot-paths: ["snapshots"]
|
||||
|
||||
clean-targets: # directories to be removed by `dbt clean`
|
||||
- "target"
|
||||
- "dbt_packages"
|
||||
|
||||
|
||||
# Configuring models
|
||||
# Full documentation: https://docs.getdbt.com/docs/configuring-models
|
||||
|
||||
# In this example config, we tell dbt to build all models in the example/
|
||||
# directory as views. These settings can be overridden in the individual model
|
||||
# files using the `{{ config(...) }}` macro.
|
||||
models:
|
||||
dbtlearn:
|
||||
# Config indicated by + and applies to all files under models/example/
|
||||
example:
|
||||
+materialized: view
|
||||
0
code_thingies/dbtlearn/macros/.gitkeep
Normal file
0
code_thingies/dbtlearn/macros/.gitkeep
Normal file
27
code_thingies/dbtlearn/models/example/my_first_dbt_model.sql
Normal file
27
code_thingies/dbtlearn/models/example/my_first_dbt_model.sql
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
|
||||
/*
|
||||
Welcome to your first dbt model!
|
||||
Did you know that you can also configure models directly within SQL files?
|
||||
This will override configurations stated in dbt_project.yml
|
||||
|
||||
Try changing "table" to "view" below
|
||||
*/
|
||||
|
||||
{{ config(materialized='table') }}
|
||||
|
||||
with source_data as (
|
||||
|
||||
select 1 as id
|
||||
union all
|
||||
select null as id
|
||||
|
||||
)
|
||||
|
||||
select *
|
||||
from source_data
|
||||
|
||||
/*
|
||||
Uncomment the line below to remove records with null `id` values
|
||||
*/
|
||||
|
||||
-- where id is not null
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
|
||||
-- Use the `ref` function to select from other models
|
||||
|
||||
select *
|
||||
from {{ ref('my_first_dbt_model') }}
|
||||
where id = 1
|
||||
21
code_thingies/dbtlearn/models/example/schema.yml
Normal file
21
code_thingies/dbtlearn/models/example/schema.yml
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
|
||||
version: 2
|
||||
|
||||
models:
|
||||
- name: my_first_dbt_model
|
||||
description: "A starter dbt model"
|
||||
columns:
|
||||
- name: id
|
||||
description: "The primary key for this table"
|
||||
tests:
|
||||
- unique
|
||||
- not_null
|
||||
|
||||
- name: my_second_dbt_model
|
||||
description: "A starter dbt model"
|
||||
columns:
|
||||
- name: id
|
||||
description: "The primary key for this table"
|
||||
tests:
|
||||
- unique
|
||||
- not_null
|
||||
0
code_thingies/dbtlearn/seeds/.gitkeep
Normal file
0
code_thingies/dbtlearn/seeds/.gitkeep
Normal file
0
code_thingies/dbtlearn/snapshots/.gitkeep
Normal file
0
code_thingies/dbtlearn/snapshots/.gitkeep
Normal file
0
code_thingies/dbtlearn/tests/.gitkeep
Normal file
0
code_thingies/dbtlearn/tests/.gitkeep
Normal file
Loading…
Add table
Add a link
Reference in a new issue