dbt stuff

This commit is contained in:
Pablo Martin 2023-10-29 18:40:01 +01:00
parent f9e58369ca
commit cfaa7cfecc
12 changed files with 157 additions and 2 deletions

View file

@ -10,14 +10,14 @@ This dir contains some useful bits to raise a local PostgreSQL instance with Doc
- Run the following commands to get the database ready in its starting state
```SQL
CREATE USER transformation_user WITH ENCRYPTED PASSWORD 'transformation_user_password';
CREATE DATABASE airbnb;
-- Connect to your newly created `airbnb` database for the next commands.
CREATE SCHEMA raw;
-- The following tables should be created in the `raw` schema
CREATE TABLE raw_listings (
id INTEGER,
listing_url VARCHAR(1000),
@ -46,6 +46,22 @@ CREATE TABLE raw_hosts (
updated_at TIMESTAMP
);
-- Schema that the dbt user will own and build into
CREATE SCHEMA dev;

-- Create a user for dbt activity
CREATE USER transformation_user WITH ENCRYPTED PASSWORD 'transformation_user_password';

-- Allow dbt user to read from raw schema
GRANT CONNECT ON DATABASE airbnb TO transformation_user;
GRANT USAGE ON SCHEMA raw TO transformation_user;
GRANT SELECT ON ALL TABLES IN SCHEMA raw TO transformation_user;
-- Also cover tables added to `raw` later. Without a FOR ROLE clause this only
-- applies to tables created by the role that runs this statement.
ALTER DEFAULT PRIVILEGES IN SCHEMA raw GRANT SELECT ON TABLES TO transformation_user;

-- Give the dbt user full control over the dev schema
GRANT ALL ON SCHEMA dev TO transformation_user;
ALTER SCHEMA dev OWNER TO transformation_user;
```
Afterwards, you will have to download some CSV files with the data to populate the database. The AWS CLI commands below will download them for you:

4
code_thingies/dbtlearn/.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
# `target` and `dbt_packages` are the directories listed under `clean-targets`
# in dbt_project.yml (removed by `dbt clean`), so they are not worth versioning.
target/
dbt_packages/
# Presumably dbt's log output directory; confirm before relying on this.
logs/

View file

@ -0,0 +1,44 @@
# dbt project
This is the dbt project for the course.
## Set up
You need to place a profile for the local postgres instance in `~/.dbt/profiles.yml`. See below a sample config that should be a good starting point if you follow the instructions in the `database` dir of this project.
```yaml
# Profile name; must match the `profile` key in dbt_project.yml.
dbtlearn:
  outputs:
    # Connection settings for the local PostgreSQL instance.
    dev:
      type: postgres
      threads: 4
      host: localhost
      port: 5432
      # Replace the two placeholders below with your database credentials.
      user: the user
      pass: the password
      dbname: airbnb
      schema: dev
  # Output used when no target is passed on the command line.
  target: dev
```
Once both the profile and the database are set up, run `dbt debug` to confirm that the configuration is valid and that dbt can reach the database.
# DBT noisy things below
### Using the starter project
Try running the following commands:
- dbt run
- dbt test
### Resources:
- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
- Find [dbt events](https://events.getdbt.com) near you
- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices

View file

View file

@ -0,0 +1,37 @@
# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'dbtlearn'
version: '1.0.0'
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: 'dbtlearn'

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

clean-targets:         # directories to be removed by `dbt clean`
  - "target"
  - "dbt_packages"

# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models

# In this example config, we tell dbt to build all models in the example/
# directory as views. These settings can be overridden in the individual model
# files using the `{{ config(...) }}` macro.
models:
  dbtlearn:
    # Config indicated by + and applies to all files under models/example/
    example:
      +materialized: view

View file

View file

@ -0,0 +1,27 @@
/*
    Starter dbt model.

    Models can be configured directly inside their SQL file; a config set
    here overrides whatever dbt_project.yml states for this model.
    Try changing "table" to "view" below.
*/

{{ config(materialized='table') }}

with seed_rows as (

    select 1 as id

    union all

    select null as id

)

select id
from seed_rows

-- To remove records with a null `id`, uncomment the filter below.
-- where id is not null

View file

@ -0,0 +1,6 @@
-- Models reference each other through the `ref` function.
-- This one keeps only the row of my_first_dbt_model whose id is 1.
with first_model as (

    select *
    from {{ ref('my_first_dbt_model') }}

)

select *
from first_model
where id = 1

View file

@ -0,0 +1,21 @@
# Descriptions and column tests for the two starter models.
version: 2

models:
  - name: my_first_dbt_model
    description: "A starter dbt model"
    columns:
      - name: id
        description: "The primary key for this table"
        # Each entry under `tests` is checked against this column by `dbt test`.
        tests:
          - unique
          - not_null

  - name: my_second_dbt_model
    description: "A starter dbt model"
    columns:
      - name: id
        description: "The primary key for this table"
        tests:
          - unique
          - not_null

View file

View file