wip
parent 4bef83f432
commit db0d97d2e8
9 changed files with 158 additions and 3 deletions
@@ -37,7 +37,7 @@ steps:
  displayName: 'Sync Foreign Data Wrappers schemas'

- script: |
    cd ~/dbt-ci
    cd ci
    /bin/bash build-master-artifacts.sh
  displayName: 'Build master artifacts'
30  ci/README.md
@@ -1,3 +1,31 @@
# CI

This folder contains things we use for Continuous Integration.

You can set up CI pipelines for the project if you want. This enables running checks on PRs and on commits to master, which helps minimize errors and ensures a baseline level of quality.

The details here are specific to Azure DevOps. If you need to set things up in a different Git/CI environment, you'll have to adapt them accordingly.

## CI VM Setup

### Requirements

These instructions assume that:

- You have a VM ready to be set up as the CI server.
- You can SSH into it.
- The VM has Docker and Docker Compose installed and ready to run.
- The VM has `psql` installed.
- The VM has the Azure CI agent installed.
- You have cloned this repository in the home folder of the user you use on that VM.
- The DWH production instance has a dedicated CI user that can read from all sync schemas as well as `staging`, `intermediate` and `reporting`, and you have its credentials (a sketch of what such a user might look like follows this list).
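
For reference, here is a minimal sketch of what that production CI user could look like. The role name `ci_reader` comes from `ci.env`; the exact grants and the abbreviated schema list are assumptions, so adjust them to your instance:

```sql
-- Hypothetical sketch of a read-only CI user on the production DWH.
-- Extend the schema list with all the sync_* schemas you need.
CREATE ROLE ci_reader LOGIN PASSWORD 'choose-a-strong-password';
GRANT USAGE ON SCHEMA staging, intermediate, reporting TO ci_reader;
GRANT SELECT ON ALL TABLES IN SCHEMA staging, intermediate, reporting TO ci_reader;
ALTER DEFAULT PRIVILEGES IN SCHEMA staging, intermediate, reporting
  GRANT SELECT ON TABLES TO ci_reader;
```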

### Setting things up

- Create a folder named `dbt-ci` in the home directory of that user.
- Copy the `ci.env` file there as `.env` (`cp ci.env ~/dbt-ci/.env`) and fill it with values of your choice.
- Execute the `ci-vm-setup.sh` script in this folder. It takes care of most of the required setup (a condensed walk-through follows this list), including:
  - Preparing the postgres database.
  - Setting up the dockerized postgres with the right database, FDWs, etc.
  - Preparing the `profiles.yml` file.
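
Condensed, those steps could look roughly like this on the VM (assuming the repository is cloned at `~/data-dwh-dbt-project`, the path the build script uses; adjust if yours differs):

```bash
mkdir -p ~/dbt-ci
cp ~/data-dwh-dbt-project/ci/ci.env ~/dbt-ci/.env
# edit ~/dbt-ci/.env, then export it so ci-vm-setup.sh can use the variables
set -a && source ~/dbt-ci/.env && set +a
cd ~/data-dwh-dbt-project/ci
bash ci-vm-setup.sh
```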

### Connecting to DevOps

- TBD
24  ci/build-master-artifacts.sh  (new file)
@@ -0,0 +1,24 @@
#!/bin/bash

cd ~/data-dwh-dbt-project

git checkout master
git pull

# Recreate the Python virtualenv and install the project dependencies
rm -rf venv
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
dbt deps

# Use the CI environment variables
rm -f .env
cp ~/dbt-ci/.env .env
set -a && source .env && set +a

rm -rf target/

dbt compile

# Save the compiled manifest as the master artifact
mkdir -p ~/dbt-ci/master-artifacts/
cp target/manifest.json ~/dbt-ci/master-artifacts/manifest.json
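
The manifest saved here is presumably meant for dbt's state comparison in PR builds (an assumption; the PR-side pipeline is not part of this commit). A PR job could then consume it with something like:

```bash
# Hypothetical PR-side usage of the master artifact (not part of this commit)
dbt build --select state:modified+ --state ~/dbt-ci/master-artifacts
```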
8  ci/ci-vm-setup.sh  (new file)
@@ -0,0 +1,8 @@
# Start the dockerized Postgres container
docker compose up -d --build

# Run script to set things up in Postgres (DB, FDWs, etc).
# The variables below must be exported first, e.g. `set -a && source ~/dbt-ci/.env && set +a`.
envsubst < postgres-initial-setup.sql | psql -h $POSTGRES_HOST -U $POSTGRES_USER -d postgres

# Copy profiles file
mkdir -p ~/.dbt
cp ci.profiles.yml ~/.dbt/profiles.yml
10  ci/ci.env  (new file)
@@ -0,0 +1,10 @@
POSTGRES_HOST=localhost
POSTGRES_USER=place a user here
PGPASSWORD=place a password here
POSTGRES_PORT=5432
PRD_SCHEMAS_TO_SYNC="'sync_xero_superhog_limited','sync_xedotcom_currency_rates','sync_stripe_us','sync_stripe_uk','sync_hubspot','sync_guest_product','sync_default','sync_core','sync_cdb_screening','sync_cdb_screen_and_protect','sync_cdb_resolutions','sync_cdb_edeposit','sync_cdb_check_in_hero','sync_cdb_athena','staging','reporting','intermediate'"
PRD_CI_USER='ci_reader'
PRD_CI_PASSWORD=
PRD_HOST=the host
PRD_DB=the database
PRD_PORT=the port
13  ci/ci.profiles.yml  (new file)
@@ -0,0 +1,13 @@
dwh_dbt:
  outputs:
    prd-pointer:
      dbname: prd-pointer
      host: "{{ env_var('POSTGRES_HOST') }}"
      port: "{{ env_var('POSTGRES_PORT') | as_number }}"
      schema: public
      user: "{{ env_var('POSTGRES_USER') }}"
      pass: "{{ env_var('PGPASSWORD') }}"
      type: postgres
      threads: 1

  target: prd-pointer
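
Once this file is in place at `~/.dbt/profiles.yml` and the CI variables are exported, the connection can be sanity-checked from the project folder before wiring anything into pipelines:

```bash
# Assumes the dbt project uses the dwh_dbt profile defined above
dbt debug --target prd-pointer
```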
35  ci/docker-compose.yml  (new file)
@@ -0,0 +1,35 @@
services:
  postgres:
    image: postgres:16
    container_name: postgres_db
    environment:
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: postgres
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
    # Note that some of the values below are very HW specific. You should
    # absolutely adjust them to the available hardware where this will be
    # running. A Postgres tuning calculator can help if you feel lost.
    command: [
      "-c", "max_connections=XX",
      "-c", "shared_buffers=XGB",
      "-c", "effective_cache_size=XXXGB",
      "-c", "maintenance_work_mem=XXXMB",
      "-c", "checkpoint_completion_target=0.9",
      "-c", "wal_buffers=XXXMB",
      "-c", "default_statistics_target=XXX",
      "-c", "random_page_cost=1.1",
      "-c", "effective_io_concurrency=XXX",
      "-c", "work_mem=XXXkB",
      "-c", "huge_pages=off",
      "-c", "min_wal_size=XXXGB",
      "-c", "max_wal_size=XXXGB"
    ]
    restart: unless-stopped

volumes:
  postgres_data:
    driver: local
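
Purely as an illustration (these numbers are not part of the commit and assume a small VM, roughly 4 cores / 8 GB RAM / SSD), the placeholders above could be filled along these lines:

```yaml
command: [
  "-c", "max_connections=100",
  "-c", "shared_buffers=2GB",
  "-c", "effective_cache_size=6GB",
  "-c", "maintenance_work_mem=512MB",
  "-c", "checkpoint_completion_target=0.9",
  "-c", "wal_buffers=16MB",
  "-c", "default_statistics_target=100",
  "-c", "random_page_cost=1.1",
  "-c", "effective_io_concurrency=200",
  "-c", "work_mem=16384kB",
  "-c", "huge_pages=off",
  "-c", "min_wal_size=1GB",
  "-c", "max_wal_size=4GB"
]
```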
33  ci/postgres-initial-setup.sql  (new file)
@@ -0,0 +1,33 @@
CREATE DATABASE "prd-pointer";
\c prd-pointer

CREATE EXTENSION postgres_fdw;

CREATE SERVER dwh_prd
FOREIGN DATA WRAPPER postgres_fdw
OPTIONS (host '$PRD_HOST', dbname '$PRD_DB', port '$PRD_PORT');

CREATE USER MAPPING FOR current_user
SERVER dwh_prd
OPTIONS (user '$PRD_CI_USER', password '$PRD_CI_PASSWORD');

CREATE OR REPLACE FUNCTION refresh_foreign_schemas(schema_list TEXT[]) RETURNS void AS $$
DECLARE
    schema_name TEXT;
BEGIN
    -- Loop through each schema in the provided list
    FOREACH schema_name IN ARRAY schema_list LOOP

        -- Drop and recreate the schema to avoid conflicts
        EXECUTE format('DROP SCHEMA IF EXISTS %I CASCADE', schema_name);
        EXECUTE format('CREATE SCHEMA %I', schema_name);

        -- Import all tables from the foreign server
        EXECUTE format(
            'IMPORT FOREIGN SCHEMA %I FROM SERVER dwh_prd INTO %I',
            schema_name, schema_name
        );

    END LOOP;
END;
$$ LANGUAGE plpgsql;
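
The 'Sync Foreign Data Wrappers schemas' pipeline step can then refresh the imported schemas by calling this function, presumably with the list from `PRD_SCHEMAS_TO_SYNC` (that wiring is not shown in this commit). A hypothetical invocation, with the schema list abbreviated:

```sql
-- Hypothetical call; the real list comes from PRD_SCHEMAS_TO_SYNC in ci.env
SELECT refresh_foreign_schemas(ARRAY['sync_core', 'sync_hubspot', 'staging', 'intermediate', 'reporting']);
```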