2025-07-04 12:25:21 +02:00
#!/bin/bash
set -euo pipefail
STARTING_DIR = "/home/azureuser"
2025-07-08 17:05:45 +02:00
cd " $STARTING_DIR "
2025-07-04 12:25:21 +02:00
# === CONFIGURATION ===
DBT_PROJECT = "dwh_dbt"
DBT_TARGET = "prd"
PROFILE_YML = " $STARTING_DIR /.dbt/profiles.yml "
# === Flag defaults ===
SEND_SLACK = false
# === Parse flags ===
while [ [ $# -gt 0 ] ] ; do
case " $1 " in
-s| --slack)
SEND_SLACK = true
shift
; ;
-*)
echo " ❌ Unknown option: $1 "
exit 1
; ;
*)
break
; ;
esac
done
# === Positional arguments ===
SCHEMAS = " $1 "
MANIFEST_PATH = " $2 "
shift 2
IFS = ',' read -r -a SCHEMA_ARRAY <<< " $SCHEMAS "
# === Tool check/install ===
install_tool_if_missing( ) {
TOOL_CALL_NAME = $1
TOOL_APT_NAME = $2
if ! command -v " $TOOL_CALL_NAME " & >/dev/null; then
echo " 🔧 Installing missing tool: $TOOL_APT_NAME "
sudo apt-get update -qq
sudo apt-get install -y " $TOOL_APT_NAME "
else
echo " ✅ $TOOL_APT_NAME is installed "
fi
}
install_tool_if_missing jq jq
install_tool_if_missing yq yq
install_tool_if_missing psql postgresql-client
# === Slack webhook setup ===
script_dir = $( dirname " $0 " )
webhooks_file = "slack_webhook_urls.txt"
env_file = " $script_dir / $webhooks_file "
if [ -f " $env_file " ] ; then
export $( grep -v '^#' " $env_file " | xargs)
else
echo " Error: $webhooks_file file not found in the script directory. "
exit 1
fi
# === Load DB credentials from profiles.yml ===
echo " 🔐 Loading DB credentials from $PROFILE_YML ... "
DB_NAME = $( yq e " . ${ DBT_PROJECT } .outputs. ${ DBT_TARGET } .dbname " " $PROFILE_YML " )
DB_USER = $( yq e " . ${ DBT_PROJECT } .outputs. ${ DBT_TARGET } .user " " $PROFILE_YML " )
DB_HOST = $( yq e " . ${ DBT_PROJECT } .outputs. ${ DBT_TARGET } .host " " $PROFILE_YML " )
DB_PORT = $( yq e " . ${ DBT_PROJECT } .outputs. ${ DBT_TARGET } .port " " $PROFILE_YML " )
export PGPASSWORD = $( yq e " . ${ DBT_PROJECT } .outputs. ${ DBT_TARGET } .pass " " $PROFILE_YML " )
# === Get list of tables/views from Postgres ===
echo "🗃️ Reading current tables/views from PostgreSQL..."
POSTGRES_OBJECTS = ( )
for SCHEMA in " ${ SCHEMA_ARRAY [@] } " ; do
echo " 🔎 Scanning schema: $SCHEMA "
TABLES = $( psql -h " $DB_HOST " -p " $DB_PORT " -U " $DB_USER " -d " $DB_NAME " -Atc "
2025-07-08 17:05:45 +02:00
SELECT LOWER( table_schema || '.' || table_name)
2025-07-04 12:25:21 +02:00
FROM information_schema.tables
WHERE table_schema = '$SCHEMA'
2025-07-08 17:05:45 +02:00
AND table_type IN ( 'BASE TABLE' , 'VIEW' )
AND table_name NOT LIKE 'pg_%'
ORDER BY table_schema, table_name;
2025-07-04 12:25:21 +02:00
" )
while IFS = read -r tbl; do
2025-07-08 17:05:45 +02:00
tbl_cleaned = $( echo " $tbl " | tr -d '[:space:]' )
[ [ -n " $tbl_cleaned " ] ] && POSTGRES_OBJECTS += ( " $tbl_cleaned " )
2025-07-04 12:25:21 +02:00
done <<< " $TABLES "
done
POSTGRES_OBJECTS = ( $( printf "%s\n" " ${ POSTGRES_OBJECTS [@] } " | sort -u) )
# === Parse manifest.json for dbt model output names ===
echo "📦 Extracting model output names from dbt manifest..."
DBT_OBJECTS = ( )
DBT_ENTRIES = $( jq -r '
.nodes | to_entries[ ] |
select ( .value.resource_type = = "model" or .value.resource_type = = "seed" ) |
.value.schema + "." + .value.alias
' " $MANIFEST_PATH " )
while IFS = read -r entry; do
2025-07-08 17:05:45 +02:00
entry_cleaned = $( echo " $entry " | tr -d '[:space:]' | tr '[:upper:]' '[:lower:]' )
[ [ -n " $entry_cleaned " ] ] && DBT_OBJECTS += ( " $entry_cleaned " )
2025-07-04 12:25:21 +02:00
done <<< " $DBT_ENTRIES "
DBT_OBJECTS = ( $( printf "%s\n" " ${ DBT_OBJECTS [@] } " | sort -u) )
# === Compare ===
echo "📊 Comparing DBT models vs Postgres state..."
2025-07-08 17:05:45 +02:00
RELEVANT_MODELS = ( $( comm -12 <( printf "%s\n" " ${ POSTGRES_OBJECTS [@] } " | sort) <( printf "%s\n" " ${ DBT_OBJECTS [@] } " | sort) ) )
STALE_MODELS = ( $( comm -23 <( printf "%s\n" " ${ POSTGRES_OBJECTS [@] } " | sort) <( printf "%s\n" " ${ DBT_OBJECTS [@] } " | sort) ) )
2025-07-04 12:25:21 +02:00
# === Output ===
echo ""
echo "✅ Relevant models (in both DB and DBT):"
printf "%s\n" " ${ RELEVANT_MODELS [@] } " | sort
echo ""
echo "⚠️ Stale models (in DB but NOT in DBT):"
printf "%s\n" " ${ STALE_MODELS [@] } " | sort
# === Format stale models for Slack ===
if [ " $SEND_SLACK " = true ] ; then
echo "✅ Sending slack message with results."
if [ ${# STALE_MODELS [@] } -eq 0 ] ; then
SLACK_MSG = ":white_check_mark::white_check_mark::white_check_mark: dbt models reviewed. No stale models found in the database! :white_check_mark::white_check_mark::white_check_mark:"
curl -X POST -H 'Content-type: application/json' \
--data " {\"text\":\" $SLACK_MSG \"} " \
" $SLACK_RECEIPT_WEBHOOK_URL "
else
SLACK_MSG = ":rotating_light::rotating_light::rotating_light: Stale models detected in Postgres (not in dbt manifest): :rotating_light::rotating_light::rotating_light:\n"
for model in " ${ STALE_MODELS [@] } " ; do
SLACK_MSG += " - \` $model \`\n "
done
curl -X POST -H 'Content-type: application/json' \
--data " {\"text\":\" $SLACK_MSG \"} " \
" $SLACK_ALERT_WEBHOOK_URL "
fi
fi