script and docs

This commit is contained in:
Pablo Martin 2025-07-04 12:25:21 +02:00
parent e0e97709c0
commit ad67a79a24
2 changed files with 181 additions and 0 deletions

150
find_orphan_models_in_db.sh Normal file
View file

@ -0,0 +1,150 @@
#!/bin/bash
#
# find_orphan_models_in_db.sh
#
# Lists the tables/views found in the given Postgres schemas and reports which
# of them are not produced by any model or seed in a dbt manifest. With
# -s/--slack the result is also posted to a Slack webhook.
#
# Usage:
#   find_orphan_models_in_db.sh [-s|--slack] <schema1,schema2,...> <manifest.json>

# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# The script always works from this fixed directory; quoted so the path is
# passed to cd as a single word whatever it contains.
STARTING_DIR="/home/azureuser"
cd "$STARTING_DIR"
# === CONFIGURATION ===
# dbt profile name, target, and profiles.yml location; the DB connection
# settings are looked up under <project>.outputs.<target> in that file.
DBT_PROJECT='dwh_dbt'
DBT_TARGET='prd'
PROFILE_YML="${STARTING_DIR}/.dbt/profiles.yml"

# === Flag defaults ===
# Slack reporting stays off unless -s/--slack is given.
SEND_SLACK='false'
# === Parse flags ===
# Consume leading options one at a time and stop at the first argument that
# does not start with a dash; what remains are the positional arguments.
while (( $# > 0 )); do
  if [[ "$1" == "-s" || "$1" == "--slack" ]]; then
    SEND_SLACK=true
    shift
  elif [[ "$1" == -* ]]; then
    echo "❌ Unknown option: $1"
    exit 1
  else
    break
  fi
done
# === Positional arguments ===
#   $1 - comma-separated list of Postgres schemas to scan
#   $2 - path to the dbt manifest.json to compare against
# Validate the count up front: with `set -u` active, a missing argument would
# otherwise abort with a bare "unbound variable" error and no usage hint.
if [[ $# -lt 2 ]]; then
  echo "❌ Usage: $0 [-s|--slack] <schema1,schema2,...> <path/to/manifest.json>" >&2
  exit 1
fi
SCHEMAS="$1"
MANIFEST_PATH="$2"
shift 2
# Split the comma-separated schema list into one array element per schema.
IFS=',' read -r -a SCHEMA_ARRAY <<< "$SCHEMAS"
# === Tool check/install ===
#######################################
# Make sure a command-line tool is available, installing it via apt-get when
# it cannot be found on PATH.
# Arguments:
#   $1 - command name to look up with `command -v`
#   $2 - apt package name to install when the command is missing
# Outputs:
#   A status line on stdout (plus whatever apt-get prints when installing).
# Returns:
#   0 when the tool is already present; otherwise the exit status of
#   `apt-get install`.
#######################################
install_tool_if_missing() {
  # Function-local so repeated calls do not leave globals behind in the
  # calling script.
  local tool_call_name=$1
  local tool_apt_name=$2

  if ! command -v "$tool_call_name" &>/dev/null; then
    echo "🔧 Installing missing tool: $tool_apt_name"
    sudo apt-get update -qq
    sudo apt-get install -y "$tool_apt_name"
  else
    echo "$tool_apt_name is installed"
  fi
}
# Required tools, written as "command:apt-package" pairs and checked in order.
# NOTE(review): the credentials section calls `yq e <expr> <file>`; confirm the
# apt `yq` package installs a yq implementation that accepts that syntax.
for tool_spec in "jq:jq" "yq:yq" "psql:postgresql-client"; do
  install_tool_if_missing "${tool_spec%%:*}" "${tool_spec#*:}"
done
# === Slack webhook setup ===
# Export every non-comment entry of slack_webhook_urls.txt, which is expected
# to sit in the same directory as this script (presumably KEY=VALUE lines).
# NOTE(review): the file is required even when -s/--slack is not passed.
# NOTE(review): "$0" is evaluated after the earlier `cd`, so a relative
# invocation path resolves against the new working directory - confirm.
webhooks_file="slack_webhook_urls.txt"
script_dir=$(dirname "$0")
env_file="$script_dir/$webhooks_file"

# Guard clause: bail out early when the file is missing.
if [ ! -f "$env_file" ]; then
  echo "Error: $webhooks_file file not found in the script directory."
  exit 1
fi

# Left unquoted on purpose: each whitespace-separated token that xargs emits
# becomes one argument to `export`.
# shellcheck disable=SC2046
export $(grep -v '^#' "$env_file" | xargs)
# === Load DB credentials from profiles.yml ===
echo "🔐 Loading DB credentials from $PROFILE_YML..."

#######################################
# Print one field of the configured dbt target from profiles.yml.
# Globals:   DBT_PROJECT, DBT_TARGET, PROFILE_YML (read)
# Arguments: $1 - field name under <project>.outputs.<target>
# Outputs:   the value reported by yq, on stdout
# Returns:   exit status of yq
#######################################
profile_field() {
  yq e ".${DBT_PROJECT}.outputs.${DBT_TARGET}.$1" "$PROFILE_YML"
}

DB_NAME=$(profile_field dbname)
DB_USER=$(profile_field user)
DB_HOST=$(profile_field host)
DB_PORT=$(profile_field port)
# Assign first, export second: `export VAR=$(cmd)` reports the status of
# `export` itself, so a failing yq would slip past `set -e` for the password
# while the four lookups above would abort.
PGPASSWORD=$(profile_field pass)
export PGPASSWORD
# === Get list of tables/views from Postgres ===
# Fills POSTGRES_OBJECTS with lowercased, de-duplicated "schema.name" entries
# for every base table and view in each requested schema.
echo "🗃️ Reading current tables/views from PostgreSQL..."
POSTGRES_OBJECTS=()
for SCHEMA in "${SCHEMA_ARRAY[@]}"; do
  echo "🔎 Scanning schema: $SCHEMA"
  # -A (unaligned) and -t (tuples only) make psql print one bare
  # "schema.name" value per line; the password is taken from PGPASSWORD.
  # NOTE(review): $SCHEMA is interpolated straight into the SQL text, which is
  # only safe while schema names come from a trusted caller.
  TABLES=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -Atc "
SELECT table_schema || '.' || table_name
FROM information_schema.tables
WHERE table_schema = '$SCHEMA'
AND table_type IN ('BASE TABLE', 'VIEW');
")
  while IFS= read -r tbl; do
    # The here-string yields one empty line when the query returns nothing;
    # skip it. Names are lowercased for the later comparison.
    if [[ -n "$tbl" ]]; then
      POSTGRES_OBJECTS+=("${tbl,,}")
    fi
  done <<< "$TABLES"
done
# De-duplicate. mapfile stores exactly one element per line, so names that
# contain spaces or glob characters survive intact (an unquoted $(...) array
# assignment would word-split and glob-expand them). The guard keeps an empty
# result empty: printf with no arguments would still emit one blank line.
if (( ${#POSTGRES_OBJECTS[@]} > 0 )); then
  mapfile -t POSTGRES_OBJECTS < <(printf '%s\n' "${POSTGRES_OBJECTS[@]}" | sort -u)
fi
# === Parse manifest.json for dbt model output names ===
# Fills DBT_OBJECTS with lowercased, de-duplicated "schema.alias" entries for
# every model and seed node in the manifest.
echo "📦 Extracting model output names from dbt manifest..."
DBT_OBJECTS=()
# One "schema.alias" line per node whose resource_type is "model" or "seed".
DBT_ENTRIES=$(jq -r '
.nodes | to_entries[] |
select(.value.resource_type == "model" or .value.resource_type == "seed") |
.value.schema + "." + .value.alias
' "$MANIFEST_PATH")
while IFS= read -r entry; do
  # Skip the empty line the here-string yields when jq printed nothing.
  if [[ -n "$entry" ]]; then
    DBT_OBJECTS+=("${entry,,}")
  fi
done <<< "$DBT_ENTRIES"
# De-duplicate with mapfile (one element per line) rather than an unquoted
# $(...) array assignment, which would word-split and glob-expand entries.
# The guard keeps an empty result empty: printf with no arguments would still
# emit one blank line.
if (( ${#DBT_OBJECTS[@]} > 0 )); then
  mapfile -t DBT_OBJECTS < <(printf '%s\n' "${DBT_OBJECTS[@]}" | sort -u)
fi
# === Compare ===
echo "📊 Comparing DBT models vs Postgres state..."
RELEVANT_MODELS=()
STALE_MODELS=()

#######################################
# Split the Postgres objects into those dbt knows about and those it does not.
#
# Membership is tested with an associative-array lookup rather than
# `printf ... | grep -Fxq`: with `set -o pipefail`, grep -q exiting on an early
# match can kill the still-writing printf with SIGPIPE, the pipeline then
# reports failure, and an object that IS in dbt gets classified as stale. The
# lookup also avoids spawning a pipeline per object.
#
# Globals:
#   POSTGRES_OBJECTS, DBT_OBJECTS   (read)
#   RELEVANT_MODELS, STALE_MODELS   (reset, then written)
# Arguments:
#   None
#######################################
classify_postgres_objects() {
  local -A dbt_lookup=()
  local dbt_obj pg_obj

  RELEVANT_MODELS=()
  STALE_MODELS=()

  for dbt_obj in "${DBT_OBJECTS[@]}"; do
    # An empty string is not a valid associative-array key.
    if [[ -n "$dbt_obj" ]]; then
      dbt_lookup["$dbt_obj"]=1
    fi
  done

  for pg_obj in "${POSTGRES_OBJECTS[@]}"; do
    # ${array[key]+set} expands to "set" only when the key exists.
    if [[ -n "$pg_obj" && -n "${dbt_lookup[$pg_obj]+set}" ]]; then
      RELEVANT_MODELS+=("$pg_obj")
    else
      STALE_MODELS+=("$pg_obj")
    fi
  done
}

classify_postgres_objects
# === Output ===
# Print both result lists under their headings, each preceded by a blank line
# and passed through sort.
printf '\n%s\n' "✅ Relevant models (in both DB and DBT):"
sort < <(printf '%s\n' "${RELEVANT_MODELS[@]}")

printf '\n%s\n' "⚠️ Stale models (in DB but NOT in DBT):"
sort < <(printf '%s\n' "${STALE_MODELS[@]}")
# === Format stale models for Slack ===
# Runs only when SEND_SLACK is true. An all-clear message goes to the receipt
# webhook; a list of stale models goes to the alert webhook.
# NOTE(review): the JSON payload is built by plain string interpolation, so a
# model name containing a double quote or backslash would produce invalid JSON.
if [[ "$SEND_SLACK" == true ]]; then
  echo "✅ Sending slack message with results."

  if (( ${#STALE_MODELS[@]} == 0 )); then
    SLACK_MSG=":white_check_mark::white_check_mark::white_check_mark: dbt models reviewed. No stale models found in the database! :white_check_mark::white_check_mark::white_check_mark:"
    slack_target_url="$SLACK_RECEIPT_WEBHOOK_URL"
  else
    # "\n" stays a literal backslash-n here; it is the JSON escape for a
    # newline once embedded in the payload.
    SLACK_MSG=":rotating_light::rotating_light::rotating_light: Stale models detected in Postgres (not in dbt manifest): :rotating_light::rotating_light::rotating_light:\n"
    for model in "${STALE_MODELS[@]}"; do
      SLACK_MSG+="- \`$model\`\n"
    done
    slack_target_url="$SLACK_ALERT_WEBHOOK_URL"
  fi

  slack_payload="{\"text\":\"$SLACK_MSG\"}"
  curl -X POST -H 'Content-type: application/json' \
    --data "$slack_payload" \
    "$slack_target_url"
fi