#!/bin/bash
#
# Compare the tables/views that exist in PostgreSQL with the models and seeds
# listed in a dbt manifest, and report relations that are present in the
# database but not in the manifest ("stale" models).
#
# Usage:
#   <script> [-s|--slack] <schema[,schema...]> <path/to/manifest.json>
#
#   -s, --slack   also post the result to Slack; webhook URLs are loaded from
#                 slack_webhook_urls.txt located next to this script.
#
# DB credentials are read with yq from the dbt profiles.yml configured below.
set -euo pipefail

# Resolve the script's own directory BEFORE changing directory: the script may
# have been invoked through a relative path, which would no longer point at
# the right place after the cd below.
script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)

STARTING_DIR="/home/azureuser"
cd "$STARTING_DIR" || {
  echo "❌ Cannot change directory to $STARTING_DIR" >&2
  exit 1
}

# === CONFIGURATION ===
DBT_PROJECT="dwh_dbt"
DBT_TARGET="prd"
PROFILE_YML="$STARTING_DIR/.dbt/profiles.yml"

# === Flag defaults ===
SEND_SLACK=false

# Print a short usage line to stderr.
usage() {
  printf 'Usage: %s [-s|--slack] <schema[,schema...]> <manifest.json>\n' \
    "${0##*/}" >&2
}

# === Parse flags ===
# Flags are only recognised before the positional arguments.
while [[ $# -gt 0 ]]; do
  case "$1" in
    -s|--slack)
      SEND_SLACK=true
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    --)
      shift
      break
      ;;
    -*)
      echo "❌ Unknown option: $1" >&2
      usage
      exit 1
      ;;
    *)
      break
      ;;
  esac
done

# === Positional arguments ===
# Checked explicitly so a missing argument yields a usage message instead of
# an "unbound variable" abort from `set -u`.
if [[ $# -lt 2 ]]; then
  echo "❌ Expected a comma-separated schema list and a manifest path." >&2
  usage
  exit 1
fi
SCHEMAS="$1"
MANIFEST_PATH="$2"
shift 2

if [[ ! -r "$MANIFEST_PATH" ]]; then
  echo "❌ Manifest file not found or not readable: $MANIFEST_PATH" >&2
  exit 1
fi

# Split the schema list on commas, trimming surrounding whitespace and
# dropping empty entries (so "a, b," yields a and b).
SCHEMA_ARRAY=()
if [[ -n "$SCHEMAS" ]]; then
  IFS=',' read -r -a raw_schemas <<< "$SCHEMAS"
  for raw_schema in "${raw_schemas[@]}"; do
    raw_schema=${raw_schema#"${raw_schema%%[![:space:]]*}"}
    raw_schema=${raw_schema%"${raw_schema##*[![:space:]]}"}
    if [[ -n "$raw_schema" ]]; then
      SCHEMA_ARRAY+=("$raw_schema")
    fi
  done
fi
if [[ ${#SCHEMA_ARRAY[@]} -eq 0 ]]; then
  echo "❌ No schema names given." >&2
  usage
  exit 1
fi

# === Tool check/install ===

#######################################
# Install an apt package unless its command is already on PATH.
# Arguments:
#   $1 - command name to look up with `command -v`
#   $2 - apt package name that provides the command
# Outputs:
#   Status message on stdout.
# Returns:
#   0 if the tool is present or was installed; otherwise the apt-get status.
#######################################
install_tool_if_missing() {
  local tool_call_name=$1
  local tool_apt_name=$2

  if command -v "$tool_call_name" &>/dev/null; then
    echo "✅ $tool_apt_name is installed"
    return 0
  fi

  echo "🔧 Installing missing tool: $tool_apt_name"
  sudo apt-get update -qq
  sudo apt-get install -y "$tool_apt_name"
}

install_tool_if_missing jq jq
# NOTE(review): this script calls `yq e ...`; confirm that the apt package
# named "yq" on the target distribution provides a yq that understands the
# `e` (eval) sub-command.
install_tool_if_missing yq yq
install_tool_if_missing psql postgresql-client

# === Slack webhook setup ===
webhooks_file="slack_webhook_urls.txt"
env_file="$script_dir/$webhooks_file"

if [[ ! -f "$env_file" ]]; then
  echo "Error: $webhooks_file file not found in the script directory." >&2
  exit 1
fi

# Export every NAME=VALUE line of the webhook file (one assignment per line,
# optional leading "export", optional matching quotes around the value).
# Lines that are not assignments - comments, blanks - are ignored. Parsing the
# file line by line avoids word-splitting and glob expansion of the values.
while IFS= read -r env_line || [[ -n "$env_line" ]]; do
  env_line=${env_line%$'\r'}
  if [[ "$env_line" =~ ^[[:space:]]*(export[[:space:]]+)?([A-Za-z_][A-Za-z0-9_]*)=(.*)$ ]]; then
    env_name=${BASH_REMATCH[2]}
    env_value=${BASH_REMATCH[3]}
    # Trim trailing whitespace, then strip one pair of surrounding quotes.
    env_value=${env_value%"${env_value##*[![:space:]]}"}
    if [[ "$env_value" == \"*\" || "$env_value" == \'*\' ]]; then
      env_value=${env_value:1:${#env_value}-2}
    fi
    export "$env_name=$env_value"
  fi
done < "$env_file"

# === Load DB credentials from profiles.yml ===

#######################################
# Read one connection setting of the configured dbt project/target.
# Globals:
#   DBT_PROJECT, DBT_TARGET, PROFILE_YML (read)
# Arguments:
#   $1 - key below <project>.outputs.<target> (e.g. dbname)
# Outputs:
#   The value on stdout; an error message on stderr when it is missing.
# Returns:
#   0 on success, 1 when yq yields nothing or the literal "null".
#######################################
read_profile_value() {
  local key=$1
  local value
  value=$(yq e ".${DBT_PROJECT}.outputs.${DBT_TARGET}.${key}" "$PROFILE_YML")
  if [[ -z "$value" || "$value" == "null" ]]; then
    echo "❌ '$key' is not set for ${DBT_PROJECT}/${DBT_TARGET} in $PROFILE_YML" >&2
    return 1
  fi
  printf '%s\n' "$value"
}

echo "🔐 Loading DB credentials from $PROFILE_YML..."
DB_NAME=$(read_profile_value dbname)
DB_USER=$(read_profile_value user)
DB_HOST=$(read_profile_value host)
DB_PORT=$(read_profile_value port)
# Assign and export separately so a failing lookup is not masked by `export`.
PGPASSWORD=$(read_profile_value pass)
export PGPASSWORD

# === Get list of tables/views from Postgres ===
echo "🗃️ Reading current tables/views from PostgreSQL..."
# === Helpers ===

# Print a newline-separated list followed by a newline; print nothing at all
# for an empty list (a bare `printf '%s\n'` would emit one blank line).
emit_list() {
  [[ -z "$1" ]] || printf '%s\n' "$1"
}

# Filter stdin (one name per line): remove all whitespace inside each line,
# lower-case, drop empty lines, and output a sorted, de-duplicated list.
# Sorting uses the C locale so the order matches what compare_lists expects.
normalize_names() {
  tr -d ' \t\r\f\v' \
    | tr '[:upper:]' '[:lower:]' \
    | sed '/^$/d' \
    | LC_ALL=C sort -u
}

#######################################
# Compare two lists produced by normalize_names using comm(1).
# Arguments:
#   $1 - comm column selector: -12 (in both lists) or -23 (only in the first)
#   $2 - first newline-separated list
#   $3 - second newline-separated list
# Outputs:
#   Matching names, one per line.
#######################################
compare_lists() {
  local selector=$1
  local first=$2
  local second=$3
  LC_ALL=C comm "$selector" <(emit_list "$first") <(emit_list "$second")
}

#######################################
# Print the lower-cased "schema.name" of every base table and view in one
# schema, one per line.
# Globals:
#   DB_HOST, DB_PORT, DB_USER, DB_NAME (read)
# Arguments:
#   $1 - schema name
# Returns:
#   psql's exit status.
#######################################
list_schema_relations() {
  local schema=$1
  # The query is fed on stdin (not -c) so that psql interpolates :'schema' as
  # a properly quoted literal instead of the shell pasting the name into the
  # SQL text. -X keeps psqlrc files out of the picture (as -c did), and
  # ON_ERROR_STOP makes an SQL error produce a non-zero exit status.
  # The underscore in the name filter is escaped: an unescaped `_` in LIKE
  # matches any single character, which would also hide names such as
  # "pgbench_x".
  psql -X -At -v ON_ERROR_STOP=1 -v schema="$schema" \
    -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" <<'SQL'
SELECT LOWER(table_schema || '.' || table_name)
FROM information_schema.tables
WHERE table_schema = :'schema'
  AND table_type IN ('BASE TABLE', 'VIEW')
  AND table_name NOT LIKE 'pg!_%' ESCAPE '!'
ORDER BY table_schema, table_name;
SQL
}

# Escape a string for use inside a JSON string literal (backslash, double
# quote, newline, carriage return and tab).
json_escape() {
  local text=$1
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

# Print the JSON body {"text": ...} for a Slack message.
slack_payload() {
  printf '{"text":"%s"}' "$(json_escape "$1")"
}

#######################################
# POST a text message to a Slack webhook.
# Arguments:
#   $1 - webhook URL
#   $2 - message text (may contain real newlines)
# Returns:
#   curl's exit status; --fail turns an HTTP error response into a failure.
#######################################
post_to_slack() {
  local webhook_url=$1
  local message=$2
  curl -sS --fail -X POST -H 'Content-type: application/json' \
    --data "$(slack_payload "$message")" \
    "$webhook_url"
}

# === Collect relations from Postgres ===
raw_relations=""
# The ${array[@]+...} form keeps `set -u` from rejecting an empty array on
# older bash versions.
for SCHEMA in ${SCHEMA_ARRAY[@]+"${SCHEMA_ARRAY[@]}"}; do
  echo "🔎 Scanning schema: $SCHEMA"
  if ! schema_relations=$(list_schema_relations "$SCHEMA"); then
    echo "❌ Could not read tables/views of schema: $SCHEMA" >&2
    exit 1
  fi
  raw_relations+="${schema_relations}"$'\n'
done
postgres_list=$(emit_list "$raw_relations" | normalize_names)
mapfile -t POSTGRES_OBJECTS < <(emit_list "$postgres_list")

# === Parse manifest.json for dbt model output names ===
echo "📦 Extracting model output names from dbt manifest..."
# Only nodes whose resource_type is "model" or "seed" are considered.
# NOTE(review): other resource types that may create relations (e.g.
# snapshots) are not selected and would therefore show up as stale - confirm
# this is intended.
# A jq failure aborts the script through `set -e`, which is enabled at the top
# of the script.
DBT_ENTRIES=$(jq -r '
  .nodes
  | to_entries[]
  | select(.value.resource_type == "model" or .value.resource_type == "seed")
  | .value.schema + "." + .value.alias
' "${MANIFEST_PATH}")
dbt_list=$(emit_list "$DBT_ENTRIES" | normalize_names)
mapfile -t DBT_OBJECTS < <(emit_list "$dbt_list")

# === Compare ===
echo "📊 Comparing DBT models vs Postgres state..."
relevant_list=$(compare_lists -12 "$postgres_list" "$dbt_list")
stale_list=$(compare_lists -23 "$postgres_list" "$dbt_list")
mapfile -t RELEVANT_MODELS < <(emit_list "$relevant_list")
mapfile -t STALE_MODELS < <(emit_list "$stale_list")

# === Output ===
echo ""
echo "✅ Relevant models (in both DB and DBT):"
emit_list "$relevant_list"

echo ""
echo "⚠️ Stale models (in DB but NOT in DBT):"
emit_list "$stale_list"

# === Format stale models for Slack ===
if [[ "${SEND_SLACK:-false}" == true ]]; then
  echo "✅ Sending slack message with results."
  if [[ ${#STALE_MODELS[@]} -eq 0 ]]; then
    slack_webhook=${SLACK_RECEIPT_WEBHOOK_URL:?SLACK_RECEIPT_WEBHOOK_URL is not set}
    # The message spans two lines; the line break is escaped by json_escape so
    # the payload stays valid JSON.
    SLACK_MSG=":white_check_mark::white_check_mark::white_check_mark: dbt models reviewed. No stale models found in the database! "$'\n'":white_check_mark::white_check_mark::white_check_mark:"
  else
    slack_webhook=${SLACK_ALERT_WEBHOOK_URL:?SLACK_ALERT_WEBHOOK_URL is not set}
    SLACK_MSG=":rotating_light::rotating_light::rotating_light: Stale models detected in Postgres (not in dbt manifest): :rotating_light::rotating_light::rotating_light:"$'\n'
    for model in "${STALE_MODELS[@]}"; do
      SLACK_MSG+="- \`$model\`"$'\n'
    done
  fi

  if ! post_to_slack "$slack_webhook" "$SLACK_MSG"; then
    echo "❌ Failed to deliver the Slack message." >&2
    exit 1
  fi
fi