#!/bin/bash
#
# Report "stale" database objects: tables/views that exist in the given
# PostgreSQL schemas but are not produced by any model or seed listed in a dbt
# manifest. Optionally posts the result to Slack.
#
# Usage:
#   <script> [-s|--slack] <schema1[,schema2,...]> <path/to/manifest.json>
#
# Requires (installed via apt when missing): jq, yq, psql.
# Reads DB credentials from the dbt profiles.yml configured below and Slack
# webhook URLs from slack_webhook_urls.txt next to this script.

# Abort on errors, on use of unset variables, and on failures inside pipelines.
set -euo pipefail

# All relative paths used later are resolved from this directory.
readonly STARTING_DIR="/home/azureuser"
cd "$STARTING_DIR"

# === CONFIGURATION ===
# dbt profile name and target whose connection settings are read from
# PROFILE_YML.
readonly DBT_PROJECT="dwh_dbt"
readonly DBT_TARGET="prd"
readonly PROFILE_YML="$STARTING_DIR/.dbt/profiles.yml"

# === Flag defaults ===
SEND_SLACK=false

# Print the command-line synopsis to stderr.
usage() {
  cat >&2 <<EOF
Usage: ${0##*/} [-s|--slack] <schema1[,schema2,...]> <path/to/manifest.json>

  -s, --slack   post the result to Slack
  -h, --help    show this help and exit
EOF
}

# === Parse flags ===
# Flags must come before the positional arguments; parsing stops at the first
# argument that does not start with '-'.
while [[ $# -gt 0 ]]; do
  case "$1" in
    -s|--slack)
      SEND_SLACK=true
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    -*)
      echo "❌ Unknown option: $1" >&2
      usage
      exit 1
      ;;
    *)
      break
      ;;
  esac
done

# === Positional arguments ===
# Validate the count explicitly: under `set -u` a missing argument would
# otherwise abort with an unhelpful "unbound variable" message.
if [[ $# -lt 2 ]]; then
  echo "❌ Missing required arguments." >&2
  usage
  exit 1
fi

SCHEMAS="$1"
MANIFEST_PATH="$2"
shift 2

# Split the comma-separated schema list into an array.
IFS=',' read -r -a SCHEMA_ARRAY <<< "$SCHEMAS"

# An empty list would scan nothing and report "no stale models" by accident.
if [[ ${#SCHEMA_ARRAY[@]} -eq 0 ]]; then
  echo "❌ No schemas given (expected a comma-separated list)." >&2
  usage
  exit 1
fi

# === Tool check/install ===
#######################################
# Ensure a command-line tool is available, installing it with apt-get if the
# command cannot be found on PATH.
# Arguments:
#   $1 - command name to look up with `command -v`
#   $2 - apt package name to install when the command is missing
# Outputs:
#   One status line on stdout (plus whatever apt-get prints).
# Returns:
#   0 if the tool is present or was installed; apt-get's status otherwise.
#######################################
install_tool_if_missing() {
  # Locals keep the helper from leaking variables into the script's globals.
  local tool_call_name=${1:?install_tool_if_missing: command name required}
  local tool_apt_name=${2:?install_tool_if_missing: apt package name required}

  if command -v "$tool_call_name" &>/dev/null; then
    echo "✅ $tool_apt_name is installed"
    return 0
  fi

  echo "🔧 Installing missing tool: $tool_apt_name"
  # Chained with && so a failed update is not ignored even when the caller
  # invokes this function in a context where `set -e` is suppressed.
  sudo apt-get update -qq \
    && sudo apt-get install -y "$tool_apt_name"
}

# Required tools, checked in order. Each entry is "<command>:<apt package>".
# NOTE(review): the script later calls `yq e ...`; confirm that the apt package
# named `yq` provides a yq that understands the `e` sub-command on this host.
for tool_spec in "jq:jq" "yq:yq" "psql:postgresql-client"; do
  install_tool_if_missing "${tool_spec%%:*}" "${tool_spec#*:}"
done

# === Slack webhook setup ===
# NOTE(review): "$0" is evaluated after the script has changed directory to
# STARTING_DIR, so a relative invocation path is resolved against that
# directory rather than the caller's original working directory — confirm the
# script is always started via an absolute path or from STARTING_DIR.
script_dir=$(dirname "$0")
webhooks_file="slack_webhook_urls.txt"
env_file="$script_dir/$webhooks_file"

#######################################
# Export every KEY=VALUE entry of a simple env file.
# Blank lines and lines whose first non-blank character is '#' are skipped, an
# optional leading "export " is tolerated, and one pair of surrounding quotes
# is removed from the value. Values are never word-split or glob-expanded.
# Arguments:
#   $1 - path of the env file
# Returns:
#   1 if a line has an invalid variable name, 0 otherwise.
#######################################
load_env_file() {
  local file=$1
  local line key value
  local -r name_re='^[A-Za-z_][A-Za-z0-9_]*$'

  # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    line=${line%$'\r'}                           # tolerate CRLF files
    line="${line#"${line%%[![:space:]]*}"}"      # trim leading whitespace
    line="${line%"${line##*[![:space:]]}"}"      # trim trailing whitespace
    if [[ -z "$line" || "$line" == \#* ]]; then
      continue
    fi
    line=${line#export }
    if [[ "$line" != *=* ]]; then
      # Do not echo the line itself: it may contain a secret.
      echo "Warning: ignoring line without '=' in $file" >&2
      continue
    fi
    key=${line%%=*}
    value=${line#*=}
    if [[ ! "$key" =~ $name_re ]]; then
      echo "Error: invalid variable name in $file" >&2
      return 1
    fi
    if [[ "$value" == \"*\" || "$value" == \'*\' ]]; then
      value=${value:1:${#value}-2}
    fi
    export "$key=$value"
  done < "$file"
}

if [[ ! -f "$env_file" ]]; then
  echo "Error: $webhooks_file file not found in the script directory." >&2
  exit 1
fi

load_env_file "$env_file"

# === Load DB credentials from profiles.yml ===
echo "🔐 Loading DB credentials from $PROFILE_YML..."

if [[ ! -r "$PROFILE_YML" ]]; then
  echo "❌ Cannot read dbt profile file: $PROFILE_YML" >&2
  exit 1
fi

#######################################
# Read one connection setting of the configured dbt profile/target.
# Globals:
#   DBT_PROJECT, DBT_TARGET, PROFILE_YML (read)
# Arguments:
#   $1 - key below .<project>.outputs.<target> (e.g. "host")
# Outputs:
#   The value on stdout.
# Returns:
#   Non-zero if yq fails or the key is missing/empty.
#######################################
read_profile_value() {
  local key=$1
  local value
  value=$(yq e ".${DBT_PROJECT}.outputs.${DBT_TARGET}.${key}" "$PROFILE_YML")
  # A missing key comes back as the literal string "null"; fail here instead
  # of passing that on to psql as a real setting.
  if [[ -z "$value" || "$value" == "null" ]]; then
    echo "❌ '${key}' is not set for ${DBT_PROJECT}/${DBT_TARGET} in $PROFILE_YML" >&2
    return 1
  fi
  printf '%s\n' "$value"
}

DB_NAME=$(read_profile_value dbname)
DB_USER=$(read_profile_value user)
DB_HOST=$(read_profile_value host)
DB_PORT=$(read_profile_value port)

# Assign first, export second: `export VAR=$(cmd)` would hide a failure of cmd.
PGPASSWORD=$(read_profile_value pass)
export PGPASSWORD

# === Get list of tables/views from Postgres ===
echo "🗃️ Reading current tables/views from PostgreSQL..."

# The schema name is passed as a psql variable and quoted by psql (:'schema'),
# so it is never pasted into the SQL text. psql only interpolates variables in
# scripts read from stdin or a file, not in -c commands, hence the here-string.
# The '_' in the prefix filter is escaped because a bare '_' in LIKE matches
# any single character, which would also hide every table starting with "pg".
LIST_OBJECTS_SQL="
SELECT LOWER(table_schema || '.' || table_name)
FROM information_schema.tables
WHERE table_schema = :'schema'
  AND table_type IN ('BASE TABLE', 'VIEW')
  AND table_name NOT LIKE 'pg!_%' ESCAPE '!'
ORDER BY table_schema, table_name;
"

POSTGRES_OBJECTS=()
for SCHEMA in "${SCHEMA_ARRAY[@]}"; do
  # Tolerate blanks around the commas of the schema list ("a, b").
  SCHEMA="${SCHEMA#"${SCHEMA%%[![:space:]]*}"}"
  SCHEMA="${SCHEMA%"${SCHEMA##*[![:space:]]}"}"
  if [[ -z "$SCHEMA" ]]; then
    continue
  fi

  echo "🔎 Scanning schema: $SCHEMA"
  # ON_ERROR_STOP makes psql exit non-zero on SQL errors when reading from
  # stdin, so `set -e` aborts the script instead of continuing with no data.
  TABLES=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" \
    -At -v ON_ERROR_STOP=1 -v schema="$SCHEMA" <<< "$LIST_OBJECTS_SQL")

  while IFS= read -r tbl; do
    tbl_cleaned=${tbl//[[:space:]]/}
    if [[ -n "$tbl_cleaned" ]]; then
      POSTGRES_OBJECTS+=("$tbl_cleaned")
    fi
  done <<< "$TABLES"
done

# De-duplicate and sort without word-splitting or glob-expanding the names.
if [[ ${#POSTGRES_OBJECTS[@]} -gt 0 ]]; then
  sorted_objects=$(printf '%s\n' "${POSTGRES_OBJECTS[@]}" | sort -u)
  mapfile -t POSTGRES_OBJECTS <<< "$sorted_objects"
fi

# === Parse manifest.json for dbt model output names ===
echo "📦 Extracting model output names from dbt manifest..."

if [[ ! -r "$MANIFEST_PATH" ]]; then
  echo "❌ Cannot read dbt manifest: $MANIFEST_PATH" >&2
  exit 1
fi

# One "<schema>.<alias>" line per node of type model or seed.
# NOTE(review): other resource types (e.g. snapshots) are not selected, so any
# relations they create will be reported as stale — confirm this is intended.
DBT_OBJECTS=()
DBT_ENTRIES=$(jq -r '
  .nodes
  | to_entries[]
  | select(.value.resource_type == "model" or .value.resource_type == "seed")
  | .value.schema + "." + .value.alias
' "$MANIFEST_PATH")

# Normalise in-shell (strip whitespace, lower-case) instead of forking
# echo/tr twice per entry.
while IFS= read -r entry; do
  entry_cleaned=${entry//[[:space:]]/}
  entry_cleaned=${entry_cleaned,,}
  if [[ -n "$entry_cleaned" ]]; then
    DBT_OBJECTS+=("$entry_cleaned")
  fi
done <<< "$DBT_ENTRIES"

# De-duplicate and sort without word-splitting or glob-expanding the names.
if [[ ${#DBT_OBJECTS[@]} -gt 0 ]]; then
  sorted_entries=$(printf '%s\n' "${DBT_OBJECTS[@]}" | sort -u)
  mapfile -t DBT_OBJECTS <<< "$sorted_entries"
fi

# === Compare ===
echo "📊 Comparing DBT models vs Postgres state..."

# Print the arguments one per line, sorted and de-duplicated.
# Prints nothing (not even a blank line) when called without arguments.
sorted_lines() {
  if [[ $# -gt 0 ]]; then
    printf '%s\n' "$@" | sort -u
  fi
}

#######################################
# Split the database objects into those dbt knows about and those it does not.
# Globals:
#   POSTGRES_OBJECTS, DBT_OBJECTS (read)
#   RELEVANT_MODELS - set to the names present in both lists
#   STALE_MODELS    - set to the names present only in POSTGRES_OBJECTS
#######################################
compare_objects() {
  local in_both only_in_db

  # ${arr[@]+"${arr[@]}"} expands to nothing for an empty or unset array
  # without tripping `set -u` on older bash versions.
  in_both=$(comm -12 \
    <(sorted_lines ${POSTGRES_OBJECTS[@]+"${POSTGRES_OBJECTS[@]}"}) \
    <(sorted_lines ${DBT_OBJECTS[@]+"${DBT_OBJECTS[@]}"}))
  only_in_db=$(comm -23 \
    <(sorted_lines ${POSTGRES_OBJECTS[@]+"${POSTGRES_OBJECTS[@]}"}) \
    <(sorted_lines ${DBT_OBJECTS[@]+"${DBT_OBJECTS[@]}"}))

  # mapfile keeps each line as exactly one element (no word-splitting or
  # globbing); the guards keep an empty result from becoming a single "".
  RELEVANT_MODELS=()
  if [[ -n "$in_both" ]]; then
    mapfile -t RELEVANT_MODELS <<< "$in_both"
  fi
  STALE_MODELS=()
  if [[ -n "$only_in_db" ]]; then
    mapfile -t STALE_MODELS <<< "$only_in_db"
  fi
}

compare_objects

# === Output ===
# Print an empty line, the heading given as $1, then the remaining arguments
# one per line in sorted order.
print_model_section() {
  local heading=$1
  shift
  echo ""
  echo "$heading"
  printf "%s\n" "$@" | sort
}

print_model_section "✅ Relevant models (in both DB and DBT):" "${RELEVANT_MODELS[@]}"
print_model_section "⚠️ Stale models (in DB but NOT in DBT):" "${STALE_MODELS[@]}"

# === Format stale models for Slack ===

#######################################
# Build the Slack message text.
# Arguments:
#   Names of the stale models; none means "nothing stale".
# Outputs:
#   The message on stdout: a confirmation when called without arguments,
#   otherwise an alert header followed by one bullet line per model.
#######################################
build_slack_text() {
  if [[ $# -eq 0 ]]; then
    printf '%s\n' ":white_check_mark::white_check_mark::white_check_mark: dbt models reviewed. No stale models found in the database! :white_check_mark::white_check_mark::white_check_mark:"
    return 0
  fi
  printf '%s\n' ":rotating_light::rotating_light::rotating_light: Stale models detected in Postgres (not in dbt manifest): :rotating_light::rotating_light::rotating_light:"
  printf -- '- `%s`\n' "$@"
}

# Wrap the text given as $1 in a Slack JSON payload. jq does the escaping, so
# quotes, backslashes and newlines in the text cannot break the JSON.
slack_payload() {
  jq -cn --arg text "$1" '{text: $text}'
}

#######################################
# POST a text message to a Slack incoming webhook.
# Arguments:
#   $1 - webhook URL
#   $2 - message text
# Returns:
#   Non-zero if the payload cannot be built or Slack answers with an HTTP
#   error (--fail), so an undelivered alert does not go unnoticed.
#######################################
post_to_slack() {
  local webhook_url=$1
  local text=$2
  local payload
  payload=$(slack_payload "$text")
  curl --fail --silent --show-error -X POST \
    -H 'Content-type: application/json' \
    --data "$payload" \
    "$webhook_url"
}

if [[ "${SEND_SLACK:-false}" == true ]]; then
  echo "✅ Sending slack message with results."
  if [[ ${#STALE_MODELS[@]} -eq 0 ]]; then
    post_to_slack \
      "${SLACK_RECEIPT_WEBHOOK_URL:?SLACK_RECEIPT_WEBHOOK_URL is not set}" \
      "$(build_slack_text)"
  else
    post_to_slack \
      "${SLACK_ALERT_WEBHOOK_URL:?SLACK_ALERT_WEBHOOK_URL is not set}" \
      "$(build_slack_text "${STALE_MODELS[@]}")"
  fi
fi