script and docs
This commit is contained in:
parent
e0e97709c0
commit
ad67a79a24
2 changed files with 181 additions and 0 deletions
150
find_orphan_models_in_db.sh
Normal file
150
find_orphan_models_in_db.sh
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
|
||||
STARTING_DIR="/home/azureuser"
|
||||
cd $STARTING_DIR
|
||||
|
||||
# === CONFIGURATION ===
|
||||
DBT_PROJECT="dwh_dbt"
|
||||
DBT_TARGET="prd"
|
||||
PROFILE_YML="$STARTING_DIR/.dbt/profiles.yml"
|
||||
|
||||
# === Flag defaults ===
|
||||
SEND_SLACK=false
|
||||
|
||||
# === Parse flags ===
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
-s|--slack)
|
||||
SEND_SLACK=true
|
||||
shift
|
||||
;;
|
||||
-*)
|
||||
echo "❌ Unknown option: $1"
|
||||
exit 1
|
||||
;;
|
||||
*)
|
||||
break
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# === Positional arguments ===
|
||||
SCHEMAS="$1"
|
||||
MANIFEST_PATH="$2"
|
||||
shift 2
|
||||
IFS=',' read -r -a SCHEMA_ARRAY <<< "$SCHEMAS"
|
||||
|
||||
# === Tool check/install ===
|
||||
install_tool_if_missing() {
|
||||
TOOL_CALL_NAME=$1
|
||||
TOOL_APT_NAME=$2
|
||||
if ! command -v "$TOOL_CALL_NAME" &>/dev/null; then
|
||||
echo "🔧 Installing missing tool: $TOOL_APT_NAME"
|
||||
sudo apt-get update -qq
|
||||
sudo apt-get install -y "$TOOL_APT_NAME"
|
||||
else
|
||||
echo "✅ $TOOL_APT_NAME is installed"
|
||||
fi
|
||||
}
|
||||
|
||||
install_tool_if_missing jq jq
|
||||
install_tool_if_missing yq yq
|
||||
install_tool_if_missing psql postgresql-client
|
||||
|
||||
# === Slack webhook setup ===
|
||||
script_dir=$(dirname "$0")
|
||||
webhooks_file="slack_webhook_urls.txt"
|
||||
env_file="$script_dir/$webhooks_file"
|
||||
|
||||
if [ -f "$env_file" ]; then
|
||||
export $(grep -v '^#' "$env_file" | xargs)
|
||||
else
|
||||
echo "Error: $webhooks_file file not found in the script directory."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# === Load DB credentials from profiles.yml ===
|
||||
echo "🔐 Loading DB credentials from $PROFILE_YML..."
|
||||
DB_NAME=$(yq e ".${DBT_PROJECT}.outputs.${DBT_TARGET}.dbname" "$PROFILE_YML")
|
||||
DB_USER=$(yq e ".${DBT_PROJECT}.outputs.${DBT_TARGET}.user" "$PROFILE_YML")
|
||||
DB_HOST=$(yq e ".${DBT_PROJECT}.outputs.${DBT_TARGET}.host" "$PROFILE_YML")
|
||||
DB_PORT=$(yq e ".${DBT_PROJECT}.outputs.${DBT_TARGET}.port" "$PROFILE_YML")
|
||||
export PGPASSWORD=$(yq e ".${DBT_PROJECT}.outputs.${DBT_TARGET}.pass" "$PROFILE_YML")
|
||||
|
||||
# === Get list of tables/views from Postgres ===
|
||||
echo "🗃️ Reading current tables/views from PostgreSQL..."
|
||||
|
||||
POSTGRES_OBJECTS=()
|
||||
for SCHEMA in "${SCHEMA_ARRAY[@]}"; do
|
||||
echo "🔎 Scanning schema: $SCHEMA"
|
||||
TABLES=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -Atc "
|
||||
SELECT table_schema || '.' || table_name
|
||||
FROM information_schema.tables
|
||||
WHERE table_schema = '$SCHEMA'
|
||||
AND table_type IN ('BASE TABLE', 'VIEW');
|
||||
")
|
||||
while IFS= read -r tbl; do
|
||||
[[ -n "$tbl" ]] && POSTGRES_OBJECTS+=("${tbl,,}")
|
||||
done <<< "$TABLES"
|
||||
done
|
||||
|
||||
POSTGRES_OBJECTS=($(printf "%s\n" "${POSTGRES_OBJECTS[@]}" | sort -u))
|
||||
|
||||
# === Parse manifest.json for dbt model output names ===
|
||||
echo "📦 Extracting model output names from dbt manifest..."
|
||||
|
||||
DBT_OBJECTS=()
|
||||
DBT_ENTRIES=$(jq -r '
|
||||
.nodes | to_entries[] |
|
||||
select(.value.resource_type == "model" or .value.resource_type == "seed") |
|
||||
.value.schema + "." + .value.alias
|
||||
' "$MANIFEST_PATH")
|
||||
|
||||
while IFS= read -r entry; do
|
||||
[[ -n "$entry" ]] && DBT_OBJECTS+=("${entry,,}")
|
||||
done <<< "$DBT_ENTRIES"
|
||||
|
||||
DBT_OBJECTS=($(printf "%s\n" "${DBT_OBJECTS[@]}" | sort -u))
|
||||
|
||||
# === Compare ===
|
||||
echo "📊 Comparing DBT models vs Postgres state..."
|
||||
|
||||
RELEVANT_MODELS=()
|
||||
STALE_MODELS=()
|
||||
|
||||
for pg_obj in "${POSTGRES_OBJECTS[@]}"; do
|
||||
if printf "%s\n" "${DBT_OBJECTS[@]}" | grep -Fxq "$pg_obj"; then
|
||||
RELEVANT_MODELS+=("$pg_obj")
|
||||
else
|
||||
STALE_MODELS+=("$pg_obj")
|
||||
fi
|
||||
done
|
||||
|
||||
# === Output ===
|
||||
echo ""
|
||||
echo "✅ Relevant models (in both DB and DBT):"
|
||||
printf "%s\n" "${RELEVANT_MODELS[@]}" | sort
|
||||
|
||||
echo ""
|
||||
echo "⚠️ Stale models (in DB but NOT in DBT):"
|
||||
printf "%s\n" "${STALE_MODELS[@]}" | sort
|
||||
|
||||
# === Format stale models for Slack ===
|
||||
if [ "$SEND_SLACK" = true ]; then
|
||||
echo "✅ Sending slack message with results."
|
||||
if [ ${#STALE_MODELS[@]} -eq 0 ]; then
|
||||
SLACK_MSG=":white_check_mark::white_check_mark::white_check_mark: dbt models reviewed. No stale models found in the database! :white_check_mark::white_check_mark::white_check_mark:"
|
||||
curl -X POST -H 'Content-type: application/json' \
|
||||
--data "{\"text\":\"$SLACK_MSG\"}" \
|
||||
"$SLACK_RECEIPT_WEBHOOK_URL"
|
||||
else
|
||||
SLACK_MSG=":rotating_light::rotating_light::rotating_light: Stale models detected in Postgres (not in dbt manifest): :rotating_light::rotating_light::rotating_light:\n"
|
||||
for model in "${STALE_MODELS[@]}"; do
|
||||
SLACK_MSG+="- \`$model\`\n"
|
||||
done
|
||||
curl -X POST -H 'Content-type: application/json' \
|
||||
--data "{\"text\":\"$SLACK_MSG\"}" \
|
||||
"$SLACK_ALERT_WEBHOOK_URL"
|
||||
fi
|
||||
fi
|
||||
Loading…
Add table
Add a link
Reference in a new issue