fix bugs in orphan model detection

This commit is contained in:
Pablo Martin 2025-07-08 17:05:45 +02:00
parent 717590513f
commit 7488400cbb

View file

@@ -2,7 +2,7 @@
set -euo pipefail set -euo pipefail
STARTING_DIR="/home/azureuser" STARTING_DIR="/home/azureuser"
cd $STARTING_DIR cd "$STARTING_DIR"
# === CONFIGURATION === # === CONFIGURATION ===
DBT_PROJECT="dwh_dbt" DBT_PROJECT="dwh_dbt"
@@ -79,13 +79,16 @@ POSTGRES_OBJECTS=()
for SCHEMA in "${SCHEMA_ARRAY[@]}"; do for SCHEMA in "${SCHEMA_ARRAY[@]}"; do
echo "🔎 Scanning schema: $SCHEMA" echo "🔎 Scanning schema: $SCHEMA"
TABLES=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -Atc " TABLES=$(psql -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" -Atc "
SELECT table_schema || '.' || table_name SELECT LOWER(table_schema || '.' || table_name)
FROM information_schema.tables FROM information_schema.tables
WHERE table_schema = '$SCHEMA' WHERE table_schema = '$SCHEMA'
AND table_type IN ('BASE TABLE', 'VIEW'); AND table_type IN ('BASE TABLE', 'VIEW')
AND table_name NOT LIKE 'pg_%'
ORDER BY table_schema, table_name;
") ")
while IFS= read -r tbl; do while IFS= read -r tbl; do
[[ -n "$tbl" ]] && POSTGRES_OBJECTS+=("${tbl,,}") tbl_cleaned=$(echo "$tbl" | tr -d '[:space:]')
[[ -n "$tbl_cleaned" ]] && POSTGRES_OBJECTS+=("$tbl_cleaned")
done <<< "$TABLES" done <<< "$TABLES"
done done
@@ -102,7 +105,8 @@ DBT_ENTRIES=$(jq -r '
' "$MANIFEST_PATH") ' "$MANIFEST_PATH")
while IFS= read -r entry; do while IFS= read -r entry; do
[[ -n "$entry" ]] && DBT_OBJECTS+=("${entry,,}") entry_cleaned=$(echo "$entry" | tr -d '[:space:]' | tr '[:upper:]' '[:lower:]')
[[ -n "$entry_cleaned" ]] && DBT_OBJECTS+=("$entry_cleaned")
done <<< "$DBT_ENTRIES" done <<< "$DBT_ENTRIES"
DBT_OBJECTS=($(printf "%s\n" "${DBT_OBJECTS[@]}" | sort -u)) DBT_OBJECTS=($(printf "%s\n" "${DBT_OBJECTS[@]}" | sort -u))
@@ -110,16 +114,8 @@ DBT_OBJECTS=($(printf "%s\n" "${DBT_OBJECTS[@]}" | sort -u))
# === Compare === # === Compare ===
echo "📊 Comparing DBT models vs Postgres state..." echo "📊 Comparing DBT models vs Postgres state..."
RELEVANT_MODELS=() RELEVANT_MODELS=($(comm -12 <(printf "%s\n" "${POSTGRES_OBJECTS[@]}" | sort) <(printf "%s\n" "${DBT_OBJECTS[@]}" | sort)))
STALE_MODELS=() STALE_MODELS=($(comm -23 <(printf "%s\n" "${POSTGRES_OBJECTS[@]}" | sort) <(printf "%s\n" "${DBT_OBJECTS[@]}" | sort)))
for pg_obj in "${POSTGRES_OBJECTS[@]}"; do
if printf "%s\n" "${DBT_OBJECTS[@]}" | grep -Fxq "$pg_obj"; then
RELEVANT_MODELS+=("$pg_obj")
else
STALE_MODELS+=("$pg_obj")
fi
done
# === Output === # === Output ===
echo "" echo ""