diff --git a/macros/compare_table_hashes.sql b/macros/compare_table_hashes.sql new file mode 100644 index 0000000..8e19bf6 --- /dev/null +++ b/macros/compare_table_hashes.sql @@ -0,0 +1,50 @@ +/* +Query all columns in two models, reduce the table to a content hash and compare +the hashes across both. + +You must pass a col name that exists in both models to sort them in order to +make serialization stable. It is recommended to pass the PK. +*/ +{% macro compare_table_hashes(model_1, model_2, order_by_col) %} + + {% set table_1 = ref(model_1) %} + {% set table_2 = ref(model_2) %} + + {# Build query 1 #} + {% set query_1 %} + SELECT md5(array_agg(md5((t1.*)::varchar))::varchar) AS hash_val + FROM ( + SELECT * + FROM {{ table_1 }} + ORDER BY {{ order_by_col }} + ) AS t1 + {% endset %} + + {# Build query 2 #} + {% set query_2 %} + SELECT md5(array_agg(md5((t1.*)::varchar))::varchar) AS hash_val + FROM ( + SELECT * + FROM {{ table_2 }} + ORDER BY {{ order_by_col }} + ) AS t1 + {% endset %} + + {# Execute queries #} + {% set result_1 = run_query(query_1) %} + {% set result_2 = run_query(query_2) %} + + {% if result_1 and result_2 %} + {% set hash_1 = result_1.columns[0].values()[0] %} + {% set hash_2 = result_2.columns[0].values()[0] %} + + {{ log("Hash for " ~ model_1 ~ ": " ~ hash_1, info=True) }} + {{ log("Hash for " ~ model_2 ~ ": " ~ hash_2, info=True) }} + + {% if hash_1 == hash_2 %} {{ log("✅ Table hashes match!", info=True) }} + {% else %} {{ log("❌ Table hashes do NOT match!", info=True) }} + {% endif %} + {% else %} + {{ log("❗ Could not retrieve hashes for one or both models.", info=True) }} + {% endif %} +{% endmacro %}