data-dwh-dbt-project/macros/compare_table_hashes.sql

51 lines
1.5 KiB
MySQL
Raw Normal View History

/*
Query all columns in two models, reduce the table to a content hash and compare
the hashes across both.
You must pass a col name that exists in both models to sort them in order to
make serialization stable. It is recommended to pass the PK.
*/
{% macro compare_table_hashes(model_1, model_2, order_by_col) %}
{% set table_1 = ref(model_1) %}
{% set table_2 = ref(model_2) %}
{# Build query 1 #}
{% set query_1 %}
SELECT md5(array_agg(md5((t1.*)::varchar))::varchar) AS hash_val
FROM (
SELECT *
FROM {{ table_1 }}
ORDER BY {{ order_by_col }}
) AS t1
{% endset %}
{# Build query 2 #}
{% set query_2 %}
SELECT md5(array_agg(md5((t1.*)::varchar))::varchar) AS hash_val
FROM (
SELECT *
FROM {{ table_2 }}
ORDER BY {{ order_by_col }}
) AS t1
{% endset %}
{# Execute queries #}
{% set result_1 = run_query(query_1) %}
{% set result_2 = run_query(query_2) %}
{% if result_1 and result_2 %}
{% set hash_1 = result_1.columns[0].values()[0] %}
{% set hash_2 = result_2.columns[0].values()[0] %}
{{ log("Hash for " ~ model_1 ~ ": " ~ hash_1, info=True) }}
{{ log("Hash for " ~ model_2 ~ ": " ~ hash_2, info=True) }}
{% if hash_1 == hash_2 %} {{ log("✅ Table hashes match!", info=True) }}
{% else %} {{ log("❌ Table hashes do NOT match!", info=True) }}
{% endif %}
{% else %}
{{ log("❗ Could not retrieve hashes for one or both models.", info=True) }}
{% endif %}
{% endmacro %}