data-dwh-dbt-project/macros/cosmos_db_utils.sql

{% macro cosmos_db_record_deduplication(source_table, primary_key) %}
    /*
Deduplicates Cosmos DB entities stored as JSON in the "documents" column.

Rows are grouped by the value found under the primary_key field of the
JSON document. Within each group only the row with the highest "_ts"
value (the Cosmos DB last-modified timestamp, in epoch seconds) is kept.
Rows whose key is not duplicated form a group of one and are returned as-is.

Inputs:
    - source_table: relation to read from. Should be a sync model exposing
      a JSON "documents" column.
    - primary_key: name of the JSON field inside "documents" that uniquely
      identifies an entity. It is interpolated as a string literal, so it
      must be a trusted, plain field name.
Output:
    - Every column of source_table plus a helper column named rank (always
      equal to 1), one row per distinct primary_key value.

Caveats:
    - Rows whose document lacks the primary_key field (or holds a JSON null)
      all share the same NULL partition, so only one of them is returned.
    - When several rows of a group share the same "_ts", which of them is
      kept is not deterministic.
*/
    select *
    from
        -- Known limitation: the window function ranks every row of the
        -- source on each run, which does not scale. Acceptable while the
        -- data volume stays tiny; revisit once it grows.
        (
            select
                *,
                row_number() over (
                    partition by
                        {{ adapter.quote("documents") }} ->> '{{ primary_key }}'
                    -- bigint rather than integer: epoch seconds stop
                    -- fitting in a 32-bit integer in January 2038.
                    order by ({{ adapter.quote("documents") }} ->> '_ts')::bigint desc
                ) as rank
            from {{ source_table }}
        -- The alias is mandatory for derived tables on PostgreSQL < 16.
        ) as ranked_documents
    where rank = 1
{% endmacro %}