31 lines
1.2 KiB
SQL
31 lines
1.2 KiB
SQL
{% macro cosmos_db_record_deduplication(source_table, primary_key) %}
/*
This macro provides deduplication logic for Cosmos DB entities.
If two or more records share the same value in the field that acts as
the primary key, only the most recent record (highest _ts) is returned.
A record whose key value is not duplicated is returned as well.

Inputs:
    - source_table: table that acts as source. Should be a sync model.
      It must expose a JSON column named "documents".
    - primary_key: name of the field inside "documents" that uniquely
      identifies a record; the deduplication is applied on its value.

Output:
    - Returns the set of records from the source_table that are unique
      according to the primary_key. Every column of source_table is kept,
      plus the helper column "rank", which is always 1 in the output.

Notes:
    - Records of the same key that tie on _ts are resolved arbitrarily,
      since the ordering has no further tie-breaker.
    - Records in which the primary_key field is absent all fall into a
      single NULL partition, so only one of them is returned.
*/
select *
from
    -- Some thoughts for the future here:
    -- The query below is awful performance wise - but data
    -- size is tiny today. Let's tackle the problem as it comes.
    (
        select
            *,
            row_number() over (
                partition by
                    {{ adapter.quote("documents") }} ->> '{{ primary_key }}'
                -- Cosmos DB writes _ts as epoch seconds. Casting to bigint
                -- keeps this valid past January 2038, where a 32-bit
                -- integer cast would overflow and fail the query.
                order by ({{ adapter.quote("documents") }} ->> '_ts')::bigint desc
            ) as rank
        from {{ source_table }}
    ) as ranked_records  -- alias is mandatory for derived tables before PostgreSQL 16
where rank = 1
{% endmacro %}
|