From 03797827a4fbf4b38b9841bb8d4d7ee8266cb9de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oriol=20Roqu=C3=A9=20Paniagua?= Date: Thu, 31 Oct 2024 15:45:28 +0000 Subject: [PATCH] Merged PR 3396: Changes claims and logic to consider user is in new dash # Description Considers the 2 Claims NewDashVersion and NewDashMoveDate as sources of truth, removing the previous (crazy) logic. If a user has the claim NewDashVersion, then it's in New Dash. The claim value will also provide the version in which the user appeared (MVP, V2, etc) If a user has the NewDashMoveDate, it means it has moved from Old Dash. If not, but still has NewDashVersion, it means the user was directly created in New Dash. The models now provide logic to handle these cases, and it's propagated downstream will ensuring reporting will still work. # Checklist - [X] The edited models and dependants run properly with production data. - [X] The edited models are sufficiently documented. - [NOT AT ALL] The edited models contain PK tests, and I've ran and passed them. **MANY ISSUES ON PRODUCTION CURRENTLY** - [X] I have checked for DRY opportunities with other models and docs. - [X] I've picked the right materialization for the affected models. # Other - [ ] Check if a full-refresh is required after this PR is merged. Changes claims and logic to consider user is in new dash. Downstream propagation included Related work items: #23457 --- macros/user_migration_configuration.sql | 49 ------- .../core/int_core__new_dash_user_overview.sql | 4 +- .../core/int_core__new_dash_users.sql | 56 ++++++++ .../intermediate/core/int_core__user_host.sql | 16 ++- .../core/int_core__user_migration.sql | 87 ------------- .../core/int_core__user_product_bundle.sql | 6 +- models/intermediate/core/schema.yml | 122 +++++++++++------- 7 files changed, 145 insertions(+), 195 deletions(-) delete mode 100644 macros/user_migration_configuration.sql create mode 100644 models/intermediate/core/int_core__new_dash_users.sql delete mode 100644 models/intermediate/core/int_core__user_migration.sql diff --git a/macros/user_migration_configuration.sql b/macros/user_migration_configuration.sql deleted file mode 100644 index cb46c49..0000000 --- a/macros/user_migration_configuration.sql +++ /dev/null @@ -1,49 +0,0 @@ -/* -Macro: get_new_dash_migration_phases_configuration - -Provides a general configuration for the different phases of the - New Dash migration. Each phase is identifiable via a phase_name, - that is the "expected display" for users. The assumption is that - each user migration is identified via claim_type. Lastly, we - apply here a hardcode of when the deployment was carried out. - -Important note: if a user migrates once a phase has started, we - will not be able to tell when that happened. However, it is likely - that other indicators will provide an estimate. For example: - - The migration A happens on 1st July 2024. - User A is migrated on 1st July 2024 (as expected) - User B is migrated on 10th July 2024 (not expected) - - It is likely that User B won't have Bookings from new dash - until it's migrated. So this migration date should be considered - as a hard, lower-limit of dates. - - If a user satisfies more than one valid claims, in order to ensure - that the good date is considered we use the claim_enforcement_priority - value to force that a certain claim type will have precedence over - another. The lower the value, the more priority. -*/ -{% macro get_new_dash_migration_phases_config() %} - {% set migration_phases = [ - { - "phase_name": "MVP", - "claim_type": "KYGMVP", - "default_deployment_date": "2024-07-30", - "claim_enforcement_priority": 2, - }, - { - "phase_name": "MVP", - "claim_type": "MVPMIGRATEDUSER", - "default_deployment_date": "2024-09-10", - "claim_enforcement_priority": 1, - }, - { - "phase_name": "V2", - "claim_type": "KYGMOVEDATE", - "default_deployment_date": "2024-10-08", - "claim_enforcement_priority": 0, - }, - ] %} - {{ return(migration_phases) }} -{% endmacro %} diff --git a/models/intermediate/core/int_core__new_dash_user_overview.sql b/models/intermediate/core/int_core__new_dash_user_overview.sql index b71716f..51cbe1e 100644 --- a/models/intermediate/core/int_core__new_dash_user_overview.sql +++ b/models/intermediate/core/int_core__new_dash_user_overview.sql @@ -16,8 +16,8 @@ with select upb.id_user_host, - uh.migration_phase as user_migration_phase, - uh.lower_limit_migration_date_utc as user_estimated_migration_date_utc, + uh.new_dash_version as user_migration_phase, + uh.user_in_new_dash_since_date_utc as user_estimated_migration_date_utc, uh.company_name, uh.first_name, uh.last_name, diff --git a/models/intermediate/core/int_core__new_dash_users.sql b/models/intermediate/core/int_core__new_dash_users.sql new file mode 100644 index 0000000..4e76c4c --- /dev/null +++ b/models/intermediate/core/int_core__new_dash_users.sql @@ -0,0 +1,56 @@ +{% set new_dash_mvp_launch_date = "2024-07-30" %} + +{{ config(materialized="table") }} +with + stg_core__claim as (select * from {{ ref("stg_core__claim") }}), + stg_core__user as (select * from {{ ref("stg_core__user") }}), + + new_dash_users as ( + select distinct id_user as id_user_host, upper(claim_value) as new_dash_version + from stg_core__claim + where upper(claim_type) in ('NEWDASHVERSION') + ), + new_dash_users_with_move_date as ( + select + ndu.id_user_host, + ndu.new_dash_version, + min( + case + when + claim_value + ~ '^(20\d{2})-(0[1-9]|1[0-2])-(0[1-9]|1\d|2\d|3[01])$' + then cast(date(claim_value) as timestamp) + else null + end + ) as new_dash_move_at_utc + from new_dash_users ndu + left join + stg_core__claim c + on c.id_user = ndu.id_user_host + and upper(c.claim_type) in ('NEWDASHMOVEDATE') + group by 1, 2 + ) +select + nduwmd.id_user_host, + nduwmd.new_dash_version, + date(nduwmd.new_dash_move_at_utc) as new_dash_move_date_utc, + nduwmd.new_dash_move_at_utc, + case + when nduwmd.id_user_host is not null and nduwmd.new_dash_move_at_utc is not null + then true + else false + end as has_user_moved_from_old_dash, + -- If user has not moved, use the join date. If the join date is before the first + -- MVP migration, use the default migration date + coalesce( + nduwmd.new_dash_move_at_utc, + greatest( + u.joined_at_utc, cast(date('{{new_dash_mvp_launch_date}}') as timestamp) + ) + ) as user_in_new_dash_since_timestamp_at_utc, + coalesce( + date(nduwmd.new_dash_move_at_utc), + greatest(u.joined_date_utc, date('{{new_dash_mvp_launch_date}}')) + ) as user_in_new_dash_since_date_utc +from new_dash_users_with_move_date nduwmd +inner join stg_core__user u on nduwmd.id_user_host = u.id_user diff --git a/models/intermediate/core/int_core__user_host.sql b/models/intermediate/core/int_core__user_host.sql index 7af308b..ed00ef5 100644 --- a/models/intermediate/core/int_core__user_host.sql +++ b/models/intermediate/core/int_core__user_host.sql @@ -6,7 +6,7 @@ with int_core__unified_user as (select * from {{ ref("int_core__unified_user") }}), int_core__user_role as (select * from {{ ref("int_core__user_role") }}), - int_core__user_migration as (select * from {{ ref("int_core__user_migration") }}), + int_core__new_dash_users as (select * from {{ ref("int_core__new_dash_users") }}), stg_core__claim as (select * from {{ ref("stg_core__claim") }}), int_core__deal as (select * from {{ ref("int_core__deal") }}), @@ -49,10 +49,16 @@ select uu.joined_date_utc, uu.created_date_utc, uu.updated_date_utc, - case when um.id_user_host is not null then true else false end as is_user_migrated, - um.migration_phase, - um.lower_limit_migration_date_utc + case + when ndu.id_user_host is not null then true else false + end as is_user_in_new_dash, + ndu.has_user_moved_from_old_dash, + ndu.new_dash_version, + ndu.new_dash_move_at_utc, + ndu.new_dash_move_date_utc, + ndu.user_in_new_dash_since_timestamp_at_utc, + ndu.user_in_new_dash_since_date_utc from int_core__unified_user uu inner join unique_host_user uhu on uu.id_user = uhu.id_user -left join int_core__user_migration um on uu.id_user = um.id_user_host +left join int_core__new_dash_users ndu on uu.id_user = ndu.id_user_host left join int_core__deal d on uu.id_deal = d.id_deal diff --git a/models/intermediate/core/int_core__user_migration.sql b/models/intermediate/core/int_core__user_migration.sql deleted file mode 100644 index b576d7a..0000000 --- a/models/intermediate/core/int_core__user_migration.sql +++ /dev/null @@ -1,87 +0,0 @@ -{% set migration_phases = get_new_dash_migration_phases_config() %} - -{{ config(materialized="table") }} -with - stg_core__claim as (select * from {{ ref("stg_core__claim") }}), - stg_core__user as (select * from {{ ref("stg_core__user") }}), - - user_migration_from_claim as ( - select - id_user as id_user_host, - case - {% for phase in migration_phases %} - when upper(claim_type) = '{{ phase.claim_type }}' - then '{{ phase.phase_name }}' - {% endfor %} - else null - end as migration_phase, - case - {% for phase in migration_phases %} - -- When the claim value text contains a Date, use the greatest - -- value of the claim value itself or the default deployment date. - -- This is to ensure that any user migration happens at least - -- after the default deployment date - when - upper(claim_type) = '{{ phase.claim_type }}' - and claim_value ~ '^\d{4}-\d{2}-\d{2}$' - then - greatest( - date(claim_value), - date('{{ phase.default_deployment_date }}') - ) - -- If the claim value text does not contain a Date, use the - -- default deployment date - when - upper(claim_type) = '{{ phase.claim_type }}' - and not claim_value ~ '^\d{4}-\d{2}-\d{2}$' - then date('{{ phase.default_deployment_date }}') - {% endfor %} - else null - end as lower_limit_migration_date_utc, - case - {% for phase in migration_phases %} - when upper(claim_type) = '{{ phase.claim_type }}' - then '{{ phase.claim_enforcement_priority }}' - {% endfor %} - else null - end as claim_enforcement_priority - from stg_core__claim - where - {% for phase in migration_phases %} - (upper(claim_type) = '{{ phase.claim_type }}') - {% if not loop.last %} or {% endif %} - {% endfor %} - - ) --- Get only one migration date per user migrated. If a user satisfies several claims, --- the date corresponding to the claim with more priority will be selected. The lower --- the claim_enforcement_priority, the more priority it will have. If still 2 claims --- have the same priority, select the earliest date. --- Lastly, the migrated date is overridden with the user joined date only if the user --- was actually created after the expected migration date. -select - c.id_user_host, - c.migration_phase, - greatest( - c.lower_limit_migration_date_utc, u.joined_date_utc - ) as lower_limit_migration_date_utc, - c.claim_enforcement_priority, - case - when u.joined_date_utc > c.lower_limit_migration_date_utc then true else false - end as is_overridden_with_joined_date -from - ( - select - id_user_host, - migration_phase, - lower_limit_migration_date_utc, - claim_enforcement_priority, - row_number() over ( - partition by id_user_host - order by - claim_enforcement_priority asc, lower_limit_migration_date_utc asc - ) as rank - from user_migration_from_claim - ) c -inner join stg_core__user u on c.id_user_host = u.id_user -where rank = 1 diff --git a/models/intermediate/core/int_core__user_product_bundle.sql b/models/intermediate/core/int_core__user_product_bundle.sql index 2ec0e69..6373ec5 100644 --- a/models/intermediate/core/int_core__user_product_bundle.sql +++ b/models/intermediate/core/int_core__user_product_bundle.sql @@ -27,12 +27,12 @@ select backend ones (named as original_starts_at and original_ends_at). */ greatest( - date(upb.starts_at_utc), uh.lower_limit_migration_date_utc + date(upb.starts_at_utc), uh.user_in_new_dash_since_date_utc ) as effective_start_date_utc, case when upb.has_no_end_date then null - else greatest(date(upb.ends_at_utc), uh.lower_limit_migration_date_utc) + else greatest(date(upb.ends_at_utc), uh.user_in_new_dash_since_date_utc) end as effective_end_date_utc, upb.has_no_end_date, upb.created_at_utc, @@ -51,5 +51,5 @@ In order to avoid potential upstream problems, it is enforced inner join int_core__user_host uh on upb.id_user = uh.id_user_host - and uh.is_user_migrated = true + and uh.is_user_in_new_dash = true left join stg_core__protection_plan pp on upb.id_protection_plan = pp.id_protection_plan diff --git a/models/intermediate/core/schema.yml b/models/intermediate/core/schema.yml index 98d35d6..babb9af 100644 --- a/models/intermediate/core/schema.yml +++ b/models/intermediate/core/schema.yml @@ -2378,30 +2378,53 @@ models: description: | Date of the last time the information of the Host was updated in our systems. - - name: is_user_migrated + - name: is_user_in_new_dash data_type: boolean description: | - Flag to determine if this user host has been migrated according - to the logic implemented in user_migration table. - - name: migration_phase + Flag to determine if this user host is in New Dash or not. + - name: has_user_moved_from_old_dash + data_type: boolean + description: | + Flag to determine if this user host is in New Dash and has + been moved from the old dash. + - name: new_dash_version data_type: string description: | - The name of the phase this user was first migrated. - - name: lower_limit_migration_date_utc + For users that are in New Dash, specifies the New Dash Version + in which these users were moved or joined. + - name: new_dash_move_date_utc data_type: date description: | - The date that the deployment of the migration happened. - It does not necessarily mean that this user was migrated in - this date. This user could have not been migrated before - this date. + For users that are in New Dash and have been moved from + Old Dash, specifies the date in which users switched. + - name: new_dash_move_at_utc + data_type: timestamp + description: | + For users that are in New Dash and have been moved from + Old Dash, specifies the timestamp in which users switched. + This is an estimate since we don't know for sure the exact + timestamp. Currently defaulting to 00:00:00h of given move + date. + - name: user_in_new_dash_since_date_utc + data_type: date + description: | + For users that are in New Dash, the effective date since + these users can be considered in New Dash. It the user + has moved from Old Dash, it will be the new_dash_move_date_utc. + If not, it will correspond to the joined_date_utc. + - name: user_in_new_dash_since_timestamp_at_utc + data_type: timestamp + description: | + For users that are in New Dash, the effective date since + these users can be considered in New Dash. It the user + has moved from Old Dash, it will be the new_dash_move_at_utc. + If not, it will correspond to the joined_at_utc. - - name: int_core__user_migration + - name: int_core__new_dash_users description: | - This table provides information of the host users that have been migrated. - At this stage, the main objective is to account for the user migration within - the scope of New Dashboard migration. - It uses the migration configuration settled in the macro: - - user_migration_configuration -> get_new_dash_migration_phases_config + This table provides information of the host users that are in New Dash. + Users can be in New Dash because 1) have moved (migrated) from Old Dash + or 2) have been directly created within New Dash. columns: - name: id_user_host @@ -2410,32 +2433,44 @@ models: tests: - not_null - unique - - name: migration_phase + - name: new_dash_version data_type: string description: | - The name of the phase this user was first migrated. + The name of the New Dash version this user appeared firstly. tests: - not_null - - name: lower_limit_migration_date_utc - data_type: date - description: | - The date that the deployment of the migration happened. - It does not necessarily mean that this user was migrated in - this date. This user could have not been migrated before - this date. - tests: - - not_null - - name: claim_enforcement_priority - data_type: integer - description: | - Informative field. It just states which claim enforcement priority - has been used for the computation. - - name: is_overridden_with_joined_date + - name: has_user_moved_from_old_dash data_type: boolean description: | - Informative field. Boolean to indicate if the expected migration date - based in the logic is finally overridden with the user joined date - in case the user was created after the expected migration date. + Flag to determine if this user host is in New Dash and has + been moved from the old dash. + - name: new_dash_move_date_utc + data_type: date + description: | + For users that are in New Dash and have been moved from + Old Dash, specifies the date in which users switched. + - name: new_dash_move_at_utc + data_type: timestamp + description: | + For users that are in New Dash and have been moved from + Old Dash, specifies the timestamp in which users switched. + This is an estimate since we don't know for sure the exact + timestamp. Currently defaulting to 00:00:00h of given move + date. + - name: user_in_new_dash_since_date_utc + data_type: date + description: | + For users that are in New Dash, the effective date since + these users can be considered in New Dash. It the user + has moved from Old Dash, it will be the new_dash_move_date_utc. + If not, it will correspond to the joined_date_utc. + - name: user_in_new_dash_since_timestamp_at_utc + data_type: timestamp + description: | + For users that are in New Dash, the effective date since + these users can be considered in New Dash. It the user + has moved from Old Dash, it will be the new_dash_move_at_utc. + If not, it will correspond to the joined_at_utc. - name: int_core__address_validations description: @@ -2525,13 +2560,6 @@ models: Thus, the relationship in this table only shows, from user point of view, what Product Bundles she/he can apply into a Listing. - tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: - - id_user_host - - id_product_bundle - - original_starts_at_utc - columns: - name: id_user_product_bundle data_type: bigint @@ -2552,9 +2580,7 @@ models: data_type: int description: | The identifier of the Product Bundle associated to the user. The same Product - Bundle can be applied into many users. - tests: - - not_null + Bundle can be applied into many users. Can be null if the bundle is custom. - name: id_protection_plan data_type: int @@ -3368,9 +3394,7 @@ models: - name: id_product_bundle data_type: bigint description: | - The identifier of the product bundle. - tests: - - not_null + The identifier of the product bundle. Can be null if it's a custom bundle. - name: id_product_service data_type: bigint