From 5b5c878c2ad9edabc40bf9ea4dc615577ef1f17b Mon Sep 17 00:00:00 2001 From: Pablo Martin Date: Thu, 7 Mar 2024 16:02:07 +0100 Subject: [PATCH 1/5] some work --- models/staging/core/_core_sources.yml | 109 +++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 3 deletions(-) diff --git a/models/staging/core/_core_sources.yml b/models/staging/core/_core_sources.yml index 97b3bac..3e5d97c 100644 --- a/models/staging/core/_core_sources.yml +++ b/models/staging/core/_core_sources.yml @@ -4,7 +4,110 @@ sources: - name: core schema: sync_core tables: - - name: user - identifier: User + - name: User + description: A User. + columns: + - name: Id + data_type: character varying + quote: True + description: "UUID for the user." + tests: + - unique + - not_null + # - dbt_expectations.expect_column_values_to_match_regex: + # regex: "^[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-4[0-9A-Fa-f]{3}-[89ABab][0-9A-Fa-f]{3}-[0-9A-Fa-f]{12}$" + - name: code + data_type: bigint + description: "" + - name: email + data_type: character varying + description: "" + - name: title + data_type: character varying + description: "" + - name: dealid + data_type: character varying + description: "" + - name: deleted + data_type: boolean + description: "" + - name: joindate + data_type: timestamp without time zone + description: "" + - name: lastname + data_type: character varying + description: "" + - name: username + data_type: character varying + description: "" + - name: firstname + data_type: character varying + description: "" + - name: codeprefix + data_type: character varying + description: "" + - name: billingtown + data_type: character varying + description: "" + - name: companyname + data_type: character varying + description: "" + - name: dateofbirth + data_type: timestamp without time zone + description: "" + - name: phonenumber + data_type: character varying + description: "" + - name: passwordhash + data_type: character varying + description: "" + - name: accounttypeid + data_type: bigint + 
description: "" + - name: createduserid + data_type: character varying + description: "" + - name: securitystamp + data_type: character varying + description: "" + - name: emailconfirmed + data_type: boolean + description: "" + - name: lockoutenabled + data_type: boolean + description: "" + - name: billingpostcode + data_type: character varying + description: "" + - name: billingcountryid + data_type: bigint + description: "" + - name: twofactorenabled + data_type: boolean + description: "" + - name: accessfailedcount + data_type: bigint + description: "" + - name: lockoutenddateutc + data_type: timestamp without time zone + description: "" + - name: billingaddressline1 + data_type: character varying + description: "" + - name: billingaddressline2 + data_type: character varying + description: "" + - name: phonenumberconfirmed + data_type: boolean + description: "" + - name: _airbyte_raw_id + data_type: character varying + description: "" + - name: _airbyte_extracted_at + data_type: timestamp with time zone + description: "" + - name: _airbyte_meta + data_type: jsonb + description: "" - name: superhog_user - identifier: SuperhogUser \ No newline at end of file + identifier: SuperhogUser From 3620401037d23ec3d59556a62bff29eb5fad2784 Mon Sep 17 00:00:00 2001 From: Pablo Martin Date: Thu, 7 Mar 2024 16:29:50 +0100 Subject: [PATCH 2/5] work --- models/staging/core/_core_sources.yml | 37 ++++++++++++++++++-------- models/staging/core/stg_core__user.sql | 2 +- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/models/staging/core/_core_sources.yml b/models/staging/core/_core_sources.yml index 3e5d97c..e4315ce 100644 --- a/models/staging/core/_core_sources.yml +++ b/models/staging/core/_core_sources.yml @@ -14,23 +14,38 @@ sources: tests: - unique - not_null - # - dbt_expectations.expect_column_values_to_match_regex: - # regex: "^[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-4[0-9A-Fa-f]{3}-[89ABab][0-9A-Fa-f]{3}-[0-9A-Fa-f]{12}$" - - name: code + - 
dbt_expectations.expect_column_values_to_match_regex: + regex: "^[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-4[0-9A-Fa-f]{3}-[89ABab][0-9A-Fa-f]{3}-[0-9A-Fa-f]{12}$" - - name: code + - name: Code data_type: bigint - description: "" - - name: email + quote: True + description: "WIP. Is this an increment unique ID?" + tests: + - unique + - not_null + - name: Email data_type: character varying - description: "" - - name: title + quote: True + description: "Email for this user." + tests: + - unique + - not_null + - name: Title data_type: character varying + quote: True description: "" - - name: dealid + - name: DealId data_type: character varying - description: "" - - name: deleted + quote: True + description: "The Hubspot Deal Id that this user account belongs to. Multiple users can all be part of one Deal." + # This test can't be enforced yet due to some bad test data in the production database. Once that's dealt with, this should be activated. + #tests: + # - dbt_expectations.expect_column_values_to_match_regex: + # regex: "^[0-9]{10,11}$" + - name: Deleted data_type: boolean - description: "" + quote: True + description: "WIP. I'm guessing this signals that the user wanted to be deleted?" 
- name: joindate data_type: timestamp without time zone description: "" diff --git a/models/staging/core/stg_core__user.sql b/models/staging/core/stg_core__user.sql index 8c50ef7..ac4ec0e 100644 --- a/models/staging/core/stg_core__user.sql +++ b/models/staging/core/stg_core__user.sql @@ -1,5 +1,5 @@ with - raw_user as (select * from {{ source("core", "user") }}), + raw_user as (select * from {{ source("core", "User") }}), stg_core__user as ( select {{ adapter.quote("Id") }} as id_user, From c059599ad6b3b11c7a6e7a9640ee2f52ead96fc1 Mon Sep 17 00:00:00 2001 From: Pablo Martin Date: Thu, 7 Mar 2024 16:53:42 +0100 Subject: [PATCH 3/5] more work --- models/staging/core/_core_sources.yml | 61 +++++++++++++++++++-------- 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/models/staging/core/_core_sources.yml b/models/staging/core/_core_sources.yml index e4315ce..8f39616 100644 --- a/models/staging/core/_core_sources.yml +++ b/models/staging/core/_core_sources.yml @@ -5,7 +5,7 @@ sources: schema: sync_core tables: - name: User - description: A User. + description: A User. columns: - name: Id data_type: character varying @@ -40,48 +40,73 @@ sources: description: "The Hubspot Deal Id that this user account belongs to. Multiple users can all be part of one Deal." # This test can't be enforced yet due to some bad test data in the production database. Once that's dealt with, this should be activated. #tests: - # - dbt_expectations.expect_column_values_to_match_regex: - # regex: "^[0-9]{10,11}$" + # - dbt_expectations.expect_column_values_to_match_regex: + # regex: "^[0-9]{10,11}$" - name: Deleted data_type: boolean quote: True description: "WIP. I'm guessing this signals that the user wanted to be deleted?" - - name: joindate + tests: + - not_null + - name: JoinDate data_type: timestamp without time zone - description: "" - - name: lastname + quote: True + description: "Timestamp on which the user was created." 
+ tests: + - not_null + - name: LastName data_type: character varying + quote: True description: "" - - name: username + - name: UserName data_type: character varying + quote: True description: "" - - name: firstname + tests: + - not_null + - name: FirstName data_type: character varying + quote: True description: "" - - name: codeprefix + - name: CodePrefix data_type: character varying - description: "" - - name: billingtown + quote: True + description: "WIP. No clue on what is this." + - name: BillingTown data_type: character varying + quote: True description: "" - - name: companyname + - name: CompanyName data_type: character varying + quote: True description: "" - - name: dateofbirth + - name: DateOfBirth data_type: timestamp without time zone + quote: True description: "" - - name: phonenumber + - name: PhoneNumber data_type: character varying + quote: True description: "" - - name: passwordhash + - name: PasswordHash data_type: character varying + quote: True description: "" - - name: accounttypeid + - name: AccountTypeId data_type: bigint + quote: True description: "" - - name: createduserid + - name: CreatedUserId data_type: character varying - description: "" + quote: True + description: | + A user can be created by another user. If this user has been created by another user, this is the UUID of the creator. + + Else, this is null. 
+ tests: + - dbt_expectations.expect_column_values_to_match_regex: + regex: "^[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-4[0-9A-Fa-f]{3}-[89ABab][0-9A-Fa-f]{3}-[0-9A-Fa-f]{12}$" + row_condition: '"CreatedUserId" is not null' - name: securitystamp data_type: character varying description: "" From cd42340bc2027e69029d6c01f169ef51d282311d Mon Sep 17 00:00:00 2001 From: Pablo Martin Date: Thu, 7 Mar 2024 17:06:01 +0100 Subject: [PATCH 4/5] finished User --- models/staging/core/_core_sources.yml | 56 ++++++++++++++++++--------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/models/staging/core/_core_sources.yml b/models/staging/core/_core_sources.yml index 8f39616..4ae5688 100644 --- a/models/staging/core/_core_sources.yml +++ b/models/staging/core/_core_sources.yml @@ -107,47 +107,65 @@ sources: - dbt_expectations.expect_column_values_to_match_regex: regex: "^[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-4[0-9A-Fa-f]{3}-[89ABab][0-9A-Fa-f]{3}-[0-9A-Fa-f]{12}$" row_condition: '"CreatedUserId" is not null' - - name: securitystamp + - name: SecurityStamp data_type: character varying - description: "" - - name: emailconfirmed + quote: True + description: "WIP. I have no clue what this is." + - name: EmailConfirmed data_type: boolean - description: "" - - name: lockoutenabled + quote: True + description: "WIP. I have no clue what this is." + - name: LockoutEnabled data_type: boolean - description: "" - - name: billingpostcode + quote: True + description: "WIP. I have no clue what this is." 
+ - name: BillingPostCode data_type: character varying + quote: True description: "" - - name: billingcountryid + - name: BillingCountryId data_type: bigint + quote: True description: "" - - name: twofactorenabled + - name: TwoFactorEnabled data_type: boolean + quote: True description: "" - - name: accessfailedcount + - name: AccessFailedCount data_type: bigint + quote: True description: "" - - name: lockoutenddateutc + - name: LockoutEndDateUtc data_type: timestamp without time zone + quote: True + description: "WIP. I have no clue what this is" + - name: BillinAddressLine1 + data_type: character varying + quote: True description: "" - - name: billingaddressline1 + - name: BillingAddressLine2 data_type: character varying + quote: True description: "" - - name: billingaddressline2 - data_type: character varying - description: "" - - name: phonenumberconfirmed + - name: PhoneNumberConfirmed data_type: boolean + quote: True description: "" + tests: + - not_null - name: _airbyte_raw_id data_type: character varying - description: "" + description: "{{ doc('_airbyte_raw_id_desc') }}" - name: _airbyte_extracted_at data_type: timestamp with time zone - description: "" + description: "{{ doc('_airbyte_extracted_at_desc') }}" + tests: + - not_null + - dbt_expectations.expect_row_values_to_have_recent_data: + datepart: day + interval: 2 - name: _airbyte_meta data_type: jsonb - description: "" + description: "{{ doc('_airbyte_meta_desc') }}" - name: superhog_user identifier: SuperhogUser From e7edcff1753e8e6d0501002ae3564483a9146e5b Mon Sep 17 00:00:00 2001 From: Pablo Martin Date: Thu, 7 Mar 2024 17:22:16 +0100 Subject: [PATCH 5/5] a few final changes --- models/staging/core/_core_sources.yml | 2 +- models/staging/core/schema.yml | 99 ++++++++++++++++++++++++-- models/staging/core/stg_core__user.sql | 2 +- 3 files changed, 95 insertions(+), 8 deletions(-) diff --git a/models/staging/core/_core_sources.yml b/models/staging/core/_core_sources.yml index 4ae5688..76b709c 100644 
--- a/models/staging/core/_core_sources.yml +++ b/models/staging/core/_core_sources.yml @@ -139,7 +139,7 @@ sources: data_type: timestamp without time zone quote: True description: "WIP. I have no clue what this is" - - name: BillinAddressLine1 + - name: BillingAddressLine1 data_type: character varying quote: True description: "" diff --git a/models/staging/core/schema.yml b/models/staging/core/schema.yml index 67110ec..5df73b6 100644 --- a/models/staging/core/schema.yml +++ b/models/staging/core/schema.yml @@ -1,15 +1,102 @@ - version: 2 models: - - name: stg_core_user + - name: stg_core__user description: "The staging version of Core's User table. Do not confuse with SuperhogUser." columns: - name: id_user - description: "The primary key for this table" - tests: - - unique - - not_null + data_type: character varying + description: "UUID for the user." + - name: user_code + data_type: bigint + description: "WIP. Is this an increment unique ID?" + - name: email + data_type: character varying + description: "Email for this user." + - name: title + data_type: character varying + description: "" + - name: id_deal + data_type: character varying + description: "The Hubspot Deal Id that this user account belongs to. Multiple users can all be part of one Deal." + - name: is_deleted + data_type: boolean + description: "WIP. I'm guessing this signals that the user wanted to be deleted?" + - name: join_date_utc + data_type: timestamp without time zone + description: "Timestamp on which the user was created." + - name: last_name + data_type: character varying + description: "" + - name: user_name + data_type: character varying + description: "" + - name: first_name + data_type: character varying + description: "" + - name: code_prefix + data_type: character varying + description: "WIP. No clue on what is this." 
+ - name: billing_town + data_type: character varying + description: "" + - name: company_name + data_type: character varying + description: "" + - name: date_of_birth + data_type: date + description: "" + - name: phone_number + data_type: character varying + description: "" + - name: password_hash + data_type: character varying + description: "" + - name: id_account_type + data_type: bigint + description: "" + - name: id_created_user + data_type: character varying + description: | + A user can be created by another user. If this user has been created by another user, this is the UUID of the creator. + + Else, this is null. + - name: security_stamp + data_type: character varying + description: "WIP. I have no clue what this is." + - name: is_email_confirmed + data_type: boolean + description: "WIP. I have no clue what this is." + - name: is_lockout_enabled + data_type: boolean + description: "WIP. I have no clue what this is." + - name: billing_postcode + data_type: character varying + description: "" + - name: id_billing_country + data_type: bigint + description: "" + - name: is_twofactor_enabled + data_type: boolean + description: "" + - name: access_failed_count + data_type: bigint + description: "" + - name: lockout_end_date_utc + data_type: timestamp without time zone + description: "WIP. I have no clue what this is" + - name: billing_address_line_1 + data_type: character varying + description: "" + - name: billing_address_line_2 + data_type: character varying + description: "" + - name: is_phone_number_confirmed + data_type: boolean + description: "" + - name: dwh_extracted_date_utc + data_type: date + description: "{{ doc('dwh_extracted_date_utc_desc')}}" - name: stg_core_superhog_user description: "The staging version of Core's SuperhogUser table. Do not confuse with User." 
diff --git a/models/staging/core/stg_core__user.sql b/models/staging/core/stg_core__user.sql index ac4ec0e..2d246c6 100644 --- a/models/staging/core/stg_core__user.sql +++ b/models/staging/core/stg_core__user.sql @@ -15,7 +15,7 @@ with {{ adapter.quote("CodePrefix") }} as code_prefix, {{ adapter.quote("BillingTown") }} as billing_town, {{ adapter.quote("CompanyName") }} as company_name, - {{ adapter.quote("DateOfBirth") }} as date_of_birth, + cast({{ adapter.quote("DateOfBirth") }} as date) as date_of_birth, {{ adapter.quote("PhoneNumber") }} as phone_number, {{ adapter.quote("PasswordHash") }} as password_hash, {{ adapter.quote("AccountTypeId") }} as id_account_type,