From ee6b61388727f8f13c66eec1fc9d0c498c90cd55 Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Fri, 27 Sep 2024 17:30:25 +0200 Subject: [PATCH 1/4] Stg Hubspot form_submissions --- models/staging/hubspot/_hubspot_sources.yml | 2 + models/staging/hubspot/schema.yml | 46 +++++++++++++++++++ .../hubspot/stg_hubspot__form_submissions.sql | 29 ++++++++++++ 3 files changed, 77 insertions(+) create mode 100644 models/staging/hubspot/stg_hubspot__form_submissions.sql diff --git a/models/staging/hubspot/_hubspot_sources.yml b/models/staging/hubspot/_hubspot_sources.yml index b0b6261..9450245 100644 --- a/models/staging/hubspot/_hubspot_sources.yml +++ b/models/staging/hubspot/_hubspot_sources.yml @@ -8,3 +8,5 @@ sources: identifier: contacts - name: deals identifier: deals + - name: form_submissions + identifier: form_submissions diff --git a/models/staging/hubspot/schema.yml b/models/staging/hubspot/schema.yml index 4cf8542..cb0f7dd 100644 --- a/models/staging/hubspot/schema.yml +++ b/models/staging/hubspot/schema.yml @@ -115,3 +115,49 @@ models: description: "Timestamp of when data was extracted to DWH." tests: - not_null + - name: stg_hubspot__form_submissions + description: "Table with forms values that have been submitted in the forms" + columns: + - name: id_form + data_type: character varying + description: "Unique id for each form submission." + + - name: values + data_type: jsonb + description: "Json with value information for each form" + tests: + - not_null + + - name: page_url + data_type: character varying + description: "" + + - name: submitted_at_utc + data_type: timestamp with time zone + description: "Timestamp of when this record was created." + tests: + - not_null + + - name: submitted_date_utc + data_type: timestamp without time zone + description: "Date of when this record was created." + tests: + - not_null + + - name: updated_at_utc + data_type: timestamp with time zone + description: "Timestamp of when this record was last updated." + tests: + - not_null + + - name: updated_date_utc + data_type: timestamp without time zone + description: "Date of when this record was last updated." + tests: + - not_null + + - name: dwh_extracted_at_utc + data_type: timestamp with time zone + description: "Timestamp of when data was extracted to DWH." + tests: + - not_null diff --git a/models/staging/hubspot/stg_hubspot__form_submissions.sql b/models/staging/hubspot/stg_hubspot__form_submissions.sql new file mode 100644 index 0000000..2293dc4 --- /dev/null +++ b/models/staging/hubspot/stg_hubspot__form_submissions.sql @@ -0,0 +1,29 @@ +with + raw_form_submissions as (select * from {{ source("hubspot", "form_submissions") }}), + stg_core__form_submissions as ( + select + {{ adapter.quote("formId") }} as id_form, {{ adapter.quote("values") }} as + values + , + {{ adapter.quote("pageUrl") }} as page_url, + to_timestamp( + {{ adapter.quote("submittedAt") }}::double precision / 1000 + ) at time zone 'UTC' as submitted_at_utc, + cast( + to_timestamp( + {{ adapter.quote("submittedAt") }}::double precision / 1000 + ) at time zone 'UTC' as date + ) as submitted_date_utc, + to_timestamp( + {{ adapter.quote("updatedAt") }}::double precision / 1000 + ) at time zone 'UTC' as updated_at_utc, + cast( + to_timestamp( + {{ adapter.quote("updatedAt") }}::double precision / 1000 + ) at time zone 'UTC' as date + ) as updated_date_utc, + {{ adapter.quote("_airbyte_extracted_at") }} as dwh_extracted_at_utc + from raw_form_submissions + ) +select * +from stg_core__form_submissions From 0a880a138b1dd0da243beb871abb077b8d58d2ae Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Wed, 2 Oct 2024 14:50:34 +0200 Subject: [PATCH 2/4] created macro and fixed naming --- macros/timestamp_to_utc.sql | 22 +++++++++++++++ .../hubspot/stg_hubspot__form_submissions.sql | 28 ++++--------------- 2 files changed, 28 insertions(+), 22 deletions(-) create mode 100644 macros/timestamp_to_utc.sql diff --git a/macros/timestamp_to_utc.sql b/macros/timestamp_to_utc.sql new file mode 100644 index 0000000..20cac93 --- /dev/null +++ b/macros/timestamp_to_utc.sql @@ -0,0 +1,22 @@ +/* +This macro converts a timestamp column from Unix epoch time (in milliseconds) +to a UTC timestamp and a corresponding UTC date. + +It generates two output fields: +1. The timestamp in UTC, with the alias formatted as _at_utc. +2. The date in UTC, with the alias formatted as _date_utc. + +This macro is intended to be used within a SELECT statement +and ensures that the output is properly formatted for further analysis. +*/ +{% macro timestamp_to_utc(column_name) %} + to_timestamp( + {{ adapter.quote(column_name) }}::double precision / 1000 + ) at time zone 'UTC' as {{ column_name | replace("At", "") }}_at_utc, + + cast( + to_timestamp( + {{ adapter.quote(column_name) }}::double precision / 1000 + ) at time zone 'UTC' as date + ) as {{ column_name | replace("At", "") }}_date_utc +{% endmacro %} diff --git a/models/staging/hubspot/stg_hubspot__form_submissions.sql b/models/staging/hubspot/stg_hubspot__form_submissions.sql index 2293dc4..705a952 100644 --- a/models/staging/hubspot/stg_hubspot__form_submissions.sql +++ b/models/staging/hubspot/stg_hubspot__form_submissions.sql @@ -1,29 +1,13 @@ with raw_form_submissions as (select * from {{ source("hubspot", "form_submissions") }}), - stg_core__form_submissions as ( + stg_hubspot__form_submissions as ( select - {{ adapter.quote("formId") }} as id_form, {{ adapter.quote("values") }} as - values - , - {{ adapter.quote("pageUrl") }} as page_url, - to_timestamp( - {{ adapter.quote("submittedAt") }}::double precision / 1000 - ) at time zone 'UTC' as submitted_at_utc, - cast( - to_timestamp( - {{ adapter.quote("submittedAt") }}::double precision / 1000 - ) at time zone 'UTC' as date - ) as submitted_date_utc, - to_timestamp( - {{ adapter.quote("updatedAt") }}::double precision / 1000 - ) at time zone 'UTC' as updated_at_utc, - cast( - to_timestamp( - {{ adapter.quote("updatedAt") }}::double precision / 1000 - ) at time zone 'UTC' as date - ) as updated_date_utc, + {{ adapter.quote("formId") }} as id_form, + {{ adapter.quote("values") }} as "values", + {{ timestamp_to_utc("submittedAt") }}, + {{ timestamp_to_utc("updatedAt") }}, {{ adapter.quote("_airbyte_extracted_at") }} as dwh_extracted_at_utc from raw_form_submissions ) select * -from stg_core__form_submissions +from stg_hubspot__form_submissions From e9f16342e7986de834e4aa024f41aac20bf312aa Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Thu, 3 Oct 2024 12:19:27 +0200 Subject: [PATCH 3/4] added tests --- macros/timestamp_to_utc.sql | 5 +++-- models/staging/hubspot/schema.yml | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/macros/timestamp_to_utc.sql b/macros/timestamp_to_utc.sql index 20cac93..59cbefe 100644 --- a/macros/timestamp_to_utc.sql +++ b/macros/timestamp_to_utc.sql @@ -12,11 +12,12 @@ and ensures that the output is properly formatted for further analysis. {% macro timestamp_to_utc(column_name) %} to_timestamp( {{ adapter.quote(column_name) }}::double precision / 1000 - ) at time zone 'UTC' as {{ column_name | replace("At", "") }}_at_utc, + ) at time zone 'UTC' + as {{ column_name | replace("At", "") | replace("Date", "") }}_at_utc, cast( to_timestamp( {{ adapter.quote(column_name) }}::double precision / 1000 ) at time zone 'UTC' as date - ) as {{ column_name | replace("At", "") }}_date_utc + ) as {{ column_name | replace("At", "") | replace("Date", "") }}_date_utc {% endmacro %} diff --git a/models/staging/hubspot/schema.yml b/models/staging/hubspot/schema.yml index cb0f7dd..974c6c2 100644 --- a/models/staging/hubspot/schema.yml +++ b/models/staging/hubspot/schema.yml @@ -121,6 +121,9 @@ models: - name: id_form data_type: character varying description: "Unique id for each form submission." + tests: + - not_null + - unique - name: values data_type: jsonb From 751a95f9aeebf4f65b8678bc6510d8b23e3c8ea3 Mon Sep 17 00:00:00 2001 From: Joaquin Ossa Date: Fri, 4 Oct 2024 08:42:28 +0200 Subject: [PATCH 4/4] modified macro --- ...tamp_to_utc.sql => unix_ms_timestamp_to_utc.sql} | 13 ++----------- .../hubspot/stg_hubspot__form_submissions.sql | 10 ++++++++-- 2 files changed, 10 insertions(+), 13 deletions(-) rename macros/{timestamp_to_utc.sql => unix_ms_timestamp_to_utc.sql} (51%) diff --git a/macros/timestamp_to_utc.sql b/macros/unix_ms_timestamp_to_utc.sql similarity index 51% rename from macros/timestamp_to_utc.sql rename to macros/unix_ms_timestamp_to_utc.sql index 59cbefe..e342ecd 100644 --- a/macros/timestamp_to_utc.sql +++ b/macros/unix_ms_timestamp_to_utc.sql @@ -9,15 +9,6 @@ It generates two output fields: This macro is intended to be used within a SELECT statement and ensures that the output is properly formatted for further analysis. */ -{% macro timestamp_to_utc(column_name) %} - to_timestamp( - {{ adapter.quote(column_name) }}::double precision / 1000 - ) at time zone 'UTC' - as {{ column_name | replace("At", "") | replace("Date", "") }}_at_utc, - - cast( - to_timestamp( - {{ adapter.quote(column_name) }}::double precision / 1000 - ) at time zone 'UTC' as date - ) as {{ column_name | replace("At", "") | replace("Date", "") }}_date_utc +{% macro unix_ms_timestamp_to_utc(column_name) %} + to_timestamp({{ adapter.quote(column_name) }} / 1000) at time zone 'UTC' {% endmacro %} diff --git a/models/staging/hubspot/stg_hubspot__form_submissions.sql b/models/staging/hubspot/stg_hubspot__form_submissions.sql index 705a952..7ec9ed4 100644 --- a/models/staging/hubspot/stg_hubspot__form_submissions.sql +++ b/models/staging/hubspot/stg_hubspot__form_submissions.sql @@ -4,8 +4,14 @@ with select {{ adapter.quote("formId") }} as id_form, {{ adapter.quote("values") }} as "values", - {{ timestamp_to_utc("submittedAt") }}, - {{ timestamp_to_utc("updatedAt") }}, + {{ unix_ms_timestamp_to_utc("submittedAt") }} as submitted_at_utc, + cast( + {{ unix_ms_timestamp_to_utc("submittedAt") }} as date + ) as submitted_date_utc, + {{ unix_ms_timestamp_to_utc("updatedAt") }} as updated_at_utc, + cast( + {{ unix_ms_timestamp_to_utc("updatedAt") }} as date + ) as updated_date_utc, {{ adapter.quote("_airbyte_extracted_at") }} as dwh_extracted_at_utc from raw_form_submissions )