First version of name unification for a deal

This commit is contained in:
uri 2024-09-06 17:08:50 +02:00
parent 1e89966153
commit f18a2eb520
2 changed files with 65 additions and 11 deletions

View file

@ -24,23 +24,64 @@ with
c.country_name as main_billing_country_name_per_deal, c.country_name as main_billing_country_name_per_deal,
c.iso_2 as main_billing_country_iso_2_per_deal, c.iso_2 as main_billing_country_iso_2_per_deal,
c.iso_3 as main_billing_country_iso_3_per_deal c.iso_3 as main_billing_country_iso_3_per_deal
from billing_countries_per_deal bcpd from billing_countries_per_deal bcpd
left join int_core__country c left join int_core__country c on bcpd.id_billing_country = c.id_country
on bcpd.id_billing_country = c.id_country
where bcpd.rn = 1 where bcpd.rn = 1
),
potential_name_per_deal as (
select
id_deal,
trim(
regexp_replace(potential_name, '[0-9\(\)\-\.\,@]', '', 'g')
) as potential_deal_name,
count(*) as potential_name_count
from
(
select id_deal, first_name as potential_name
from int_core__unified_user
where id_deal is not null
union all
select id_deal, last_name as potential_name
from int_core__unified_user
where id_deal is not null
union all
select id_deal, company_name as potential_name
from int_core__unified_user
where id_deal is not null
) as names
-- The following removes tons of names that are just Bookings,
-- LTD, LLC, INC, or empty strings
where
trim(upper(potential_name)) not in ('BOOKINGS', 'LTD', 'LLC', 'INC', '')
and potential_name is not null
group by 1, 2
),
unique_name_per_deal as (
select id_deal, potential_deal_name as main_deal_name
from
(
select
id_deal,
potential_deal_name,
potential_name_count,
row_number() over (
partition by id_deal order by potential_name_count desc
) as rn
from potential_name_per_deal
) as ranked_names
where rn = 1
) )
select select
uu.id_deal, uu.id_deal,
unpd.main_deal_name,
mbcpd.main_id_billing_country_per_deal, mbcpd.main_id_billing_country_per_deal,
mbcpd.main_billing_country_name_per_deal, mbcpd.main_billing_country_name_per_deal,
mbcpd.main_billing_country_iso_2_per_deal, mbcpd.main_billing_country_iso_2_per_deal,
mbcpd.main_billing_country_iso_3_per_deal, mbcpd.main_billing_country_iso_3_per_deal,
count(distinct uu.id_user) as users_with_this_id_deal, count(distinct uu.id_user) as users_with_this_id_deal,
count( count(distinct uu.billing_country_iso_3) as billing_countries_for_this_id_deal
distinct uu.billing_country_iso_3 from main_billing_country_per_deal mbcpd
) as billing_countries_for_this_id_deal left join int_core__unified_user uu on uu.id_deal = mbcpd.id_deal
from main_billing_country_per_deal mbcpd left join unique_name_per_deal unpd on uu.id_deal = unpd.id_deal
left join int_core__unified_user uu
on uu.id_deal = mbcpd.id_deal
where mbcpd.id_deal is not null where mbcpd.id_deal is not null
group by 1, 2, 3, 4, 5 group by 1, 2, 3, 4, 5, 6

View file

@ -2971,7 +2971,9 @@ models:
the same time, not all users that act as hosts have a deal associated. One the same time, not all users that act as hosts have a deal associated. One
example is Know Your Guest (KYG) Lite accounts. However, there's also historical example is Know Your Guest (KYG) Lite accounts. However, there's also historical
cases that for whatever reason there's no Deal associated. cases that for whatever reason there's no Deal associated.
For this model, the billing country and the deal name are estimated based on
the information available in int_core__unified_user.
columns: columns:
- name: id_deal - name: id_deal
data_type: character varying data_type: character varying
@ -2979,6 +2981,17 @@ models:
tests: tests:
- not_null - not_null
- unique - unique
- name: main_deal_name
data_type: string
description: |
Main name for this ID deal. It's a clean version of
the most repeated name within the user tables in the
fields of first_name, last_name and company name.
This field should be modified at the moment we have
a proper way to retrieve a common account name per deal.
It can contain duplicates.
tests:
- not_null
- name: main_id_billing_country_per_deal - name: main_id_billing_country_per_deal
data_type: integer data_type: integer
description: | description: |