First version of name unification for a deal
This commit is contained in:
parent
1e89966153
commit
f18a2eb520
2 changed files with 65 additions and 11 deletions
|
|
@ -24,23 +24,64 @@ with
|
||||||
c.country_name as main_billing_country_name_per_deal,
|
c.country_name as main_billing_country_name_per_deal,
|
||||||
c.iso_2 as main_billing_country_iso_2_per_deal,
|
c.iso_2 as main_billing_country_iso_2_per_deal,
|
||||||
c.iso_3 as main_billing_country_iso_3_per_deal
|
c.iso_3 as main_billing_country_iso_3_per_deal
|
||||||
from billing_countries_per_deal bcpd
|
from billing_countries_per_deal bcpd
|
||||||
left join int_core__country c
|
left join int_core__country c on bcpd.id_billing_country = c.id_country
|
||||||
on bcpd.id_billing_country = c.id_country
|
|
||||||
where bcpd.rn = 1
|
where bcpd.rn = 1
|
||||||
|
),
|
||||||
|
potential_name_per_deal as (
|
||||||
|
select
|
||||||
|
id_deal,
|
||||||
|
trim(
|
||||||
|
regexp_replace(potential_name, '[0-9\(\)\-\.\,@]', '', 'g')
|
||||||
|
) as potential_deal_name,
|
||||||
|
count(*) as potential_name_count
|
||||||
|
from
|
||||||
|
(
|
||||||
|
select id_deal, first_name as potential_name
|
||||||
|
from int_core__unified_user
|
||||||
|
where id_deal is not null
|
||||||
|
union all
|
||||||
|
select id_deal, last_name as potential_name
|
||||||
|
from int_core__unified_user
|
||||||
|
where id_deal is not null
|
||||||
|
union all
|
||||||
|
select id_deal, company_name as potential_name
|
||||||
|
from int_core__unified_user
|
||||||
|
where id_deal is not null
|
||||||
|
) as names
|
||||||
|
-- The following removes tons of names that are just Bookings,
|
||||||
|
-- LTD, LLC, INC, or empty strings
|
||||||
|
where
|
||||||
|
trim(upper(potential_name)) not in ('BOOKINGS', 'LTD', 'LLC', 'INC', '')
|
||||||
|
and potential_name is not null
|
||||||
|
group by 1, 2
|
||||||
|
),
|
||||||
|
unique_name_per_deal as (
|
||||||
|
select id_deal, potential_deal_name as main_deal_name
|
||||||
|
from
|
||||||
|
(
|
||||||
|
select
|
||||||
|
id_deal,
|
||||||
|
potential_deal_name,
|
||||||
|
potential_name_count,
|
||||||
|
row_number() over (
|
||||||
|
partition by id_deal order by potential_name_count desc
|
||||||
|
) as rn
|
||||||
|
from potential_name_per_deal
|
||||||
|
) as ranked_names
|
||||||
|
where rn = 1
|
||||||
)
|
)
|
||||||
select
|
select
|
||||||
uu.id_deal,
|
uu.id_deal,
|
||||||
|
unpd.main_deal_name,
|
||||||
mbcpd.main_id_billing_country_per_deal,
|
mbcpd.main_id_billing_country_per_deal,
|
||||||
mbcpd.main_billing_country_name_per_deal,
|
mbcpd.main_billing_country_name_per_deal,
|
||||||
mbcpd.main_billing_country_iso_2_per_deal,
|
mbcpd.main_billing_country_iso_2_per_deal,
|
||||||
mbcpd.main_billing_country_iso_3_per_deal,
|
mbcpd.main_billing_country_iso_3_per_deal,
|
||||||
count(distinct uu.id_user) as users_with_this_id_deal,
|
count(distinct uu.id_user) as users_with_this_id_deal,
|
||||||
count(
|
count(distinct uu.billing_country_iso_3) as billing_countries_for_this_id_deal
|
||||||
distinct uu.billing_country_iso_3
|
from main_billing_country_per_deal mbcpd
|
||||||
) as billing_countries_for_this_id_deal
|
left join int_core__unified_user uu on uu.id_deal = mbcpd.id_deal
|
||||||
from main_billing_country_per_deal mbcpd
|
left join unique_name_per_deal unpd on uu.id_deal = unpd.id_deal
|
||||||
left join int_core__unified_user uu
|
|
||||||
on uu.id_deal = mbcpd.id_deal
|
|
||||||
where mbcpd.id_deal is not null
|
where mbcpd.id_deal is not null
|
||||||
group by 1, 2, 3, 4, 5
|
group by 1, 2, 3, 4, 5, 6
|
||||||
|
|
|
||||||
|
|
@ -2971,7 +2971,9 @@ models:
|
||||||
the same time, not all users that act as hosts have a deal associated. One
|
the same time, not all users that act as hosts have a deal associated. One
|
||||||
example is Know Your Guest (KYG) Lite accounts. However, there's also historical
|
example is Know Your Guest (KYG) Lite accounts. However, there's also historical
|
||||||
cases that for whatever reason there's no Deal associated.
|
cases that for whatever reason there's no Deal associated.
|
||||||
|
For this model, the billing country and the deal name are estimated based on
|
||||||
|
the information available in int_core__unified_user.
|
||||||
|
|
||||||
columns:
|
columns:
|
||||||
- name: id_deal
|
- name: id_deal
|
||||||
data_type: character varying
|
data_type: character varying
|
||||||
|
|
@ -2979,6 +2981,17 @@ models:
|
||||||
tests:
|
tests:
|
||||||
- not_null
|
- not_null
|
||||||
- unique
|
- unique
|
||||||
|
- name: main_deal_name
|
||||||
|
data_type: string
|
||||||
|
description: |
|
||||||
|
Main name for this ID deal. It's a clean version of
|
||||||
|
the most repeated name within the user tables in the
|
||||||
|
fields of first_name, last_name and company name.
|
||||||
|
This field should be modified at the moment we have
|
||||||
|
a proper way to retrieve a common account name per deal.
|
||||||
|
It can contain duplicates.
|
||||||
|
tests:
|
||||||
|
- not_null
|
||||||
- name: main_id_billing_country_per_deal
|
- name: main_id_billing_country_per_deal
|
||||||
data_type: integer
|
data_type: integer
|
||||||
description: |
|
description: |
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue