Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

Panel
panelIconIdatlassian-note
panelIcon:note:
bgColor#E6FCFF

Notes
Several Phone numbers were identified to be shared between contacts as part of the Phone data assessment
Approach:
Select Contacts that are not part of the same household
Check if the Last name of the contacts match
Fuzzy match Phone numbers

Expand
titleSnowflake Script used to identify dupe contact Mobile across multiple household contacts
Code Block
/* Contact to Contact Dupes - Not part of the same Household Account, Last Names do not match*/
 
    with contact_ph as (
        select
            c.id "Contact SF ID",
            c.accountid "AccountID",
            c.name "Name",
            c.lastname "Lastname",
            c.mobilephone "Contact Phone",
            c.aqb__phonepreference__c "Phone Preference",
            a.aqb__accounttype__c "Account Type",
            case
                when startswith("Formatted Phone", '0011') then regexp_replace("Formatted Phone", '^(0011)', '')
                when startswith("Formatted Phone", '000') then regexp_replace("Formatted Phone", '^(000)', '0')
                when startswith("Formatted Phone", '00') then regexp_replace("Formatted Phone", '^(00)', '0')
                when startswith("Formatted Phone", '612') then regexp_replace("Formatted Phone", '^(612)', '02')
                when startswith("Formatted Phone", '613') then regexp_replace("Formatted Phone", '^(613)', '03')
                when startswith("Formatted Phone", '614') then regexp_replace("Formatted Phone", '^(614)', '04')
                when startswith("Formatted Phone", '617') then regexp_replace("Formatted Phone", '^(617)', '07')
                when startswith("Formatted Phone", '618') then regexp_replace("Formatted Phone", '^(618)', '08')
                when startswith("Formatted Phone", '6102') then regexp_replace("Formatted Phone", '^(6102)', '02')
                when startswith("Formatted Phone", '6103') then regexp_replace("Formatted Phone", '^(6103)', '03')
                when startswith("Formatted Phone", '6104') then regexp_replace("Formatted Phone", '^(6104)', '04')
                when startswith("Formatted Phone", '6107') then regexp_replace("Formatted Phone", '^(6107)', '07')
                when startswith("Formatted Phone", '6108') then regexp_replace("Formatted Phone", '^(6108)', '08')
                else "Formatted Phone"
            end "Formatted Phone"
        from(
                select
                    cp.*,
                    regexp_replace(lower(trim(cp.mobilephone)), '[^a-z0-9]', '') "Formatted Phone"
                from
                    salesforce_db.salesforce_sch.contact cp
            ) c
            left join salesforce_db.salesforce_sch.user u on u.id = c.createdbyid
            left join salesforce_db.salesforce_sch.user u1 on u1.id = c.lastmodifiedbyid
            left join salesforce_db.salesforce_sch.account a on a.id = c.accountid
            left join recordtype r on r.id = a.recordtypeid
        where
            c.mobilephone is not null and c.aqb__type__c <> 'Student' and r.name = 'Household'
    ),
    contact_dupe_ph as (
        select
            c.id "Contact SF ID:Dupe",
            c.accountid "AccountID:Dupe",
            c.name "Name:Dupe",
            c.lastname "Lastname:Dupe",
            c.mobilephone "Contact Phone:Dupe",
            c.aqb__phonepreference__c "Phone Preference:Dupe",
            a.aqb__accounttype__c "Account Type:Dupe",
            case
                when startswith("Formatted Phone", '0011') then regexp_replace("Formatted Phone", '^(0011)', '')
                when startswith("Formatted Phone", '000') then regexp_replace("Formatted Phone", '^(000)', '0')
                when startswith("Formatted Phone", '00') then regexp_replace("Formatted Phone", '^(00)', '0')
                when startswith("Formatted Phone", '612') then regexp_replace("Formatted Phone", '^(612)', '02')
                when startswith("Formatted Phone", '613') then regexp_replace("Formatted Phone", '^(613)', '03')
                when startswith("Formatted Phone", '614') then regexp_replace("Formatted Phone", '^(614)', '04')
                when startswith("Formatted Phone", '617') then regexp_replace("Formatted Phone", '^(617)', '07')
                when startswith("Formatted Phone", '618') then regexp_replace("Formatted Phone", '^(618)', '08')
                when startswith("Formatted Phone", '6102') then regexp_replace("Formatted Phone", '^(6102)', '02')
                when startswith("Formatted Phone", '6103') then regexp_replace("Formatted Phone", '^(6103)', '03')
                when startswith("Formatted Phone", '6104') then regexp_replace("Formatted Phone", '^(6104)', '04')
                when startswith("Formatted Phone", '6107') then regexp_replace("Formatted Phone", '^(6107)', '07')
                when startswith("Formatted Phone", '6108') then regexp_replace("Formatted Phone", '^(6108)', '08')
                else "Formatted Phone"
            end "Formatted Phone:Dupe"
        from(
                select
                    cp.*,
                    regexp_replace(lower(trim(cp.mobilephone)), '[^a-z0-9]', '') "Formatted Phone"
                from
                    salesforce_db.salesforce_sch.contact cp
            ) c
            left join salesforce_db.salesforce_sch.user u on u.id = c.createdbyid
            left join salesforce_db.salesforce_sch.user u1 on u1.id = c.lastmodifiedbyid
            left join salesforce_db.salesforce_sch.account a on a.id = c.accountid
            left join recordtype r on r.id = a.recordtypeid
        where
            c.mobilephone is not null and c.aqb__type__c <> 'Student' and r.name = 'Household'
    ),
    Dupes as (
        select
            *
        from
            (
                select
                    *
                from
                    contact_ph cp
            ) dd
            join contact_dupe_ph cdp on "Contact SF ID" <> "Contact SF ID:Dupe"
            and "Formatted Phone" = "Formatted Phone:Dupe"
            and "AccountID" <> "AccountID:Dupe"
            and "Lastname" <> "Lastname:Dupe"
        order by
            2,
            3,
            1
    )
select
    *
from
    dupes
order by
    1;

Expand
titleSnowflake Script used to identify phone numbers shared across multiple contacts in Alternate Channel
Code Block
with ac_primary as
(
select ac.id "Alt Channel SF ID:Primary",
        ac.name "Alt Channel Name:Primary",
        ac.aqb__contact__c "ContactID:Primary",
        c.name "Contact Name: Primary",
        c.lastname "Lastname:Primary",
        ac.aqb__account__c "AccountID:Primary",
        regexp_replace(split_part(ac.aqb__linkedto__c , '"',2), '/','') "Linked To:Primary",
        ac.aqb__status__c "Status:Primary",
        ac.aqb__type__c "Type:Primary",
        ac.aqb__phone__c "Phone:Primary",
        
        case 
        when startswith("Formatted Phone", '0011') then regexp_replace("Formatted Phone", '^(0011)', '')
        when startswith("Formatted Phone", '000') then regexp_replace("Formatted Phone",  '^(000)', '0')
        when startswith("Formatted Phone", '00') then regexp_replace("Formatted Phone",  '^(00)', '0')
        when startswith("Formatted Phone", '612') then regexp_replace("Formatted Phone", '^(612)', '02')
        when startswith("Formatted Phone", '613') then regexp_replace("Formatted Phone", '^(613)', '03')
        when startswith("Formatted Phone", '614') then regexp_replace("Formatted Phone", '^(614)', '04')
        when startswith("Formatted Phone", '617') then regexp_replace("Formatted Phone", '^(617)', '07')
        when startswith("Formatted Phone", '618') then regexp_replace("Formatted Phone", '^(618)', '08')
        when startswith("Formatted Phone", '6102') then regexp_replace("Formatted Phone", '^(6102)', '02')
        when startswith("Formatted Phone", '6103') then regexp_replace("Formatted Phone", '^(6103)', '03')
        when startswith("Formatted Phone", '6104') then regexp_replace("Formatted Phone", '^(6104)', '04')
        when startswith("Formatted Phone", '6107') then regexp_replace("Formatted Phone", '^(6107)', '07')
        when startswith("Formatted Phone", '6108') then regexp_replace("Formatted Phone", '^(6108)', '08')
        else "Formatted Phone"
        end "Formatted Phone:Primary",
        ac.createddate "CreatedDate:Primary",
        ac.lastmodifieddate "LastModifiedDate:Primary",
        u.name "CreatedBy:Primary",
        u1.name "LastModifiedBy:Primary",
        ac.aqb__source__c "Source:Primary",
        row_number() over(partition by "Linked To:Primary", "Formatted Phone:Primary" order by "CreatedDate:Primary" desc ) seq
        from(select a.*,
                    regexp_replace(lower(trim(a.aqb__phone__c)), '[^a-z0-9]', '') "Formatted Phone"
                    from salesforce_db.salesforce_sch.aqb__aqaddress__c a
                    )ac
        left join salesforce_db.salesforce_sch.recordtype r on r.id = ac.recordtypeid
        left join salesforce_db.salesforce_sch.user u on u.id = ac.createdbyid
        left join salesforce_db.salesforce_sch.user u1 on u1.id = ac.lastmodifiedbyid
        left join salesforce_db.salesforce_sch.contact c on left(c.id,15) = "Linked To:Primary"
        --left join salesforce_db.salesforce_sch.account a on a.id = ac.aqb__account__c
        where 
        r.name = 'Phone' and "Linked To:Primary" like '003%'
        order by 3,7

),


ac_dupes as
(
select ac.id "Alt Channel SF ID:Dupe",
        ac.name "Alt Channel Name:Dupe",
        ac.aqb__contact__c "ContactID:Dupe",
        c.name "Contact Name: Dupe",
        c.lastname "Lastname:Dupe",
        regexp_replace(split_part(ac.aqb__linkedto__c , '"',2), '/','') "Linked To:Dupe",
        ac.aqb__status__c "Status:Dupe",
        ac.aqb__type__c "Type:Dupe",
        ac.aqb__phone__c "Phone:Dupe",

        case 
        when startswith("Formatted Phone", '0011') then regexp_replace("Formatted Phone", '^(0011)', '')
        when startswith("Formatted Phone", '000') then regexp_replace("Formatted Phone",  '^(000)', '0')
        when startswith("Formatted Phone", '00') then regexp_replace("Formatted Phone",  '^(00)', '0')
        when startswith("Formatted Phone", '612') then regexp_replace("Formatted Phone", '^(612)', '02')
        when startswith("Formatted Phone", '613') then regexp_replace("Formatted Phone", '^(613)', '03')
        when startswith("Formatted Phone", '614') then regexp_replace("Formatted Phone", '^(614)', '04')
        when startswith("Formatted Phone", '617') then regexp_replace("Formatted Phone", '^(617)', '07')
        when startswith("Formatted Phone", '618') then regexp_replace("Formatted Phone", '^(618)', '08')
        when startswith("Formatted Phone", '6102') then regexp_replace("Formatted Phone", '^(6102)', '02')
        when startswith("Formatted Phone", '6103') then regexp_replace("Formatted Phone", '^(6103)', '03')
        when startswith("Formatted Phone", '6104') then regexp_replace("Formatted Phone", '^(6104)', '04')
        when startswith("Formatted Phone", '6107') then regexp_replace("Formatted Phone", '^(6107)', '07')
        when startswith("Formatted Phone", '6108') then regexp_replace("Formatted Phone", '^(6108)', '08')
        else "Formatted Phone"
        end "Formatted Phone:Dupe",
        ac.createddate "CreatedDate:Dupe",
        ac.lastmodifieddate "LastModifiedDate:Dupe",
        u.name "CreatedBy:Dupe",
        u1.name "LastModifiedBy:Dupe",
        ac.aqb__source__c "Source:Dupe",
        ac.aqb__linkedto__c,
        row_number() over(partition by "Linked To:Dupe" order by "CreatedDate:Dupe" desc ) seq
        from(select a.*,
                    regexp_replace(lower(trim(a.aqb__phone__c)), '[^a-z0-9]', '') "Formatted Phone"
                    from salesforce_db.salesforce_sch.aqb__aqaddress__c a
                    )ac
        left join salesforce_db.salesforce_sch.recordtype r on r.id = ac.recordtypeid
        left join salesforce_db.salesforce_sch.user u on u.id = ac.createdbyid
        left join salesforce_db.salesforce_sch.user u1 on u1.id = ac.lastmodifiedbyid
        left join salesforce_db.salesforce_sch.contact c on left(c.id,15) = "Linked To:Dupe"
        --left join salesforce_db.salesforce_sch.account a on left(a.id,15) = "Linked To:Dupe"
        where 
        r.name = 'Phone' and "Linked To:Dupe" like '003%'
),

Dupes as
( select *,        
       case
       when  "Lastname:Primary" =  "Lastname:Dupe" then
       'Yes'
       else
       'No'
       end "Last Name Match" from (
       select * from ac_primary acp 
       where acp.seq = 1
            )dd
       join ac_dupes acd on "Formatted Phone:Primary" = "Formatted Phone:Dupe" 
       where "Alt Channel Name:Primary" <> "Alt Channel Name:Dupe" and "Linked To:Primary" <> "Linked To:Dupe"
       order by 2,3,1
)

select "Alt Channel SF ID:Primary",
       "Alt Channel Name:Primary",
       "ContactID:Primary",
       "Contact Name: Primary",
       "AccountID:Primary",
       "Linked To:Primary",
       "Phone:Primary",
       "Formatted Phone:Primary",
       "Status:Primary",
       "Type:Primary",
       "LastModifiedDate:Primary",       
       "CreatedDate:Primary",
       "LastModifiedBy:Primary",
       "CreatedBy:Primary",
       "Source:Primary",   
       listagg("Linked To:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc)"Linked To:Dupes",
       listagg("Contact Name: Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Contact Name:Dupes",
       listagg("Alt Channel SF ID:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc)"Alt Channel ID:Dupes",
       listagg("Alt Channel Name:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Alt Channel Names:Dupes",
       listagg("Phone:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Phones:Dupes",
       listagg("Formatted Phone:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Formatted Phones:Dupes",
       listagg("Status:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Status:Dupes",
       listagg("Type:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Type:Dupes",
       listagg("CreatedBy:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "CreatedBy:Dupes",
       listagg("CreatedDate:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "CreatedDate:Dupes",
       listagg("LastModifiedBy:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "LastModifiedBy:Dupe",
       listagg("LastModifiedDate:Dupe",',') within group (order by "LastModifiedDate:Dupe" desc) "LastModifiedDate:Dupe",
       listagg("Source:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Source:Dupes",
       listagg("Last Name Match", ',') within group (order by "LastModifiedDate:Dupe" desc) "Match",
       count("Alt Channel SF ID:Dupe") "No of Dupes"
       from(
        Select PT1.*, Rank() OVER (  PARTITION BY "Linked To:Dupe"
                            ORDER BY RNK1,RNK2 ASC
                            ) AS RNK3 /* Rank all contact matches by contact_id2 to remove duplicate matches when filtere by rank3 = 1 */
        from 
        (
        select d.*, Rank() OVER (
                            ORDER BY "Linked To:Primary" ASC
                            ) AS RNK1 /* Rank al contact IDs in ascending order */
          ,   Rank() OVER ( PARTITION BY "Linked To:Primary"
                            ORDER BY "Linked To:Dupe" ASC
                            ) AS RNK2 /* Rank contacts linked to each dontact_Id1 in ascending order */
        from dupes d
       )PT1
       ORDER BY RNK1 ASC,RNK2 ASC
          )WHERE RNK3 = 1
          and contains("Last Name Match", 'No')
          GROUP BY ALL;