Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

Panel
panelIconIdatlassian-note
panelIcon:note:
bgColor#E6FCFF

Notes
Several Phone numbers were identified to be shared between contacts as part of the Phone data assessment
Approach:
Phone numbers linked to contact records were isolated and cleaned to remove -

  • Any special characters, blank spaces etc.,

  • +61 was replaced with 0

  • 00s & 000s were replace with 0

  • 00011 was replace with blank

  • Select Contacts that are not part of the same household

  • Check if the Last name of the contacts match

  • Fuzzy match Phone numbers

Expand
titleSnowflake Script used to identify dupe contact Mobile across multiple household contacts
Code Block
/* Contact to Contact Dupes - Not part of the same Household Account, Last Names do not match*/
 
    with contact_ph as (
        select
            c.id "Contact SF ID",
            c.accountid "AccountID",
            c.name "Name",
            c.lastname "Lastname",
            c.mobilephone "Contact Phone",
            c.aqb__phonepreference__c "Phone Preference",
            a.aqb__accounttype__c "Account Type",
            case
                when startswith("Formatted Phone", '0011') then regexp_replace("Formatted Phone", '^(0011)', '')
                when startswith("Formatted Phone", '000') then regexp_replace("Formatted Phone", '^(000)', '0')
                when startswith("Formatted Phone", '00') then regexp_replace("Formatted Phone", '^(00)', '0')
                when startswith("Formatted Phone", '612') then regexp_replace("Formatted Phone", '^(612)', '02')
                when startswith("Formatted Phone", '613') then regexp_replace("Formatted Phone", '^(613)', '03')
                when startswith("Formatted Phone", '614') then regexp_replace("Formatted Phone", '^(614)', '04')
                when startswith("Formatted Phone", '617') then regexp_replace("Formatted Phone", '^(617)', '07')
                when startswith("Formatted Phone", '618') then regexp_replace("Formatted Phone", '^(618)', '08')
                when startswith("Formatted Phone", '6102') then regexp_replace("Formatted Phone", '^(6102)', '02')
                when startswith("Formatted Phone", '6103') then regexp_replace("Formatted Phone", '^(6103)', '03')
                when startswith("Formatted Phone", '6104') then regexp_replace("Formatted Phone", '^(6104)', '04')
                when startswith("Formatted Phone", '6107') then regexp_replace("Formatted Phone", '^(6107)', '07')
                when startswith("Formatted Phone", '6108') then regexp_replace("Formatted Phone", '^(6108)', '08')
                else "Formatted Phone"
            end "Formatted Phone"
        from(
                select
                    cp.*,
                    regexp_replace(lower(trim(cp.mobilephone)), '[^a-z0-9]', '') "Formatted Phone"
                from
                    salesforce_db.salesforce_sch.contact cp
            ) c
            left join salesforce_db.salesforce_sch.user u on u.id = c.createdbyid
            left join salesforce_db.salesforce_sch.user u1 on u1.id = c.lastmodifiedbyid
            left join salesforce_db.salesforce_sch.account a on a.id = c.accountid
            left join recordtype r on r.id = a.recordtypeid
        where
            c.mobilephone is not null and c.aqb__type__c <> 'Student' and r.name = 'Household'
    ),
    contact_dupe_ph as (
        select
            c.id "Contact SF ID:Dupe",
            c.accountid "AccountID:Dupe",
            c.name "Name:Dupe",
            c.lastname "Lastname:Dupe",
            c.mobilephone "Contact Phone:Dupe",
            c.aqb__phonepreference__c "Phone Preference:Dupe",
            a.aqb__accounttype__c "Account Type:Dupe",
            case
                when startswith("Formatted Phone", '0011') then regexp_replace("Formatted Phone", '^(0011)', '')
                when startswith("Formatted Phone", '000') then regexp_replace("Formatted Phone", '^(000)', '0')
                when startswith("Formatted Phone", '00') then regexp_replace("Formatted Phone", '^(00)', '0')
                when startswith("Formatted Phone", '612') then regexp_replace("Formatted Phone", '^(612)', '02')
                when startswith("Formatted Phone", '613') then regexp_replace("Formatted Phone", '^(613)', '03')
                when startswith("Formatted Phone", '614') then regexp_replace("Formatted Phone", '^(614)', '04')
                when startswith("Formatted Phone", '617') then regexp_replace("Formatted Phone", '^(617)', '07')
                when startswith("Formatted Phone", '618') then regexp_replace("Formatted Phone", '^(618)', '08')
                when startswith("Formatted Phone", '6102') then regexp_replace("Formatted Phone", '^(6102)', '02')
                when startswith("Formatted Phone", '6103') then regexp_replace("Formatted Phone", '^(6103)', '03')
                when startswith("Formatted Phone", '6104') then regexp_replace("Formatted Phone", '^(6104)', '04')
                when startswith("Formatted Phone", '6107') then regexp_replace("Formatted Phone", '^(6107)', '07')
                when startswith("Formatted Phone", '6108') then regexp_replace("Formatted Phone", '^(6108)', '08')
                else "Formatted Phone"
            end "Formatted Phone:Dupe"
        from(
                select
                    cp.*,
                    regexp_replace(lower(trim(cp.mobilephone)), '[^a-z0-9]', '') "Formatted Phone"
                from
                    salesforce_db.salesforce_sch.contact cp
            ) c
            left join salesforce_db.salesforce_sch.user u on u.id = c.createdbyid
            left join salesforce_db.salesforce_sch.user u1 on u1.id = c.lastmodifiedbyid
            left join salesforce_db.salesforce_sch.account a on a.id = c.accountid
            left join recordtype r on r.id = a.recordtypeid
        where
            c.mobilephone is not null and c.aqb__type__c <> 'Student' and r.name = 'Household'
    ),
    Dupes as (
        select
            *
        from
            (
                select
                    *
                from
                    contact_ph cp
            ) dd
            join contact_dupe_ph cdp on "Contact SF ID" <> "Contact SF ID:Dupe"
            and "Formatted Phone" = "Formatted Phone:Dupe"
            and "AccountID" <> "AccountID:Dupe"
            and "Lastname" <> "Lastname:Dupe"
        order by
            2,
            3,
            1
    )
select
    *
from
    dupes
order by
    1;

Expand
titleSnowflake Script used to identify phone numbers shared across multiple contacts in Alternate Channel
Code Block
with ac_primary as
(
select ac.id "Alt Channel SF ID:Primary",
        ac.name "Alt Channel Name:Primary",
        ac.aqb__contact__c "ContactID:Primary",
        c.name "Contact Name: Primary",
        c.lastname "Lastname:Primary",
        ac.aqb__account__c "AccountID:Primary",
        regexp_replace(split_part(ac.aqb__linkedto__c , '"',2), '/','') "Linked To:Primary",
        ac.aqb__status__c "Status:Primary",
        ac.aqb__type__c "Type:Primary",
        ac.aqb__phone__c "Phone:Primary",
        
        case 
        when startswith("Formatted Phone", '0011') then regexp_replace("Formatted Phone", '^(0011)', '')
        when startswith("Formatted Phone", '000') then regexp_replace("Formatted Phone",  '^(000)', '0')
        when startswith("Formatted Phone", '00') then regexp_replace("Formatted Phone",  '^(00)', '0')
        when startswith("Formatted Phone", '612') then regexp_replace("Formatted Phone", '^(612)', '02')
        when startswith("Formatted Phone", '613') then regexp_replace("Formatted Phone", '^(613)', '03')
        when startswith("Formatted Phone", '614') then regexp_replace("Formatted Phone", '^(614)', '04')
        when startswith("Formatted Phone", '617') then regexp_replace("Formatted Phone", '^(617)', '07')
        when startswith("Formatted Phone", '618') then regexp_replace("Formatted Phone", '^(618)', '08')
        when startswith("Formatted Phone", '6102') then regexp_replace("Formatted Phone", '^(6102)', '02')
        when startswith("Formatted Phone", '6103') then regexp_replace("Formatted Phone", '^(6103)', '03')
        when startswith("Formatted Phone", '6104') then regexp_replace("Formatted Phone", '^(6104)', '04')
        when startswith("Formatted Phone", '6107') then regexp_replace("Formatted Phone", '^(6107)', '07')
        when startswith("Formatted Phone", '6108') then regexp_replace("Formatted Phone", '^(6108)', '08')
        else "Formatted Phone"
        end "Formatted Phone:Primary",
        ac.createddate "CreatedDate:Primary",
        ac.lastmodifieddate "LastModifiedDate:Primary",
        u.name "CreatedBy:Primary",
        u1.name "LastModifiedBy:Primary",
        ac.aqb__source__c "Source:Primary",
        row_number() over(partition by "Linked To:Primary", "Formatted Phone:Primary" order by "CreatedDate:Primary" desc ) seq
        from(select a.*,
                    regexp_replace(lower(trim(a.aqb__phone__c)), '[^a-z0-9]', '') "Formatted Phone"
                    from salesforce_db.salesforce_sch.aqb__aqaddress__c a
                    )ac
        left join salesforce_db.salesforce_sch.recordtype r on r.id = ac.recordtypeid
        left join salesforce_db.salesforce_sch.user u on u.id = ac.createdbyid
        left join salesforce_db.salesforce_sch.user u1 on u1.id = ac.lastmodifiedbyid
        left join salesforce_db.salesforce_sch.contact c on left(c.id,15) = "Linked To:Primary"
        --left join salesforce_db.salesforce_sch.account a on a.id = ac.aqb__account__c
        where 
        r.name = 'Phone' and "Linked To:Primary" like '003%'
        order by 3,7

),


ac_dupes as
(
select ac.id "Alt Channel SF ID:Dupe",
        ac.name "Alt Channel Name:Dupe",
        ac.aqb__contact__c "ContactID:Dupe",
        c.name "Contact Name: Dupe",
        c.lastname "Lastname:Dupe",
        regexp_replace(split_part(ac.aqb__linkedto__c , '"',2), '/','') "Linked To:Dupe",
        ac.aqb__status__c "Status:Dupe",
        ac.aqb__type__c "Type:Dupe",
        ac.aqb__phone__c "Phone:Dupe",

        case 
        when startswith("Formatted Phone", '0011') then regexp_replace("Formatted Phone", '^(0011)', '')
        when startswith("Formatted Phone", '000') then regexp_replace("Formatted Phone",  '^(000)', '0')
        when startswith("Formatted Phone", '00') then regexp_replace("Formatted Phone",  '^(00)', '0')
        when startswith("Formatted Phone", '612') then regexp_replace("Formatted Phone", '^(612)', '02')
        when startswith("Formatted Phone", '613') then regexp_replace("Formatted Phone", '^(613)', '03')
        when startswith("Formatted Phone", '614') then regexp_replace("Formatted Phone", '^(614)', '04')
        when startswith("Formatted Phone", '617') then regexp_replace("Formatted Phone", '^(617)', '07')
        when startswith("Formatted Phone", '618') then regexp_replace("Formatted Phone", '^(618)', '08')
        when startswith("Formatted Phone", '6102') then regexp_replace("Formatted Phone", '^(6102)', '02')
        when startswith("Formatted Phone", '6103') then regexp_replace("Formatted Phone", '^(6103)', '03')
        when startswith("Formatted Phone", '6104') then regexp_replace("Formatted Phone", '^(6104)', '04')
        when startswith("Formatted Phone", '6107') then regexp_replace("Formatted Phone", '^(6107)', '07')
        when startswith("Formatted Phone", '6108') then regexp_replace("Formatted Phone", '^(6108)', '08')
        else "Formatted Phone"
        end "Formatted Phone:Dupe",
        ac.createddate "CreatedDate:Dupe",
        ac.lastmodifieddate "LastModifiedDate:Dupe",
        u.name "CreatedBy:Dupe",
        u1.name "LastModifiedBy:Dupe",
        ac.aqb__source__c "Source:Dupe",
        ac.aqb__linkedto__c,
        row_number() over(partition by "Linked To:Dupe" order by "CreatedDate:Dupe" desc ) seq
        from(select a.*,
                    regexp_replace(lower(trim(a.aqb__phone__c)), '[^a-z0-9]', '') "Formatted Phone"
                    from salesforce_db.salesforce_sch.aqb__aqaddress__c a
                    )ac
        left join salesforce_db.salesforce_sch.recordtype r on r.id = ac.recordtypeid
        left join salesforce_db.salesforce_sch.user u on u.id = ac.createdbyid
        left join salesforce_db.salesforce_sch.user u1 on u1.id = ac.lastmodifiedbyid
        left join salesforce_db.salesforce_sch.contact c on left(c.id,15) = "Linked To:Dupe"
        --left join salesforce_db.salesforce_sch.account a on left(a.id,15) = "Linked To:Dupe"
        where 
        r.name = 'Phone' and "Linked To:Dupe" like '003%'
),

Dupes as
( select *,        
       case
       when  "Lastname:Primary" =  "Lastname:Dupe" then
       'Yes'
       else
       'No'
       end "Last Name Match" from (
       select * from ac_primary acp 
       where acp.seq = 1
            )dd
       join ac_dupes acd on "Formatted Phone:Primary" = "Formatted Phone:Dupe" 
       where "Alt Channel Name:Primary" <> "Alt Channel Name:Dupe" and "Linked To:Primary" <> "Linked To:Dupe"
       order by 2,3,1
)

select "Alt Channel SF ID:Primary",
       "Alt Channel Name:Primary",
       "ContactID:Primary",
       "Contact Name: Primary",
       "AccountID:Primary",
       "Linked To:Primary",
       "Phone:Primary",
       "Formatted Phone:Primary",
       "Status:Primary",
       "Type:Primary",
       "LastModifiedDate:Primary",       
       "CreatedDate:Primary",
       "LastModifiedBy:Primary",
       "CreatedBy:Primary",
       "Source:Primary",   
       listagg("Linked To:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc)"Linked To:Dupes",
       listagg("Contact Name: Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Contact Name:Dupes",
       listagg("Alt Channel SF ID:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc)"Alt Channel ID:Dupes",
       listagg("Alt Channel Name:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Alt Channel Names:Dupes",
       listagg("Phone:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Phones:Dupes",
       listagg("Formatted Phone:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Formatted Phones:Dupes",
       listagg("Status:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Status:Dupes",
       listagg("Type:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Type:Dupes",
       listagg("CreatedBy:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "CreatedBy:Dupes",
       listagg("CreatedDate:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "CreatedDate:Dupes",
       listagg("LastModifiedBy:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "LastModifiedBy:Dupe",
       listagg("LastModifiedDate:Dupe",',') within group (order by "LastModifiedDate:Dupe" desc) "LastModifiedDate:Dupe",
       listagg("Source:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Source:Dupes",
       listagg("Last Name Match", ',') within group (order by "LastModifiedDate:Dupe" desc) "Match",
       count("Alt Channel SF ID:Dupe") "No of Dupes"
       from(
        Select PT1.*, Rank() OVER (  PARTITION BY "Linked To:Dupe"
                            ORDER BY RNK1,RNK2 ASC
                            ) AS RNK3 /* Rank all contact matches by contact_id2 to remove duplicate matches when filtere by rank3 = 1 */
        from 
        (
        select d.*, Rank() OVER (
                            ORDER BY "Linked To:Primary" ASC
                            ) AS RNK1 /* Rank al contact IDs in ascending order */
          ,   Rank() OVER ( PARTITION BY "Linked To:Primary"
                            ORDER BY "Linked To:Dupe" ASC
                            ) AS RNK2 /* Rank contacts linked to each dontact_Id1 in ascending order */
        from dupes d
       )PT1
       ORDER BY RNK1 ASC,RNK2 ASC
          )WHERE RNK3 = 1
          and contains("Last Name Match", 'No')
          GROUP BY ALL;