- Created by Binila Russel , last modified on Jun 06, 2024
You are viewing an old version of this content. View the current version.
Compare with Current View Version History
« Previous Version 4 Current »
Notes
Several Phone numbers were identified to be shared between contacts as part of the Phone data assessment
Approach:
Select Contacts that are not part of the same household
Check if the Last name of the contacts match
Fuzzy match Phone numbers
Snowflake Script used to identify dupe contact Mobile across multiple household contacts
/* Contact to Contact Dupes - Not part of the same Household Account, Last Names do not match*/ with contact_ph as ( select c.id "Contact SF ID", c.accountid "AccountID", c.name "Name", c.lastname "Lastname", c.mobilephone "Contact Phone", c.aqb__phonepreference__c "Phone Preference", a.aqb__accounttype__c "Account Type", case when startswith("Formatted Phone", '0011') then regexp_replace("Formatted Phone", '^(0011)', '') when startswith("Formatted Phone", '000') then regexp_replace("Formatted Phone", '^(000)', '0') when startswith("Formatted Phone", '00') then regexp_replace("Formatted Phone", '^(00)', '0') when startswith("Formatted Phone", '612') then regexp_replace("Formatted Phone", '^(612)', '02') when startswith("Formatted Phone", '613') then regexp_replace("Formatted Phone", '^(613)', '03') when startswith("Formatted Phone", '614') then regexp_replace("Formatted Phone", '^(614)', '04') when startswith("Formatted Phone", '617') then regexp_replace("Formatted Phone", '^(617)', '07') when startswith("Formatted Phone", '618') then regexp_replace("Formatted Phone", '^(618)', '08') when startswith("Formatted Phone", '6102') then regexp_replace("Formatted Phone", '^(6102)', '02') when startswith("Formatted Phone", '6103') then regexp_replace("Formatted Phone", '^(6103)', '03') when startswith("Formatted Phone", '6104') then regexp_replace("Formatted Phone", '^(6104)', '04') when startswith("Formatted Phone", '6107') then regexp_replace("Formatted Phone", '^(6107)', '07') when startswith("Formatted Phone", '6108') then regexp_replace("Formatted Phone", '^(6108)', '08') else "Formatted Phone" end "Formatted Phone" from( select cp.*, regexp_replace(lower(trim(cp.mobilephone)), '[^a-z0-9]', '') "Formatted Phone" from salesforce_db.salesforce_sch.contact cp ) c left join salesforce_db.salesforce_sch.user u on u.id = c.createdbyid left join salesforce_db.salesforce_sch.user u1 on u1.id = c.lastmodifiedbyid left join salesforce_db.salesforce_sch.account a on a.id = c.accountid left join recordtype r on r.id = a.recordtypeid where c.mobilephone is not null and c.aqb__type__c <> 'Student' and r.name = 'Household' ), contact_dupe_ph as ( select c.id "Contact SF ID:Dupe", c.accountid "AccountID:Dupe", c.name "Name:Dupe", c.lastname "Lastname:Dupe", c.mobilephone "Contact Phone:Dupe", c.aqb__phonepreference__c "Phone Preference:Dupe", a.aqb__accounttype__c "Account Type:Dupe", case when startswith("Formatted Phone", '0011') then regexp_replace("Formatted Phone", '^(0011)', '') when startswith("Formatted Phone", '000') then regexp_replace("Formatted Phone", '^(000)', '0') when startswith("Formatted Phone", '00') then regexp_replace("Formatted Phone", '^(00)', '0') when startswith("Formatted Phone", '612') then regexp_replace("Formatted Phone", '^(612)', '02') when startswith("Formatted Phone", '613') then regexp_replace("Formatted Phone", '^(613)', '03') when startswith("Formatted Phone", '614') then regexp_replace("Formatted Phone", '^(614)', '04') when startswith("Formatted Phone", '617') then regexp_replace("Formatted Phone", '^(617)', '07') when startswith("Formatted Phone", '618') then regexp_replace("Formatted Phone", '^(618)', '08') when startswith("Formatted Phone", '6102') then regexp_replace("Formatted Phone", '^(6102)', '02') when startswith("Formatted Phone", '6103') then regexp_replace("Formatted Phone", '^(6103)', '03') when startswith("Formatted Phone", '6104') then regexp_replace("Formatted Phone", '^(6104)', '04') when startswith("Formatted Phone", '6107') then regexp_replace("Formatted Phone", '^(6107)', '07') when startswith("Formatted Phone", '6108') then regexp_replace("Formatted Phone", '^(6108)', '08') else "Formatted Phone" end "Formatted Phone:Dupe" from( select cp.*, regexp_replace(lower(trim(cp.mobilephone)), '[^a-z0-9]', '') "Formatted Phone" from salesforce_db.salesforce_sch.contact cp ) c left join salesforce_db.salesforce_sch.user u on u.id = c.createdbyid left join salesforce_db.salesforce_sch.user u1 on u1.id = c.lastmodifiedbyid left join salesforce_db.salesforce_sch.account a on a.id = c.accountid left join recordtype r on r.id = a.recordtypeid where c.mobilephone is not null and c.aqb__type__c <> 'Student' and r.name = 'Household' ), Dupes as ( select * from ( select * from contact_ph cp ) dd join contact_dupe_ph cdp on "Contact SF ID" <> "Contact SF ID:Dupe" and "Formatted Phone" = "Formatted Phone:Dupe" and "AccountID" <> "AccountID:Dupe" and "Lastname" <> "Lastname:Dupe" order by 2, 3, 1 ) select * from dupes order by 1;
Snowflake Script used to identify phone numbers shared across multiple contacts in Alternate Channel
with ac_primary as ( select ac.id "Alt Channel SF ID:Primary", ac.name "Alt Channel Name:Primary", ac.aqb__contact__c "ContactID:Primary", c.name "Contact Name: Primary", c.lastname "Lastname:Primary", ac.aqb__account__c "AccountID:Primary", regexp_replace(split_part(ac.aqb__linkedto__c , '"',2), '/','') "Linked To:Primary", ac.aqb__status__c "Status:Primary", ac.aqb__type__c "Type:Primary", ac.aqb__phone__c "Phone:Primary", case when startswith("Formatted Phone", '0011') then regexp_replace("Formatted Phone", '^(0011)', '') when startswith("Formatted Phone", '000') then regexp_replace("Formatted Phone", '^(000)', '0') when startswith("Formatted Phone", '00') then regexp_replace("Formatted Phone", '^(00)', '0') when startswith("Formatted Phone", '612') then regexp_replace("Formatted Phone", '^(612)', '02') when startswith("Formatted Phone", '613') then regexp_replace("Formatted Phone", '^(613)', '03') when startswith("Formatted Phone", '614') then regexp_replace("Formatted Phone", '^(614)', '04') when startswith("Formatted Phone", '617') then regexp_replace("Formatted Phone", '^(617)', '07') when startswith("Formatted Phone", '618') then regexp_replace("Formatted Phone", '^(618)', '08') when startswith("Formatted Phone", '6102') then regexp_replace("Formatted Phone", '^(6102)', '02') when startswith("Formatted Phone", '6103') then regexp_replace("Formatted Phone", '^(6103)', '03') when startswith("Formatted Phone", '6104') then regexp_replace("Formatted Phone", '^(6104)', '04') when startswith("Formatted Phone", '6107') then regexp_replace("Formatted Phone", '^(6107)', '07') when startswith("Formatted Phone", '6108') then regexp_replace("Formatted Phone", '^(6108)', '08') else "Formatted Phone" end "Formatted Phone:Primary", ac.createddate "CreatedDate:Primary", ac.lastmodifieddate "LastModifiedDate:Primary", u.name "CreatedBy:Primary", u1.name "LastModifiedBy:Primary", ac.aqb__source__c "Source:Primary", row_number() over(partition by "Linked To:Primary", "Formatted Phone:Primary" order by "CreatedDate:Primary" desc ) seq from(select a.*, regexp_replace(lower(trim(a.aqb__phone__c)), '[^a-z0-9]', '') "Formatted Phone" from salesforce_db.salesforce_sch.aqb__aqaddress__c a )ac left join salesforce_db.salesforce_sch.recordtype r on r.id = ac.recordtypeid left join salesforce_db.salesforce_sch.user u on u.id = ac.createdbyid left join salesforce_db.salesforce_sch.user u1 on u1.id = ac.lastmodifiedbyid left join salesforce_db.salesforce_sch.contact c on left(c.id,15) = "Linked To:Primary" --left join salesforce_db.salesforce_sch.account a on a.id = ac.aqb__account__c where r.name = 'Phone' and "Linked To:Primary" like '003%' order by 3,7 ), ac_dupes as ( select ac.id "Alt Channel SF ID:Dupe", ac.name "Alt Channel Name:Dupe", ac.aqb__contact__c "ContactID:Dupe", c.name "Contact Name: Dupe", c.lastname "Lastname:Dupe", regexp_replace(split_part(ac.aqb__linkedto__c , '"',2), '/','') "Linked To:Dupe", ac.aqb__status__c "Status:Dupe", ac.aqb__type__c "Type:Dupe", ac.aqb__phone__c "Phone:Dupe", case when startswith("Formatted Phone", '0011') then regexp_replace("Formatted Phone", '^(0011)', '') when startswith("Formatted Phone", '000') then regexp_replace("Formatted Phone", '^(000)', '0') when startswith("Formatted Phone", '00') then regexp_replace("Formatted Phone", '^(00)', '0') when startswith("Formatted Phone", '612') then regexp_replace("Formatted Phone", '^(612)', '02') when startswith("Formatted Phone", '613') then regexp_replace("Formatted Phone", '^(613)', '03') when startswith("Formatted Phone", '614') then regexp_replace("Formatted Phone", '^(614)', '04') when startswith("Formatted Phone", '617') then regexp_replace("Formatted Phone", '^(617)', '07') when startswith("Formatted Phone", '618') then regexp_replace("Formatted Phone", '^(618)', '08') when startswith("Formatted Phone", '6102') then regexp_replace("Formatted Phone", '^(6102)', '02') when startswith("Formatted Phone", '6103') then regexp_replace("Formatted Phone", '^(6103)', '03') when startswith("Formatted Phone", '6104') then regexp_replace("Formatted Phone", '^(6104)', '04') when startswith("Formatted Phone", '6107') then regexp_replace("Formatted Phone", '^(6107)', '07') when startswith("Formatted Phone", '6108') then regexp_replace("Formatted Phone", '^(6108)', '08') else "Formatted Phone" end "Formatted Phone:Dupe", ac.createddate "CreatedDate:Dupe", ac.lastmodifieddate "LastModifiedDate:Dupe", u.name "CreatedBy:Dupe", u1.name "LastModifiedBy:Dupe", ac.aqb__source__c "Source:Dupe", ac.aqb__linkedto__c, row_number() over(partition by "Linked To:Dupe" order by "CreatedDate:Dupe" desc ) seq from(select a.*, regexp_replace(lower(trim(a.aqb__phone__c)), '[^a-z0-9]', '') "Formatted Phone" from salesforce_db.salesforce_sch.aqb__aqaddress__c a )ac left join salesforce_db.salesforce_sch.recordtype r on r.id = ac.recordtypeid left join salesforce_db.salesforce_sch.user u on u.id = ac.createdbyid left join salesforce_db.salesforce_sch.user u1 on u1.id = ac.lastmodifiedbyid left join salesforce_db.salesforce_sch.contact c on left(c.id,15) = "Linked To:Dupe" --left join salesforce_db.salesforce_sch.account a on left(a.id,15) = "Linked To:Dupe" where r.name = 'Phone' and "Linked To:Dupe" like '003%' ), Dupes as ( select *, case when "Lastname:Primary" = "Lastname:Dupe" then 'Yes' else 'No' end "Last Name Match" from ( select * from ac_primary acp where acp.seq = 1 )dd join ac_dupes acd on "Formatted Phone:Primary" = "Formatted Phone:Dupe" where "Alt Channel Name:Primary" <> "Alt Channel Name:Dupe" and "Linked To:Primary" <> "Linked To:Dupe" order by 2,3,1 ) select "Alt Channel SF ID:Primary", "Alt Channel Name:Primary", "ContactID:Primary", "Contact Name: Primary", "AccountID:Primary", "Linked To:Primary", "Phone:Primary", "Formatted Phone:Primary", "Status:Primary", "Type:Primary", "LastModifiedDate:Primary", "CreatedDate:Primary", "LastModifiedBy:Primary", "CreatedBy:Primary", "Source:Primary", listagg("Linked To:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc)"Linked To:Dupes", listagg("Contact Name: Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Contact Name:Dupes", listagg("Alt Channel SF ID:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc)"Alt Channel ID:Dupes", listagg("Alt Channel Name:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Alt Channel Names:Dupes", listagg("Phone:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Phones:Dupes", listagg("Formatted Phone:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Formatted Phones:Dupes", listagg("Status:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Status:Dupes", listagg("Type:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Type:Dupes", listagg("CreatedBy:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "CreatedBy:Dupes", listagg("CreatedDate:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "CreatedDate:Dupes", listagg("LastModifiedBy:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "LastModifiedBy:Dupe", listagg("LastModifiedDate:Dupe",',') within group (order by "LastModifiedDate:Dupe" desc) "LastModifiedDate:Dupe", listagg("Source:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Source:Dupes", listagg("Last Name Match", ',') within group (order by "LastModifiedDate:Dupe" desc) "Match", count("Alt Channel SF ID:Dupe") "No of Dupes" from( Select PT1.*, Rank() OVER ( PARTITION BY "Linked To:Dupe" ORDER BY RNK1,RNK2 ASC ) AS RNK3 /* Rank all contact matches by contact_id2 to remove duplicate matches when filtere by rank3 = 1 */ from ( select d.*, Rank() OVER ( ORDER BY "Linked To:Primary" ASC ) AS RNK1 /* Rank al contact IDs in ascending order */ , Rank() OVER ( PARTITION BY "Linked To:Primary" ORDER BY "Linked To:Dupe" ASC ) AS RNK2 /* Rank contacts linked to each dontact_Id1 in ascending order */ from dupes d )PT1 ORDER BY RNK1 ASC,RNK2 ASC )WHERE RNK3 = 1 and contains("Last Name Match", 'No') GROUP BY ALL;
- No labels