/* Alternate Channel De-Duplication - Marketing Automation/Preference Centre
Business Owners: Jessica Wood, Alain Gasquet
Developer Notes:
Records were trimmed and cleaned using regular expression replace
If number starts with +612, +6102, +613, +6103, +614, +6104, +617, +6107, +618, +6108 the numbers were considered Australian and +61 was replaced by 0.
For each unique formatted phone number per contact, sorted in descending order, dupliacate records were identified if there was an exact match.
*/
--create or replace view sp_advancement_db.tda_sch.altchannel_phone_dedupe_vw as
with duplicates as(
with ac_primary as
(
select ac.id "Alt Channel SF ID:Primary",
ac.name "Alt Channel Name:Primary",
ac.aqb__contact__c "ContactID:Primary",
ac.aqb__status__c "Status:Primary",
ac.aqb__type__c "Type:Primary",
ac.aqb__phone__c "Phone:Primary",
case
when startswith("Formatted Phone", '0011') then regexp_replace("Formatted Phone", '^(0011)', '')
when startswith("Formatted Phone", '000') then regexp_replace("Formatted Phone", '^(000)', '0')
when startswith("Formatted Phone", '00') then regexp_replace("Formatted Phone", '^(00)', '0')
when startswith("Formatted Phone", '612') then regexp_replace("Formatted Phone", '^(612)', '02')
when startswith("Formatted Phone", '613') then regexp_replace("Formatted Phone", '^(613)', '03')
when startswith("Formatted Phone", '614') then regexp_replace("Formatted Phone", '^(614)', '04')
when startswith("Formatted Phone", '617') then regexp_replace("Formatted Phone", '^(617)', '07')
when startswith("Formatted Phone", '618') then regexp_replace("Formatted Phone", '^(618)', '08')
when startswith("Formatted Phone", '6102') then regexp_replace("Formatted Phone", '^(6102)', '02')
when startswith("Formatted Phone", '6103') then regexp_replace("Formatted Phone", '^(6103)', '03')
when startswith("Formatted Phone", '6104') then regexp_replace("Formatted Phone", '^(6104)', '04')
when startswith("Formatted Phone", '6107') then regexp_replace("Formatted Phone", '^(6107)', '07')
when startswith("Formatted Phone", '6108') then regexp_replace("Formatted Phone", '^(6108)', '08')
else "Formatted Phone"
end "Formatted Phone:Primary",
ac.createddate "CreatedDate:Primary",
ac.lastmodifieddate "LastModifiedDate:Primary",
u.name "CreatedBy:Primary",
u1.name "LastModifiedBy:Primary",
ac.aqb__source__c "Source:Primary",
ac.aqb__linkedto__c,
row_number() over(partition by "ContactID:Primary", "Formatted Phone:Primary" order by "CreatedDate:Primary" desc ) seq
from(select a.*,
regexp_replace(lower(trim(a.aqb__phone__c)), '[^a-z0-9]', '') "Formatted Phone"
from salesforce_db.salesforce_sch.aqb__aqaddress__c a
)ac
left join salesforce_db.salesforce_sch.recordtype r on r.id = ac.recordtypeid
left join salesforce_db.salesforce_sch.user u on u.id = ac.createdbyid
left join salesforce_db.salesforce_sch.user u1 on u1.id = ac.lastmodifiedbyid
left join salesforce_db.salesforce_sch.contact c on c.id = ac.aqb__contact__c
left join salesforce_db.salesforce_sch.account a on a.id = ac.aqb__account__c
where "ContactID:Primary" is not null
and r.name = 'Phone'
and c.aqb__type__c <> 'Student'
-- and "Status:Primary" = 'Good'
--and a.aqb__accounttype__c = 'Household'
order by 3,7,14
),
ac_dupes as
(
select ac.id "Alt Channel SF ID:Dupe",
ac.name "Alt Channel Name:Dupe",
ac.aqb__contact__c "ContactID:Dupe",
ac.aqb__status__c "Status:Dupe",
ac.aqb__type__c "Type:Dupe",
ac.aqb__phone__c "Phone:Dupe",
case
when startswith("Formatted Phone", '0011') then regexp_replace("Formatted Phone", '^(0011)', '')
when startswith("Formatted Phone", '000') then regexp_replace("Formatted Phone", '^(000)', '0')
when startswith("Formatted Phone", '00') then regexp_replace("Formatted Phone", '^(00)', '0')
when startswith("Formatted Phone", '612') then regexp_replace("Formatted Phone", '^(612)', '02')
when startswith("Formatted Phone", '613') then regexp_replace("Formatted Phone", '^(613)', '03')
when startswith("Formatted Phone", '614') then regexp_replace("Formatted Phone", '^(614)', '04')
when startswith("Formatted Phone", '617') then regexp_replace("Formatted Phone", '^(617)', '07')
when startswith("Formatted Phone", '618') then regexp_replace("Formatted Phone", '^(618)', '08')
when startswith("Formatted Phone", '6102') then regexp_replace("Formatted Phone", '^(6102)', '02')
when startswith("Formatted Phone", '6103') then regexp_replace("Formatted Phone", '^(6103)', '03')
when startswith("Formatted Phone", '6104') then regexp_replace("Formatted Phone", '^(6104)', '04')
when startswith("Formatted Phone", '6107') then regexp_replace("Formatted Phone", '^(6107)', '07')
when startswith("Formatted Phone", '6108') then regexp_replace("Formatted Phone", '^(6108)', '08')
else "Formatted Phone"
end "Formatted Phone:Dupe",
ac.createddate "CreatedDate:Dupe",
ac.lastmodifieddate "LastModifiedDate:Dupe",
u.name "CreatedBy:Dupe",
u1.name "LastModifiedBy:Dupe",
ac.aqb__source__c "Source:Dupe",
ac.aqb__linkedto__c,
row_number() over(partition by "ContactID:Dupe" order by "CreatedDate:Dupe" desc ) seq
from(select a.*,
regexp_replace(lower(trim(a.aqb__phone__c)), '[^a-z0-9]', '') "Formatted Phone"
from salesforce_db.salesforce_sch.aqb__aqaddress__c a
)ac
left join salesforce_db.salesforce_sch.recordtype r on r.id = ac.recordtypeid
left join salesforce_db.salesforce_sch.user u on u.id = ac.createdbyid
left join salesforce_db.salesforce_sch.user u1 on u1.id = ac.lastmodifiedbyid
left join salesforce_db.salesforce_sch.contact c on c.id = ac.aqb__contact__c
left join salesforce_db.salesforce_sch.account a on a.id = ac.aqb__account__c
where "ContactID:Dupe" is not null
and r.name = 'Phone'
and c.aqb__type__c <> 'Student'
--and a.aqb__accounttype__c = 'Household'
),
Dupes as
( select *
from (
select * from ac_primary acp
where acp.seq = 1
)dd
join ac_dupes acd on "ContactID:Primary" = "ContactID:Dupe"
where "Alt Channel Name:Primary" <> "Alt Channel Name:Dupe"
and "Formatted Phone:Primary" like "Formatted Phone:Dupe"
order by 2,3,1
)
select "Alt Channel SF ID:Primary",
"Alt Channel Name:Primary",
"ContactID:Primary",
"Phone:Primary",
"Formatted Phone:Primary",
"Status:Primary",
"Type:Primary",
"LastModifiedDate:Primary",
"CreatedDate:Primary",
"LastModifiedBy:Primary",
"CreatedBy:Primary",
"Source:Primary",
listagg("Alt Channel SF ID:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc)"Alt Channel ID:Dupes",
listagg("Alt Channel Name:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Alt Channel Names:Dupes",
listagg("Phone:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Phones:Dupes",
listagg("Formatted Phone:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Formatted Phones:Dupes",
listagg("Status:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Status:Dupes",
listagg("Type:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Type:Dupes",
listagg("CreatedBy:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "CreatedBy:Dupes",
listagg("CreatedDate:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "CreatedDate:Dupes",
listagg("LastModifiedBy:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "LastModifiedBy:Dupe",
listagg("LastModifiedDate:Dupe",',') within group (order by "LastModifiedDate:Dupe" desc) "LastModifiedDate:Dupe",
listagg("Source:Dupe", ',') within group (order by "LastModifiedDate:Dupe" desc) "Source:Dupes",
count("Alt Channel SF ID:Dupe") "No of Dupes:Dupes"
from dupes d
--where "CreatedBy:Primary" = 'mule_svc'
group by all
order by 2,3,1
),
Segment_01 as
(
select 'Segment 01' "Segment Name",
d.*, '' "LastModifiedDate:Dupe1" from duplicates d
where "CreatedBy:Primary" <>'mule_svc'
and "CreatedBy:Primary" = 'AQConversion'
and "No of Dupes:Dupes" = 1
),
Segment_02 as
(select 'Segment 02' "Segment Name",
d.*, '' "LastModifiedDate:Dupe1" from duplicates d
where "No of Dupes:Dupes" = 1
and "CreatedBy:Primary" not in ('mule_svc', 'AQConversion')
),
Segment_03 as
(
select 'Segment 03' "Segment Name",
d.*, split_part("LastModifiedDate:Dupe", ',', 1) "LastModifiedDate:Dupe1" from duplicates d
where "No of Dupes:Dupes" > 1
and "CreatedBy:Primary" <> 'mule_svc'
),
Segment_04 as
(
select 'Segment 04' "Segment Name",
d.*, split_part("LastModifiedDate:Dupe", ',', 1) "LastModifiedDate:Dupe1" from duplicates d
where "CreatedBy:Primary" = 'mule_svc'
)
select * from ( select * from Segment_01
union all
select * from Segment_02
union all
select * from Segment_03
union all
select * from Segment_04);